From 9580e48bbc9f04321b9855a56b6e7403056a68e4 Mon Sep 17 00:00:00 2001 From: motley Date: Sun, 15 Jul 2012 12:25:00 -0400 Subject: [PATCH 001/678] Add compiler flags --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 64562abe90a..7c51ce36b64 100644 --- a/Makefile +++ b/Makefile @@ -350,8 +350,8 @@ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ CFLAGS_MODULE = AFLAGS_MODULE = LDFLAGS_MODULE = -CFLAGS_KERNEL = -AFLAGS_KERNEL = +CFLAGS_KERNEL = -O2 -mtune=cortex-a9 -ftree-vectorize -ffast-math -fsingle-precision-constant +AFLAGS_KERNEL = -O2 -mtune=cortex-a9 -ftree-vectorize -ffast-math -fsingle-precision-constant CFLAGS_GCOV = -fprofile-arcs -ftest-coverage From 8be014aeb75b4585ce6eb9f9468a03442c563d30 Mon Sep 17 00:00:00 2001 From: motley Date: Sun, 15 Jul 2012 12:26:25 -0400 Subject: [PATCH 002/678] Add motley grouper defconfig --- arch/arm/configs/motley_grouper_defconfig | 3340 +++++++++++++++++++++ 1 file changed, 3340 insertions(+) create mode 100644 arch/arm/configs/motley_grouper_defconfig diff --git a/arch/arm/configs/motley_grouper_defconfig b/arch/arm/configs/motley_grouper_defconfig new file mode 100644 index 00000000000..42eea718501 --- /dev/null +++ b/arch/arm/configs/motley_grouper_defconfig @@ -0,0 +1,3340 @@ +# +# Automatically generated file; DO NOT EDIT. +# Linux/arm 3.1.10 Kernel Configuration +# +CONFIG_ARM=y +CONFIG_HAVE_PWM=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_HAVE_SCHED_CLOCK=y +CONFIG_GENERIC_GPIO=y +# CONFIG_ARCH_USES_GETTIMEOFFSET is not set +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +CONFIG_KTIME_SCALAR=y +CONFIG_HAVE_PROC_CPU=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_LOCKBREAK=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_ARCH_HAS_CPUFREQ=y +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_NEED_DMA_MAP_STATE=y +CONFIG_FIQ=y +CONFIG_ARCH_PROVIDES_UDELAY=y +CONFIG_VECTORS_BASE=0xffff0000 +# CONFIG_ARM_PATCH_PHYS_VIRT is not set +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_HAVE_IRQ_WORK=y +CONFIG_IRQ_WORK=y + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_CROSS_COMPILE="" +CONFIG_LOCALVERSION="-motley" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_HAVE_KERNEL_GZIP=y +CONFIG_HAVE_KERNEL_LZMA=y +CONFIG_HAVE_KERNEL_LZO=y +# CONFIG_KERNEL_GZIP is not set +CONFIG_KERNEL_LZMA=y +# CONFIG_KERNEL_LZO is not set +CONFIG_DEFAULT_HOSTNAME="(none)" +CONFIG_SWAP=y +# CONFIG_SYSVIPC is not set +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_FHANDLE is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set +CONFIG_HAVE_GENERIC_HARDIRQS=y + +# +# IRQ subsystem +# +CONFIG_GENERIC_HARDIRQS=y +CONFIG_HAVE_SPARSE_IRQ=y +CONFIG_GENERIC_IRQ_SHOW=y +# CONFIG_SPARSE_IRQ is not set + +# +# RCU Subsystem +# +CONFIG_TREE_PREEMPT_RCU=y +CONFIG_PREEMPT_RCU=y +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_RCU_BOOST is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +# CONFIG_CGROUP_DEVICE is not set +# CONFIG_CPUSETS is not set +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +# CONFIG_CGROUP_MEM_RES_CTLR is not set +# 
CONFIG_CGROUP_PERF is not set +CONFIG_CGROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +# CONFIG_BLK_CGROUP is not set +# CONFIG_NAMESPACES is not set +CONFIG_SCHED_AUTOGROUP=y +# CONFIG_SYSFS_DEPRECATED is not set +# CONFIG_RELAY is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +CONFIG_PANIC_TIMEOUT=10 +CONFIG_EXPERT=y +CONFIG_UID16=y +# CONFIG_SYSCTL_SYSCALL is not set +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +# CONFIG_ELF_CORE is not set +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_ASHMEM=y +CONFIG_AIO=y +CONFIG_EMBEDDED=y +CONFIG_HAVE_PERF_EVENTS=y +CONFIG_PERF_USE_VMALLOC=y + +# +# Kernel Performance Events And Counters +# +CONFIG_PERF_EVENTS=y +# CONFIG_PERF_COUNTERS is not set +# CONFIG_DEBUG_PERF_USE_VMALLOC is not set +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y +CONFIG_COMPAT_BRK=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y +CONFIG_OPROFILE=y +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_USE_GENERIC_SMP_HELPERS=y +CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y +CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +CONFIG_HAVE_HW_BREAKPOINT=y + +# +# GCOV-based kernel profiling +# +# CONFIG_GCOV_KERNEL is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +CONFIG_LBDAF=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_BSGLIB is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_IOSCHED_SIO=y +# CONFIG_DEFAULT_DEADLINE is not set +CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_NOOP is not set +# CONFIG_DEFAULT_SIO is not set +CONFIG_DEFAULT_IOSCHED="cfq" +# CONFIG_INLINE_SPIN_TRYLOCK is not set +# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK is not set +# CONFIG_INLINE_SPIN_LOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK_IRQ is not set +# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set +# CONFIG_INLINE_SPIN_UNLOCK is not set +# CONFIG_INLINE_SPIN_UNLOCK_BH is not set +# CONFIG_INLINE_SPIN_UNLOCK_IRQ is not set +# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_READ_TRYLOCK is not set +# CONFIG_INLINE_READ_LOCK is not set +# CONFIG_INLINE_READ_LOCK_BH is not set +# CONFIG_INLINE_READ_LOCK_IRQ is not set +# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set +# CONFIG_INLINE_READ_UNLOCK is not set +# CONFIG_INLINE_READ_UNLOCK_BH is not set +# CONFIG_INLINE_READ_UNLOCK_IRQ is not set +# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_WRITE_TRYLOCK is not set +# CONFIG_INLINE_WRITE_LOCK is not set +# CONFIG_INLINE_WRITE_LOCK_BH is not set +# CONFIG_INLINE_WRITE_LOCK_IRQ is not set +# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set +# CONFIG_INLINE_WRITE_UNLOCK is not set +# CONFIG_INLINE_WRITE_UNLOCK_BH is not set +# CONFIG_INLINE_WRITE_UNLOCK_IRQ is not set +# 
CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set +CONFIG_MUTEX_SPIN_ON_OWNER=y +CONFIG_FREEZER=y + +# +# System Type +# +CONFIG_MMU=y +# CONFIG_ARCH_INTEGRATOR is not set +# CONFIG_ARCH_REALVIEW is not set +# CONFIG_ARCH_VERSATILE is not set +# CONFIG_ARCH_VEXPRESS is not set +# CONFIG_ARCH_AT91 is not set +# CONFIG_ARCH_BCMRING is not set +# CONFIG_ARCH_CLPS711X is not set +# CONFIG_ARCH_CNS3XXX is not set +# CONFIG_ARCH_GEMINI is not set +# CONFIG_ARCH_PRIMA2 is not set +# CONFIG_ARCH_EBSA110 is not set +# CONFIG_ARCH_EP93XX is not set +# CONFIG_ARCH_FOOTBRIDGE is not set +# CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_MXS is not set +# CONFIG_ARCH_NETX is not set +# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_IOP13XX is not set +# CONFIG_ARCH_IOP32X is not set +# CONFIG_ARCH_IOP33X is not set +# CONFIG_ARCH_IXP23XX is not set +# CONFIG_ARCH_IXP2000 is not set +# CONFIG_ARCH_IXP4XX is not set +# CONFIG_ARCH_DOVE is not set +# CONFIG_ARCH_KIRKWOOD is not set +# CONFIG_ARCH_LPC32XX is not set +# CONFIG_ARCH_MV78XX0 is not set +# CONFIG_ARCH_ORION5X is not set +# CONFIG_ARCH_MMP is not set +# CONFIG_ARCH_KS8695 is not set +# CONFIG_ARCH_W90X900 is not set +# CONFIG_ARCH_NUC93X is not set +CONFIG_ARCH_TEGRA=y +# CONFIG_ARCH_PNX4008 is not set +# CONFIG_ARCH_PXA is not set +# CONFIG_ARCH_MSM is not set +# CONFIG_ARCH_SHMOBILE is not set +# CONFIG_ARCH_RPC is not set +# CONFIG_ARCH_SA1100 is not set +# CONFIG_ARCH_S3C2410 is not set +# CONFIG_ARCH_S3C64XX is not set +# CONFIG_ARCH_S5P64X0 is not set +# CONFIG_ARCH_S5PC100 is not set +# CONFIG_ARCH_S5PV210 is not set +# CONFIG_ARCH_EXYNOS4 is not set +# CONFIG_ARCH_SHARK is not set +# CONFIG_ARCH_TCC_926 is not set +# CONFIG_ARCH_U300 is not set +# CONFIG_ARCH_U8500 is not set +# CONFIG_ARCH_NOMADIK is not set +# CONFIG_ARCH_DAVINCI is not set +# CONFIG_ARCH_OMAP is not set +# CONFIG_PLAT_SPEAR is not set +# CONFIG_ARCH_VT8500 is not set +# CONFIG_ARCH_ZYNQ is not set +CONFIG_GPIO_PCA953X=y +# CONFIG_KEYBOARD_GPIO_POLLED is not set + +# +# System MMU +# + +# +# NVIDIA Tegra options +# +CONFIG_ARCH_TEGRA_3x_SOC=y +CONFIG_ARCH_TEGRA_HAS_DUAL_3D=y +CONFIG_ARCH_TEGRA_HAS_DUAL_CPU_CLUSTERS=y +CONFIG_ARCH_TEGRA_HAS_PCIE=y +CONFIG_ARCH_TEGRA_HAS_SATA=y +CONFIG_TEGRA_PCI=y + +# +# Tegra board type +# +# CONFIG_MACH_TEGRA_DT is not set +# CONFIG_MACH_ARUBA is not set +CONFIG_MACH_CARDHU=y +# CONFIG_MACH_P1852 is not set +CONFIG_MACH_TEGRA_ENTERPRISE=y +# CONFIG_MACH_KAI is not set +CONFIG_MACH_GROUPER=y +CONFIG_TEGRA_SILICON_PLATFORM=y +# CONFIG_TEGRA_SIMULATION_PLATFORM is not set +# CONFIG_TEGRA_FPGA_PLATFORM is not set +CONFIG_TEGRA_DEBUG_UART_NONE=y +CONFIG_TEGRA_SYSTEM_DMA=y +CONFIG_TEGRA_PWM=y +CONFIG_TEGRA_FIQ_DEBUGGER=y +# CONFIG_TEGRA_CARDHU_DSI is not set +CONFIG_TEGRA_EMC_SCALING_ENABLE=y +CONFIG_TEGRA_CPU_DVFS=y +CONFIG_TEGRA_CORE_DVFS=y +CONFIG_TEGRA_IOVMM_SMMU=y +# CONFIG_TEGRA_SMMU_BASE_AT_E0000000 is not set +# CONFIG_TEGRA_IOVMM_SMMU_SYSFS is not set +CONFIG_TEGRA_IOVMM=y +CONFIG_TEGRA_AVP_KERNEL_ON_SMMU=y +CONFIG_TEGRA_THERMAL_THROTTLE=y +CONFIG_WIFI_CONTROL_FUNC=y +CONFIG_TEGRA_CLOCK_DEBUG_WRITE=y +CONFIG_TEGRA_CLUSTER_CONTROL=y +CONFIG_TEGRA_AUTO_HOTPLUG=y +CONFIG_TEGRA_MC_EARLY_ACK=y +CONFIG_TEGRA_MC_PROFILE=y +CONFIG_TEGRA_EDP_LIMITS=y +CONFIG_TEGRA_EMC_TO_DDR_CLOCK=1 +# CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND is not set +CONFIG_USB_HOTPLUG=y +CONFIG_TEGRA_DYNAMIC_PWRDET=y +CONFIG_TEGRA_EDP_EXACT_FREQ=y +# CONFIG_TEGRA_USB_MODEM_POWER is not set +# CONFIG_TEGRA_BB_XMM_POWER is not set +# CONFIG_TEGRA_BB_XMM_POWER2 is not set +# 
CONFIG_TEGRA_THERMAL_SYSFS is not set +CONFIG_TEGRA_PLLM_RESTRICTED=y +# CONFIG_TEGRA_WDT_RECOVERY is not set +CONFIG_TEGRA_LP2_ARM_TWD=y +CONFIG_TEGRA_SLOW_CSITE=y +# CONFIG_TEGRA_PREINIT_CLOCKS is not set + +# +# Processor Type +# +CONFIG_CPU_V7=y +CONFIG_CPU_32v6K=y +CONFIG_CPU_32v7=y +CONFIG_CPU_ABRT_EV7=y +CONFIG_CPU_PABRT_V7=y +CONFIG_CPU_CACHE_V7=y +CONFIG_CPU_CACHE_VIPT=y +CONFIG_CPU_COPY_V6=y +CONFIG_CPU_TLB_V7=y +CONFIG_CPU_HAS_ASID=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y + +# +# Processor Features +# +CONFIG_ARM_THUMB=y +# CONFIG_ARM_THUMBEE is not set +CONFIG_SWP_EMULATE=y +# CONFIG_CPU_ICACHE_DISABLE is not set +# CONFIG_CPU_DCACHE_DISABLE is not set +# CONFIG_CPU_BPREDICT_DISABLE is not set +CONFIG_OUTER_CACHE=y +CONFIG_OUTER_CACHE_SYNC=y +CONFIG_CACHE_L2X0=y +CONFIG_CACHE_PL310=y +CONFIG_ARM_L1_CACHE_SHIFT=5 +CONFIG_ARM_DMA_MEM_BUFFERABLE=y +CONFIG_ARM_SAVE_DEBUG_CONTEXT=y +CONFIG_CPA=y +CONFIG_CPU_HAS_PMU=y +# CONFIG_ARM_ERRATA_430973 is not set +# CONFIG_ARM_ERRATA_458693 is not set +# CONFIG_ARM_ERRATA_460075 is not set +CONFIG_ARM_ERRATA_742230=y +# CONFIG_ARM_ERRATA_742231 is not set +# CONFIG_PL310_ERRATA_588369 is not set +# CONFIG_ARM_ERRATA_720789 is not set +# CONFIG_PL310_ERRATA_727915 is not set +CONFIG_ARM_ERRATA_743622=y +CONFIG_ARM_ERRATA_751472=y +# CONFIG_ARM_ERRATA_753970 is not set +CONFIG_ARM_ERRATA_754322=y +# CONFIG_ARM_ERRATA_754327 is not set +# CONFIG_ARM_ERRATA_764369 is not set +# CONFIG_ARM_ERRATA_720791 is not set +CONFIG_ARM_ERRATA_752520=y +# CONFIG_PL310_ERRATA_769419 is not set +CONFIG_ARM_GIC=y +CONFIG_FIQ_GLUE=y +CONFIG_FIQ_DEBUGGER=y +# CONFIG_FIQ_DEBUGGER_NO_SLEEP is not set +# CONFIG_FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON is not set +CONFIG_FIQ_DEBUGGER_CONSOLE=y +# CONFIG_FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE is not set +CONFIG_GIC_SET_MULTIPLE_CPUS=y + +# +# Bus support +# +CONFIG_PCI=y +CONFIG_PCI_SYSCALL=y +CONFIG_ARCH_SUPPORTS_MSI=y +CONFIG_PCI_MSI=y +# CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_STUB is not set +# CONFIG_PCI_IOV is not set +# CONFIG_PCCARD is not set + +# +# Kernel Features +# +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_SMP=y +CONFIG_SMP_ON_UP=y +CONFIG_HAVE_ARM_SCU=y +CONFIG_HAVE_ARM_TWD=y +CONFIG_VMSPLIT_3G=y +# CONFIG_VMSPLIT_2G is not set +# CONFIG_VMSPLIT_1G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_TASK_SIZE_3G_LESS_16M=y +# CONFIG_TASK_SIZE_3G_LESS_24M is not set +CONFIG_TASK_SIZE=0xBF000000 +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_LOCAL_TIMERS=y +CONFIG_ARCH_NR_GPIO=512 +# CONFIG_PREEMPT_NONE is not set +# CONFIG_PREEMPT_VOLUNTARY is not set +CONFIG_PREEMPT=y +CONFIG_PREEMPT_COUNT=y +CONFIG_HZ=100 +# CONFIG_THUMB2_KERNEL is not set +CONFIG_AEABI=y +# CONFIG_OABI_COMPAT is not set +# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +CONFIG_HAVE_ARCH_PFN_VALID=y +CONFIG_HIGHMEM=y +# CONFIG_HIGHPTE is not set +CONFIG_HW_PERF_EVENTS=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_HAVE_MEMBLOCK=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_COMPACTION is not set +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y +# CONFIG_KSM is not set +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 +# CONFIG_CLEANCACHE is not set +CONFIG_FORCE_MAX_ZONEORDER=11 +CONFIG_ALIGNMENT_TRAP=y +# CONFIG_UACCESS_WITH_MEMCPY is not set +# CONFIG_SECCOMP is not set +# CONFIG_CC_STACKPROTECTOR is not set +# 
CONFIG_DEPRECATED_PARAM_STRUCT is not set +CONFIG_ARM_FLUSH_CONSOLE_ON_RESTART=y + +# +# Boot options +# +# CONFIG_USE_OF is not set +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_CMDLINE="tegra_wdt.heartbeat=30" +# CONFIG_CMDLINE_FROM_BOOTLOADER is not set +CONFIG_CMDLINE_EXTEND=y +# CONFIG_CMDLINE_FORCE is not set +# CONFIG_XIP_KERNEL is not set +# CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set +# CONFIG_AUTO_ZRELADDR is not set + +# +# CPU Power Management +# + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_STAT_DETAILS is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE=y +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_INTERACTIVE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y + +# +# ARM CPU frequency scaling drivers +# +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y + +# +# Floating point emulation +# + +# +# At least one emulation must be selected +# +CONFIG_VFP=y +CONFIG_VFPv3=y +CONFIG_NEON=y + +# +# Userspace binary formats +# +CONFIG_BINFMT_ELF=y +CONFIG_HAVE_AOUT=y +# CONFIG_BINFMT_AOUT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Power management options +# +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +CONFIG_HAS_WAKELOCK=y +CONFIG_HAS_EARLYSUSPEND=y +CONFIG_WAKELOCK=y +CONFIG_WAKELOCK_STAT=y +CONFIG_USER_WAKELOCK=y +CONFIG_EARLYSUSPEND=y +# CONFIG_NO_USER_SPACE_SCREEN_ACCESS_CONTROL is not set +CONFIG_FB_EARLYSUSPEND=y +CONFIG_PM_SLEEP=y +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_RUNTIME=y +CONFIG_PM=y +CONFIG_PM_DEBUG=y +# CONFIG_PM_ADVANCED_DEBUG is not set +# CONFIG_PM_TEST_SUSPEND is not set +CONFIG_CAN_PM_TRACE=y +# CONFIG_APM_EMULATION is not set +CONFIG_PM_CLK=y +CONFIG_SUSPEND_TIME=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +CONFIG_XFRM_IPCOMP=y +CONFIG_NET_KEY=y +# CONFIG_NET_KEY_MIGRATE is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +# CONFIG_IP_FIB_TRIE_STATS is not set +CONFIG_IP_MULTIPLE_TABLES=y +# CONFIG_IP_ROUTE_MULTIPATH is not set +# CONFIG_IP_ROUTE_VERBOSE is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE_DEMUX is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +CONFIG_INET_ESP=y +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +CONFIG_INET_TUNNEL=y +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +CONFIG_IPV6=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +# CONFIG_IPV6_ROUTE_INFO is not set +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y 
+CONFIG_IPV6_MIP6=y +CONFIG_INET6_XFRM_TUNNEL=y +CONFIG_INET6_TUNNEL=y +CONFIG_INET6_XFRM_MODE_TRANSPORT=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_INET6_XFRM_MODE_BEET=y +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=y +# CONFIG_IPV6_SIT_6RD is not set +CONFIG_IPV6_NDISC_NODETYPE=y +CONFIG_IPV6_TUNNEL=y +CONFIG_IPV6_MULTIPLE_TABLES=y +# CONFIG_IPV6_SUBTREES is not set +# CONFIG_IPV6_MROUTE is not set +CONFIG_ANDROID_PARANOID_NETWORK=y +CONFIG_NET_ACTIVITY_STATS=y +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETWORK_PHY_TIMESTAMPING is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_ADVANCED=y + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_QUEUE=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +# CONFIG_NF_CONNTRACK_TIMESTAMP is not set +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_GRE=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_BROADCAST=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +# CONFIG_NF_CONNTRACK_SNMP is not set +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +# CONFIG_NF_CONNTRACK_SIP is not set +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_TPROXY=y +CONFIG_NETFILTER_XTABLES=y + +# +# Xtables combined modules +# +CONFIG_NETFILTER_XT_MARK=y +CONFIG_NETFILTER_XT_CONNMARK=y + +# +# Xtables targets +# +# CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +# CONFIG_NETFILTER_XT_TARGET_CT is not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +# CONFIG_NETFILTER_XT_TARGET_HL is not set +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +# CONFIG_NETFILTER_XT_TARGET_RATEEST is not set +# CONFIG_NETFILTER_XT_TARGET_TEE is not set +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set +# CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set + +# +# Xtables matches +# +# CONFIG_NETFILTER_XT_MATCH_ADDRTYPE is not set +# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +# CONFIG_NETFILTER_XT_MATCH_CPU is not set +# CONFIG_NETFILTER_XT_MATCH_DCCP is not set +# CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set +# CONFIG_NETFILTER_XT_MATCH_DSCP is not set +# CONFIG_NETFILTER_XT_MATCH_ESP is not set +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_HL=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set +# CONFIG_NETFILTER_XT_MATCH_OSF is not set +# CONFIG_NETFILTER_XT_MATCH_OWNER is not set +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y +# CONFIG_NETFILTER_XT_MATCH_RATEEST is not set +# 
CONFIG_NETFILTER_XT_MATCH_REALM is not set +# CONFIG_NETFILTER_XT_MATCH_RECENT is not set +# CONFIG_NETFILTER_XT_MATCH_SCTP is not set +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +# CONFIG_IP_SET is not set +# CONFIG_IP_VS is not set + +# +# IP: Netfilter Configuration +# +CONFIG_NF_DEFRAG_IPV4=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_REJECT_SKERR=y +CONFIG_IP_NF_TARGET_LOG=y +# CONFIG_IP_NF_TARGET_ULOG is not set +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_NF_NAT_PROTO_DCCP=y +CONFIG_NF_NAT_PROTO_GRE=y +CONFIG_NF_NAT_PROTO_UDPLITE=y +CONFIG_NF_NAT_PROTO_SCTP=y +CONFIG_NF_NAT_FTP=y +CONFIG_NF_NAT_IRC=y +CONFIG_NF_NAT_TFTP=y +CONFIG_NF_NAT_AMANDA=y +CONFIG_NF_NAT_PPTP=y +CONFIG_NF_NAT_H323=y +# CONFIG_NF_NAT_SIP is not set +CONFIG_IP_NF_MANGLE=y +# CONFIG_IP_NF_TARGET_CLUSTERIP is not set +# CONFIG_IP_NF_TARGET_ECN is not set +# CONFIG_IP_NF_TARGET_TTL is not set +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_DEFRAG_IPV6=y +CONFIG_NF_CONNTRACK_IPV6=y +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=y +# CONFIG_IP6_NF_MATCH_AH is not set +# CONFIG_IP6_NF_MATCH_EUI64 is not set +# CONFIG_IP6_NF_MATCH_FRAG is not set +# CONFIG_IP6_NF_MATCH_OPTS is not set +# CONFIG_IP6_NF_MATCH_HL is not set +# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set +# CONFIG_IP6_NF_MATCH_MH is not set +# CONFIG_IP6_NF_MATCH_RT is not set +# CONFIG_IP6_NF_TARGET_HL is not set +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_TARGET_REJECT_SKERR=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_L2TP is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_IEEE802154 is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +CONFIG_NET_SCH_HTB=y +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_MULTIQ is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFB is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_DRR is not set +# CONFIG_NET_SCH_MQPRIO is not set +# CONFIG_NET_SCH_CHOKE is not set +# CONFIG_NET_SCH_QFQ is not set +CONFIG_NET_SCH_INGRESS=y + +# +# Classification +# +CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set 
+CONFIG_NET_CLS_U32=y +# CONFIG_CLS_U32_PERF is not set +# CONFIG_CLS_U32_MARK is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +# CONFIG_NET_CLS_CGROUP is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +# CONFIG_NET_EMATCH_CMP is not set +# CONFIG_NET_EMATCH_NBYTE is not set +CONFIG_NET_EMATCH_U32=y +# CONFIG_NET_EMATCH_META is not set +# CONFIG_NET_EMATCH_TEXT is not set +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=y +CONFIG_NET_ACT_GACT=y +# CONFIG_GACT_PROB is not set +CONFIG_NET_ACT_MIRRED=y +# CONFIG_NET_ACT_IPT is not set +# CONFIG_NET_ACT_NAT is not set +# CONFIG_NET_ACT_PEDIT is not set +# CONFIG_NET_ACT_SIMP is not set +# CONFIG_NET_ACT_SKBEDIT is not set +# CONFIG_NET_ACT_CSUM is not set +# CONFIG_NET_CLS_IND is not set +CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set +CONFIG_DNS_RESOLVER=y +# CONFIG_BATMAN_ADV is not set +CONFIG_RPS=y +CONFIG_RFS_ACCEL=y +CONFIG_XPS=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NET_DROP_MONITOR is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +CONFIG_BT=y +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y +CONFIG_BT_RFCOMM=y +CONFIG_BT_RFCOMM_TTY=y +CONFIG_BT_BNEP=y +# CONFIG_BT_BNEP_MC_FILTER is not set +# CONFIG_BT_BNEP_PROTO_FILTER is not set +CONFIG_BT_HIDP=y + +# +# Bluetooth device drivers +# +# CONFIG_BT_HCIBTUSB is not set +# CONFIG_BT_HCIBTSDIO is not set +CONFIG_BT_HCIUART=y +CONFIG_BT_HCIUART_H4=y +# CONFIG_BT_HCIUART_BCSP is not set +# CONFIG_BT_HCIUART_ATH3K is not set +CONFIG_BT_HCIUART_LL=y +# CONFIG_BT_HCIBCM203X is not set +CONFIG_BT_BLUESLEEP=y +# CONFIG_BT_TIBLUESLEEP is not set +# CONFIG_BT_HCIBPA10X is not set +# CONFIG_BT_HCIBFUSB is not set +# CONFIG_BT_HCIVHCI is not set +# CONFIG_BT_MRVL is not set +# CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y +CONFIG_WIRELESS=y +CONFIG_WEXT_CORE=y +CONFIG_WEXT_PROC=y +CONFIG_CFG80211=y +CONFIG_NL80211_TESTMODE=y +# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +# CONFIG_CFG80211_REG_DEBUG is not set +CONFIG_CFG80211_DEFAULT_PS=y +# CONFIG_CFG80211_DEBUGFS is not set +# CONFIG_CFG80211_INTERNAL_REGDB is not set +CONFIG_CFG80211_WEXT=y +CONFIG_WIRELESS_EXT_SYSFS=y +# CONFIG_LIB80211 is not set +# CONFIG_CFG80211_ALLOW_RECONNECT is not set +# CONFIG_MAC80211 is not set +# CONFIG_WIMAX is not set +CONFIG_RFKILL=y +CONFIG_RFKILL_PM=y +# CONFIG_RFKILL_INPUT is not set +# CONFIG_RFKILL_REGULATOR is not set +# CONFIG_RFKILL_GPIO is not set +# CONFIG_NET_9P is not set +CONFIG_CAIF=y +# CONFIG_CAIF_DEBUG is not set +CONFIG_CAIF_NETDEV=y +# CONFIG_CEPH_LIB is not set +CONFIG_NFC=y + +# +# Near Field Communication (NFC) devices +# +CONFIG_PN544_NFC=y +# CONFIG_NFC_PN533 is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="" +# CONFIG_DEVTMPFS is not set +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +CONFIG_REGMAP=y +CONFIG_REGMAP_I2C=y +# CONFIG_DMA_SHARED_BUFFER is not set +# CONFIG_CONNECTOR is not set +# CONFIG_MTD is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 +# 
CONFIG_BLK_DEV_CRYPTOLOOP is not set + +# +# DRBD disabled because PROC_FS, INET or CONNECTOR not selected +# +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_MG_DISK is not set +# CONFIG_BLK_DEV_RBD is not set +# CONFIG_SENSORS_LIS3LV02D is not set +CONFIG_MISC_DEVICES=y +CONFIG_AD525X_DPOT=y +CONFIG_AD525X_DPOT_I2C=y +# CONFIG_AD525X_DPOT_SPI is not set +# CONFIG_PHANTOM is not set +# CONFIG_INTEL_MID_PTI is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +# CONFIG_ICS932S401 is not set +# CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_HP_ILO is not set +CONFIG_APDS9802ALS=y +# CONFIG_ISL29003 is not set +# CONFIG_ISL29020 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_SENSORS_BH1780 is not set +# CONFIG_SENSORS_BH1770 is not set +# CONFIG_SENSORS_APDS990X is not set +# CONFIG_HMC6352 is not set +# CONFIG_SENSORS_AK8975 is not set +CONFIG_SENSORS_NCT1008=y +# CONFIG_DS1682 is not set +# CONFIG_TI_DAC7512 is not set +CONFIG_UID_STAT=y +# CONFIG_BMP085 is not set +# CONFIG_PCH_PHUB is not set +# CONFIG_USB_SWITCH_FSA9480 is not set +# CONFIG_WL127X_RFKILL is not set +# CONFIG_APANIC is not set +# CONFIG_BCM4329_RFKILL is not set +CONFIG_BCM4330_RFKILL=y +CONFIG_TEGRA_CRYPTO_DEV=y +CONFIG_MAX1749_VIBRATOR=y +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +CONFIG_EEPROM_AT24=y +# CONFIG_EEPROM_AT25 is not set +# CONFIG_EEPROM_LEGACY is not set +# CONFIG_EEPROM_MAX6875 is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_EEPROM_93XX46 is not set +# CONFIG_CB710_CORE is not set +# CONFIG_IWMC3200TOP is not set + +# +# Texas Instruments shared transport line discipline +# +# CONFIG_TI_ST is not set +# CONFIG_ST_GPS is not set +# CONFIG_SENSORS_LIS3_SPI is not set +# CONFIG_SENSORS_LIS3_I2C is not set +CONFIG_TEGRA_BB_SUPPORT=y +CONFIG_TEGRA_BB_POWER=y +CONFIG_TEGRA_BB_M7400=y +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +CONFIG_SCSI_MOD=y +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set +CONFIG_SCSI_MULTI_LUN=y +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +# CONFIG_SCSI_WAIT_SCAN is not set + +# +# SCSI Transports +# +# CONFIG_SCSI_SPI_ATTRS is not set +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +CONFIG_SCSI_LOWLEVEL=y +# CONFIG_ISCSI_TCP is not set +# CONFIG_ISCSI_BOOT_SYSFS is not set +# CONFIG_SCSI_CXGB3_ISCSI is not set +# CONFIG_SCSI_CXGB4_ISCSI is not set +# CONFIG_SCSI_BNX2_ISCSI is not set +# CONFIG_SCSI_BNX2X_FCOE is not set +# CONFIG_BE2ISCSI is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_HPSA is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_3W_SAS is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_MVSAS is not set 
+# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_MPT2SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set +# CONFIG_FCOE is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_PMCRAID is not set +# CONFIG_SCSI_PM8001 is not set +# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_BFA_FC is not set +# CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set +# CONFIG_ATA is not set +CONFIG_MD=y +# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_DM=y +# CONFIG_DM_DEBUG is not set +CONFIG_DM_CRYPT=y +# CONFIG_DM_SNAPSHOT is not set +# CONFIG_DM_MIRROR is not set +# CONFIG_DM_RAID is not set +# CONFIG_DM_ZERO is not set +# CONFIG_DM_MULTIPATH is not set +# CONFIG_DM_DELAY is not set +CONFIG_DM_UEVENT=y +# CONFIG_DM_FLAKEY is not set +# CONFIG_TARGET_CORE is not set +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +# CONFIG_FIREWIRE_NOSY is not set +# CONFIG_I2O is not set +CONFIG_NETDEVICES=y +# CONFIG_IFB is not set +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=y +# CONFIG_VETH is not set +# CONFIG_ARCNET is not set +CONFIG_MII=y +# CONFIG_PHYLIB is not set +# CONFIG_NET_ETHERNET is not set +CONFIG_NETDEV_1000=y +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_E1000E is not set +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set +# CONFIG_IGBVF is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set +# CONFIG_CNIC is not set +# CONFIG_QLA3XXX is not set +# CONFIG_ATL1 is not set +# CONFIG_ATL1E is not set +# CONFIG_ATL1C is not set +# CONFIG_JME is not set +# CONFIG_STMMAC_ETH is not set +# CONFIG_PCH_GBE is not set +# CONFIG_FTGMAC100 is not set +# CONFIG_NETDEV_10000 is not set +# CONFIG_TR is not set +CONFIG_WLAN=y +# CONFIG_ATMEL is not set +# CONFIG_PRISM54 is not set +# CONFIG_USB_ZD1201 is not set +# CONFIG_USB_NET_RNDIS_WLAN is not set +# CONFIG_ATH_COMMON is not set +# CONFIG_BCM4329 is not set +CONFIG_BCMDHD=y +CONFIG_BCMDHD_FW_PATH="/system/vendor/firmware/fw_bcmdhd.bin" +CONFIG_BCMDHD_NVRAM_PATH="/system/etc/nvram.txt" +# CONFIG_DHD_USE_STATIC_BUF is not set +# CONFIG_DHD_USE_SCHED_SCAN is not set +CONFIG_DHD_ENABLE_P2P=y +# CONFIG_HOSTAP is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_IWM is not set +# CONFIG_LIBERTAS is not set +# CONFIG_HERMES is not set +# CONFIG_MWIFIEX is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# 
CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +CONFIG_USB_USBNET=y +CONFIG_USB_NET_AX8817X=y +CONFIG_USB_NET_CDCETHER=y +# CONFIG_USB_NET_CDC_EEM is not set +CONFIG_USB_NET_CDC_NCM=y +# CONFIG_USB_NET_DM9601 is not set +# CONFIG_USB_NET_SMSC75XX is not set +CONFIG_USB_NET_SMSC95XX=y +# CONFIG_USB_NET_GL620A is not set +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_NET_PLUSB is not set +# CONFIG_USB_NET_MCS7830 is not set +# CONFIG_USB_NET_RNDIS_HOST is not set +CONFIG_USB_NET_CDC_SUBSET=y +# CONFIG_USB_ALI_M5632 is not set +# CONFIG_USB_AN2720 is not set +# CONFIG_USB_BELKIN is not set +# CONFIG_USB_ARMLINUX is not set +# CONFIG_USB_EPSON2888 is not set +# CONFIG_USB_KC2190 is not set +# CONFIG_USB_NET_ZAURUS is not set +# CONFIG_USB_NET_CX82310_ETH is not set +# CONFIG_USB_NET_KALMIA is not set +# CONFIG_USB_HSO is not set +# CONFIG_USB_NET_INT51X1 is not set +# CONFIG_USB_IPHETH is not set +# CONFIG_USB_SIERRA_NET is not set +# CONFIG_USB_VL600 is not set +# CONFIG_USB_NET_RAW_IP is not set +# CONFIG_WAN is not set + +# +# CAIF transport drivers +# +# CONFIG_CAIF_TTY is not set +# CONFIG_CAIF_SPI_SLAVE is not set +# CONFIG_CAIF_HSI is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=y +# CONFIG_PPP_MULTILINK is not set +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=y +CONFIG_PPP_SYNC_TTY=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_MPPE=y +# CONFIG_PPPOE is not set +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +# CONFIG_SLIP is not set +CONFIG_SLHC=y +# CONFIG_NET_FC is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_VMXNET3 is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +CONFIG_INPUT_FF_MEMLESS=y +# CONFIG_INPUT_POLLDEV is not set +# CONFIG_INPUT_SPARSEKMAP is not set + +# +# Userland interfaces +# +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set +CONFIG_INPUT_KEYRESET=y +CONFIG_INPUT_LID=y + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ADP5588 is not set +# CONFIG_KEYBOARD_ADP5589 is not set +# CONFIG_KEYBOARD_ATKBD is not set +# CONFIG_KEYBOARD_QT1070 is not set +# CONFIG_KEYBOARD_QT2160 is not set +# CONFIG_KEYBOARD_LKKBD is not set +CONFIG_KEYBOARD_GPIO=y +# CONFIG_KEYBOARD_TCA6416 is not set +# CONFIG_KEYBOARD_MATRIX is not set +# CONFIG_KEYBOARD_LM8323 is not set +# CONFIG_KEYBOARD_MAX7359 is not set +# CONFIG_KEYBOARD_MCS is not set +# CONFIG_KEYBOARD_MPR121 is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_KEYBOARD_TEGRA=y +# CONFIG_KEYBOARD_OPENCORES is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +# CONFIG_JOYSTICK_ANALOG is not set +# CONFIG_JOYSTICK_A3D is not set +# CONFIG_JOYSTICK_ADI is not set +# CONFIG_JOYSTICK_COBRA is not set +# CONFIG_JOYSTICK_GF2K is not set +# CONFIG_JOYSTICK_GRIP is not set +# CONFIG_JOYSTICK_GRIP_MP is not set +# CONFIG_JOYSTICK_GUILLEMOT is not set +# CONFIG_JOYSTICK_INTERACT is not set +# CONFIG_JOYSTICK_SIDEWINDER is not set +# CONFIG_JOYSTICK_TMDC is not set +# CONFIG_JOYSTICK_IFORCE is not set +# CONFIG_JOYSTICK_WARRIOR is not set +# CONFIG_JOYSTICK_MAGELLAN is not set +# CONFIG_JOYSTICK_SPACEORB is not set +# CONFIG_JOYSTICK_SPACEBALL is not set +# CONFIG_JOYSTICK_STINGER is not set +# CONFIG_JOYSTICK_TWIDJOY is not set +# 
CONFIG_JOYSTICK_ZHENHUA is not set +# CONFIG_JOYSTICK_AS5011 is not set +# CONFIG_JOYSTICK_JOYDUMP is not set +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_TABLET_USB_WACOM=y +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_ADS7846 is not set +# CONFIG_TOUCHSCREEN_AD7877 is not set +# CONFIG_TOUCHSCREEN_AD7879 is not set +# CONFIG_TOUCHSCREEN_ATMEL_MXT is not set +# CONFIG_TOUCHSCREEN_BU21013 is not set +# CONFIG_TOUCHSCREEN_CY8CTMG110 is not set +# CONFIG_TOUCHSCREEN_DYNAPRO is not set +# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set +# CONFIG_TOUCHSCREEN_EETI is not set +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set +# CONFIG_TOUCHSCREEN_MAX11801 is not set +# CONFIG_TOUCHSCREEN_MCS5000 is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_INEXIO is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_PANJIT_I2C is not set +# CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +# CONFIG_TOUCHSCREEN_TSC2005 is not set +# CONFIG_TOUCHSCREEN_TSC2007 is not set +# CONFIG_TOUCHSCREEN_W90X900 is not set +# CONFIG_TOUCHSCREEN_ST1232 is not set +# CONFIG_TOUCHSCREEN_TPS6507X is not set +CONFIG_TOUCHSCREEN_ELAN_TF_3K=y +CONFIG_TOUCHSCREEN_RM31080A=y +CONFIG_TOUCHSCREEN_SYN_RMI4_SPI=y +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_AD714X is not set +# CONFIG_INPUT_MMA8450 is not set +# CONFIG_INPUT_MPU3050 is not set +# CONFIG_INPUT_ATI_REMOTE is not set +# CONFIG_INPUT_ATI_REMOTE2 is not set +CONFIG_INPUT_KEYCHORD=y +# CONFIG_INPUT_KEYSPAN_REMOTE is not set +# CONFIG_INPUT_KXTJ9 is not set +# CONFIG_INPUT_POWERMATE is not set +# CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_CM109 is not set +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_INPUT_PCF8574 is not set +# CONFIG_INPUT_PWM_BEEPER is not set +# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set +# CONFIG_INPUT_ADXL34X is not set +# CONFIG_INPUT_CMA3000 is not set +# CONFIG_INPUT_ALPS_GPIO_SCROLLWHEEL is not set +# CONFIG_INPUT_CAPELLA_CM3217 is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_SERIO_ALTERA_PS2 is not set +# CONFIG_SERIO_PS2MULT is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_NOZOMI is not set +# CONFIG_N_GSM is not set +# CONFIG_TRACE_SINK is not set +CONFIG_DEVMEM=y +# CONFIG_DEVKMEM is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_TEGRA=y +# CONFIG_SERIAL_MAX3100 is not set +# CONFIG_SERIAL_MAX3107 is not set +# CONFIG_SERIAL_MFD_HSU is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +# 
CONFIG_SERIAL_TIMBERDALE is not set +# CONFIG_SERIAL_ALTERA_JTAGUART is not set +# CONFIG_SERIAL_ALTERA_UART is not set +# CONFIG_SERIAL_IFX6X60 is not set +# CONFIG_SERIAL_PCH_UART is not set +# CONFIG_SERIAL_XILINX_PS_UART is not set +# CONFIG_TTY_PRINTK is not set +# CONFIG_HVC_DCC is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y +# CONFIG_DCC_TTY is not set +# CONFIG_RAMOOPS is not set +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_COMPAT is not set +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_MUX=y + +# +# Multiplexer I2C Chip support +# +# CONFIG_I2C_MUX_GPIO is not set +# CONFIG_I2C_MUX_PCA9541 is not set +CONFIG_I2C_MUX_PCA954x=y +# CONFIG_I2C_SLAVE is not set +# CONFIG_I2C_HELPER_AUTO is not set +# CONFIG_I2C_SMBUS is not set + +# +# I2C Algorithms +# +# CONFIG_I2C_ALGOBIT is not set +# CONFIG_I2C_ALGOPCF is not set +# CONFIG_I2C_ALGOPCA is not set + +# +# I2C Hardware Bus support +# + +# +# PC SMBus host controller drivers +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_ISCH is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_DESIGNWARE is not set +# CONFIG_I2C_GPIO is not set +# CONFIG_I2C_INTEL_MID is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_PXA_PCI is not set +# CONFIG_I2C_SIMTEC is not set +CONFIG_I2C_TEGRA=y +# CONFIG_I2C_XILINX is not set +# CONFIG_I2C_EG20T is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_DIOLAN_U2C is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_TINY_USB is not set + +# +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +CONFIG_SPI=y +# CONFIG_SPI_DEBUG is not set +CONFIG_SPI_MASTER=y + +# +# SPI Master Controller Drivers +# +# CONFIG_SPI_ALTERA is not set +# CONFIG_SPI_BITBANG is not set +# CONFIG_SPI_GPIO is not set +# CONFIG_SPI_OC_TINY is not set +# CONFIG_SPI_PXA2XX_PCI is not set +CONFIG_SPI_TEGRA=y +CONFIG_SPI_SLAVE_TEGRA=y +# CONFIG_SPI_TOPCLIFF_PCH is not set +# CONFIG_SPI_XILINX is not set +# CONFIG_SPI_DESIGNWARE is not set + +# +# SPI Protocol Masters +# +# CONFIG_SPI_SPIDEV is not set +# CONFIG_SPI_TLE62X0 is not set + +# +# PPS support +# +# CONFIG_PPS is not set + +# +# PPS generators support +# + +# +# PTP clock support +# + +# +# Enable Device Drivers -> PPS to see the PTP clock options. 
+# +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +CONFIG_DEBUG_GPIO=y +CONFIG_GPIO_SYSFS=y + +# +# Memory mapped GPIO drivers: +# +# CONFIG_GPIO_GENERIC_PLATFORM is not set +# CONFIG_GPIO_IT8761E is not set +# CONFIG_GPIO_VX855 is not set + +# +# I2C GPIO expanders: +# +# CONFIG_GPIO_MAX7300 is not set +# CONFIG_GPIO_MAX732X is not set +# CONFIG_GPIO_PCA953X_IRQ is not set +# CONFIG_GPIO_PCF857X is not set +# CONFIG_GPIO_SX150X is not set +# CONFIG_GPIO_ADP5588 is not set + +# +# PCI GPIO expanders: +# +# CONFIG_GPIO_BT8XX is not set +# CONFIG_GPIO_ML_IOH is not set +# CONFIG_GPIO_RDC321X is not set + +# +# SPI GPIO expanders: +# +# CONFIG_GPIO_MAX7301 is not set +# CONFIG_GPIO_MCP23S08 is not set +# CONFIG_GPIO_MC33880 is not set +# CONFIG_GPIO_74X164 is not set + +# +# AC97 GPIO expanders: +# + +# +# MODULbus GPIO expanders: +# +CONFIG_GPIO_TPS65910=y +# CONFIG_W1 is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_TEST_POWER is not set +# CONFIG_BATTERY_DS2780 is not set +# CONFIG_BATTERY_DS2782 is not set +# CONFIG_BATTERY_BQ20Z75 is not set +# CONFIG_BATTERY_BQ27x00 is not set +# CONFIG_CHARGER_TPS8003X is not set +# CONFIG_BATTERY_GAUGE_TPS8003X is not set +CONFIG_CHARGER_SMB347=y +# CONFIG_BATTERY_MAX17040 is not set +# CONFIG_BATTERY_MAX17042 is not set +# CONFIG_BATTERY_MAX17048 is not set +# CONFIG_CHARGER_ISP1704 is not set +# CONFIG_CHARGER_MAX8903 is not set +# CONFIG_CHARGER_GPIO is not set +CONFIG_BATTERY_BQ27541=y +# CONFIG_TEGRA_BPC_MGMT is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_HWMON_DEBUG_CHIP is not set + +# +# Native drivers +# +# CONFIG_SENSORS_AD7414 is not set +# CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADCXX is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7411 is not set +# CONFIG_SENSORS_ADT7461 is not set +# CONFIG_SENSORS_ADT7462 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7475 is not set +# CONFIG_SENSORS_ASC7621 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS620 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_I5K_AMB is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_G760A is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_GPIO_FAN is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_JC42 is not set +# CONFIG_SENSORS_LINEAGE is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM70 is not set +# CONFIG_SENSORS_LM73 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4151 is not set +# CONFIG_SENSORS_LTC4215 is not set +# CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_LTC4261 is not set +# CONFIG_SENSORS_LM95241 is not set +# CONFIG_SENSORS_LM95245 is not set +# CONFIG_SENSORS_MAX1111 is not set +# CONFIG_SENSORS_MAX16065 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_MAX1668 is not set +# 
CONFIG_SENSORS_MAX6639 is not set +# CONFIG_SENSORS_MAX6642 is not set +# CONFIG_SENSORS_MAX6650 is not set +# CONFIG_SENSORS_NTC_THERMISTOR is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_PMBUS is not set +# CONFIG_SENSORS_SHT15 is not set +# CONFIG_SENSORS_SHT21 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_SMM665 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_EMC1403 is not set +# CONFIG_SENSORS_EMC2103 is not set +# CONFIG_SENSORS_EMC6W201 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_SCH56XX_COMMON is not set +# CONFIG_SENSORS_SCH5627 is not set +# CONFIG_SENSORS_SCH5636 is not set +# CONFIG_SENSORS_ADS1015 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_ADS7871 is not set +# CONFIG_SENSORS_AMC6821 is not set +CONFIG_SENSORS_TEGRA_TSENSOR=y +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_TMP102 is not set +# CONFIG_SENSORS_TMP401 is not set +# CONFIG_SENSORS_TMP421 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83795 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +CONFIG_SENSORS_INA219=y +# CONFIG_SENSORS_INA230 is not set +CONFIG_SENSORS_AL3010=y +CONFIG_THERMAL=y +CONFIG_THERMAL_HWMON=y +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_CORE is not set +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +# CONFIG_DW_WATCHDOG is not set +# CONFIG_MPCORE_WATCHDOG is not set +CONFIG_TEGRA_WATCHDOG=y +CONFIG_TEGRA_WATCHDOG_ENABLE_ON_PROBE=y +# CONFIG_MAX63XX_WATCHDOG is not set +# CONFIG_ALIM7101_WDT is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set +CONFIG_BCMA_POSSIBLE=y + +# +# Broadcom specific AMBA +# +# CONFIG_BCMA is not set +CONFIG_MFD_SUPPORT=y +CONFIG_MFD_CORE=y +# CONFIG_MFD_88PM860X is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_ASIC3 is not set +# CONFIG_HTC_EGPIO is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_HTC_I2CPLD is not set +# CONFIG_TPS6105X is not set +# CONFIG_TPS65010 is not set +# CONFIG_TPS6507X is not set +CONFIG_MFD_TPS6586X=y +CONFIG_MFD_TPS65910=y +# CONFIG_MFD_TPS65912_I2C is not set +# CONFIG_MFD_TPS65912_SPI is not set +# CONFIG_TWL4030_CORE is not set +# CONFIG_MFD_STMPE is not set +# CONFIG_MFD_TC3589X is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_T7L66XB is not set +# CONFIG_MFD_TC6387XB is not set +# CONFIG_MFD_TC6393XB is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_PMIC_ADP5520 is not set +# CONFIG_MFD_MAX8925 is not set +# CONFIG_MFD_MAX8997 is not set +# CONFIG_MFD_MAX8998 is not set +# CONFIG_MFD_MAX8907C is not set +CONFIG_MFD_MAX77663=y +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM831X_I2C is not set +# CONFIG_MFD_WM831X_SPI is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_WM8994 is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_MFD_MC13XXX is 
not set +# CONFIG_ABX500_CORE is not set +# CONFIG_EZX_PCAP is not set +# CONFIG_MFD_TIMBERDALE is not set +# CONFIG_LPC_SCH is not set +# CONFIG_MFD_RDC321X is not set +# CONFIG_MFD_JANZ_CMODIO is not set +# CONFIG_MFD_VX855 is not set +# CONFIG_MFD_WL1273_CORE is not set +# CONFIG_MFD_AAT2870_CORE is not set +CONFIG_MFD_TPS6591X=y +# CONFIG_MFD_TPS65090 is not set +# CONFIG_MFD_RC5T583 is not set +CONFIG_MFD_TPS80031=y +CONFIG_GPADC_TPS80031=y +CONFIG_MFD_RICOH583=y +CONFIG_REGULATOR=y +# CONFIG_REGULATOR_DEBUG is not set +# CONFIG_REGULATOR_DUMMY is not set +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_VIRTUAL_CONSUMER=y +# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set +# CONFIG_REGULATOR_GPIO is not set +# CONFIG_REGULATOR_BQ24022 is not set +# CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX8649 is not set +# CONFIG_REGULATOR_MAX8660 is not set +# CONFIG_REGULATOR_MAX8952 is not set +CONFIG_REGULATOR_MAX77663=y +# CONFIG_REGULATOR_LP3971 is not set +# CONFIG_REGULATOR_LP3972 is not set +# CONFIG_REGULATOR_TPS65023 is not set +# CONFIG_REGULATOR_TPS6507X is not set +# CONFIG_REGULATOR_ISL6271A is not set +# CONFIG_REGULATOR_AD5398 is not set +CONFIG_REGULATOR_TPS6586X=y +# CONFIG_REGULATOR_TPS6524X is not set +CONFIG_REGULATOR_TPS65910=y +CONFIG_REGULATOR_TPS62360=y +CONFIG_REGULATOR_TPS6591X=y +CONFIG_REGULATOR_TPS80031=y +CONFIG_REGULATOR_RICOH583=y +# CONFIG_REGULATOR_FAN53555 is not set +CONFIG_MEDIA_SUPPORT=y + +# +# Multimedia core support +# +# CONFIG_MEDIA_CONTROLLER is not set +CONFIG_VIDEO_DEV=y +CONFIG_VIDEO_V4L2_COMMON=y +# CONFIG_DVB_CORE is not set +CONFIG_VIDEO_MEDIA=y + +# +# Multimedia drivers +# +# CONFIG_RC_CORE is not set +# CONFIG_MEDIA_ATTACH is not set +CONFIG_MEDIA_TUNER=y +# CONFIG_MEDIA_TUNER_CUSTOMISE is not set +CONFIG_MEDIA_TUNER_SIMPLE=y +CONFIG_MEDIA_TUNER_TDA8290=y +CONFIG_MEDIA_TUNER_TDA827X=y +CONFIG_MEDIA_TUNER_TDA18271=y +CONFIG_MEDIA_TUNER_TDA9887=y +CONFIG_MEDIA_TUNER_TEA5761=y +CONFIG_MEDIA_TUNER_TEA5767=y +CONFIG_MEDIA_TUNER_MT20XX=y +CONFIG_MEDIA_TUNER_XC2028=y +CONFIG_MEDIA_TUNER_XC5000=y +CONFIG_MEDIA_TUNER_XC4000=y +CONFIG_MEDIA_TUNER_MC44S803=y +CONFIG_VIDEO_V4L2=y +CONFIG_VIDEO_CAPTURE_DRIVERS=y +# CONFIG_VIDEO_ADV_DEBUG is not set +# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set +CONFIG_VIDEO_HELPER_CHIPS_AUTO=y + +# +# Audio decoders, processors and mixers +# + +# +# RDS decoders +# + +# +# Video decoders +# + +# +# Video and audio decoders +# + +# +# MPEG video encoders +# + +# +# Video encoders +# + +# +# Camera sensor devices +# + +# +# Flash devices +# + +# +# Video improvement chips +# + +# +# Miscelaneous helper chips +# +CONFIG_TEGRA_RPC=y +# CONFIG_TEGRA_AVP is not set +# CONFIG_TEGRA_MEDIASERVER is not set +CONFIG_TEGRA_NVAVP=y +CONFIG_TEGRA_CAMERA=y +CONFIG_VIDEO_MI1040=y +CONFIG_TEGRA_DTV=y +# CONFIG_VIDEO_OV5650 is not set +# CONFIG_VIDEO_OV14810 is not set +# CONFIG_VIDEO_OV9726 is not set +# CONFIG_VIDEO_OV2710 is not set +# CONFIG_VIDEO_AR0832 is not set +# CONFIG_VIDEO_SOC380 is not set +# CONFIG_TORCH_SSL3250A is not set +# CONFIG_TORCH_TPS61050 is not set +# CONFIG_VIDEO_SH532U is not set +# CONFIG_VIDEO_AD5820 is not set +# CONFIG_VIDEO_CPIA2 is not set +# CONFIG_VIDEO_SAA7134 is not set +# CONFIG_VIDEO_MXB is not set +# CONFIG_VIDEO_HEXIUM_ORION is not set +# CONFIG_VIDEO_HEXIUM_GEMINI is not set +# CONFIG_VIDEO_CAFE_CCIC is not set +# CONFIG_VIDEO_SR030PC30 is not set +# CONFIG_VIDEO_NOON010PC30 is not set +# CONFIG_SOC_CAMERA is not set +CONFIG_V4L_USB_DRIVERS=y +CONFIG_USB_VIDEO_CLASS=y 
+CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y +# CONFIG_USB_GSPCA is not set +# CONFIG_VIDEO_PVRUSB2 is not set +# CONFIG_VIDEO_HDPVR is not set +# CONFIG_VIDEO_EM28XX is not set +# CONFIG_VIDEO_USBVISION is not set +# CONFIG_USB_ET61X251 is not set +# CONFIG_USB_SN9C102 is not set +# CONFIG_USB_PWC is not set +# CONFIG_USB_ZR364XX is not set +# CONFIG_USB_STKWEBCAM is not set +# CONFIG_USB_S2255 is not set +# CONFIG_V4L_MEM2MEM_DRIVERS is not set +# CONFIG_RADIO_ADAPTERS is not set + +# +# Graphics support +# +CONFIG_VGA_ARB=y +CONFIG_VGA_ARB_MAX_GPUS=16 +# CONFIG_DRM is not set +# CONFIG_STUB_POULSBO is not set +# CONFIG_ION is not set +# CONFIG_VGASTATE is not set +CONFIG_VIDEO_OUTPUT_CONTROL=y +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_WMT_GE_ROPS is not set +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +# CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_CARMINE is not set +# CONFIG_FB_TMIO is not set +# CONFIG_FB_UDL is not set +# CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set + +# +# NVIDIA Tegra Display Driver options +# +CONFIG_TEGRA_GRHOST=y +CONFIG_TEGRA_DC=y +CONFIG_FB_TEGRA=y +CONFIG_TEGRA_DC_EXTENSIONS=y +CONFIG_TEGRA_NVMAP=y +CONFIG_NVMAP_RECLAIM_UNPINNED_VM=y +CONFIG_NVMAP_ALLOW_SYSMEM=y +# CONFIG_NVMAP_HIGHMEM_ONLY is not set +# CONFIG_NVMAP_CARVEOUT_KILLER is not set +CONFIG_NVMAP_CARVEOUT_COMPACTOR=y +# CONFIG_NVMAP_VPR is not set +CONFIG_TEGRA_DSI=y +CONFIG_NVMAP_CONVERT_CARVEOUT_TO_IOVMM=y +CONFIG_TEGRA_NVHDCP=y +# CONFIG_TEGRA_HDMI_74MHZ_LIMIT is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LCD_CLASS_DEVICE=y +# CONFIG_LCD_L4F00242T03 is not set +# CONFIG_LCD_LMS283GF05 is not set +# CONFIG_LCD_LTV350QV is not set +# CONFIG_LCD_TDO24M is not set +# CONFIG_LCD_VGG2432A4 is not set +# CONFIG_LCD_PLATFORM is not set +# CONFIG_LCD_S6E63M0 is not set +# CONFIG_LCD_LD9040 is not set +# CONFIG_LCD_AMS369FG06 is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_GENERIC is not set +CONFIG_BACKLIGHT_PWM=y +CONFIG_BACKLIGHT_TEGRA_PWM=y +# CONFIG_BACKLIGHT_ADP8860 is not set +# CONFIG_BACKLIGHT_ADP8870 is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_LOGO is not set +CONFIG_SOUND=y +# CONFIG_SOUND_OSS_CORE is not set +CONFIG_SND=y +CONFIG_SND_TIMER=y 
+CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_JACK=y +# CONFIG_SND_SEQUENCER is not set +# CONFIG_SND_MIXER_OSS is not set +# CONFIG_SND_PCM_OSS is not set +# CONFIG_SND_HRTIMER is not set +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SUPPORT_OLD_API=y +CONFIG_SND_VERBOSE_PROCFS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y +# CONFIG_SND_RAWMIDI_SEQ is not set +# CONFIG_SND_OPL3_LIB_SEQ is not set +# CONFIG_SND_OPL4_LIB_SEQ is not set +# CONFIG_SND_SBAWE_SEQ is not set +# CONFIG_SND_EMU10K1_SEQ is not set +CONFIG_SND_DRIVERS=y +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_ALOOP is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set +CONFIG_SND_PCI=y +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS300 is not set +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS5535AUDIO is not set +# CONFIG_SND_CTXFI is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_INDIGOIOX is not set +# CONFIG_SND_INDIGODJX is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_FM801 is not set +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_PREALLOC_SIZE=64 +# CONFIG_SND_HDA_HWDEP is not set +# CONFIG_SND_HDA_INPUT_BEEP is not set +# CONFIG_SND_HDA_INPUT_JACK is not set +# CONFIG_SND_HDA_PATCH_LOADER is not set +CONFIG_SND_HDA_PLATFORM_DRIVER=y +CONFIG_SND_HDA_PLATFORM_NVIDIA_TEGRA=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_ENABLE_REALTEK_QUIRKS=y +CONFIG_SND_HDA_CODEC_ANALOG=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_VIA=y +CONFIG_SND_HDA_CODEC_HDMI=y +CONFIG_SND_HDA_CODEC_CIRRUS=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_HDA_CODEC_CA0110=y +CONFIG_SND_HDA_CODEC_CA0132=y +CONFIG_SND_HDA_CODEC_CMEDIA=y +CONFIG_SND_HDA_CODEC_SI3054=y +CONFIG_SND_HDA_GENERIC=y +CONFIG_SND_HDA_POWER_SAVE=y +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=10 +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_LOLA is not set +# CONFIG_SND_LX6464ES is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set +# CONFIG_SND_VX222 is not set +# 
CONFIG_SND_YMFPCI is not set +CONFIG_SND_ARM=y +CONFIG_SND_SPI=y +CONFIG_SND_USB=y +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_UA101 is not set +# CONFIG_SND_USB_CAIAQ is not set +# CONFIG_SND_USB_6FIRE is not set +CONFIG_SND_SOC=y +# CONFIG_SND_SOC_CACHE_LZO is not set +CONFIG_SND_SOC_TEGRA=y +CONFIG_SND_SOC_TEGRA30_AHUB=y +CONFIG_SND_SOC_TEGRA30_DAM=y +CONFIG_SND_SOC_TEGRA30_I2S=y +CONFIG_SND_SOC_TEGRA30_SPDIF=y +CONFIG_MACH_HAS_SND_SOC_TEGRA_WM8903=y +# CONFIG_SND_SOC_TEGRA_WM8903 is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_MAX98088=y +# CONFIG_SND_SOC_TEGRA_MAX98088 is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_TLV320AIC326X=y +# CONFIG_SND_SOC_TEGRA_TLV320AIC326X is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_RT5639=y +# CONFIG_SND_SOC_TEGRA_RT5639 is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_RT5640=y +CONFIG_SND_SOC_TEGRA_RT5640=y +CONFIG_MACH_HAS_SND_SOC_TEGRA_MAX98095=y +# CONFIG_SND_SOC_TEGRA_MAX98095 is not set +CONFIG_HEADSET_FUNCTION=y +CONFIG_SND_SOC_I2C_AND_SPI=y +# CONFIG_SND_SOC_ALL_CODECS is not set +CONFIG_SND_SOC_RT5640=y +CONFIG_SND_SOC_RT5642=y +CONFIG_SND_SOC_SPDIF=y +# CONFIG_SND_SOC_TLV320AIC326X is not set +# CONFIG_SOUND_PRIME is not set +CONFIG_HID_SUPPORT=y +CONFIG_HID=y +# CONFIG_HIDRAW is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +# CONFIG_HID_PID is not set +# CONFIG_USB_HIDDEV is not set + +# +# Special HID drivers +# +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +# CONFIG_HID_PRODIKEYS is not set +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HOLTEK_FF=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_LOGIWII_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +# CONFIG_HID_PICOLCD is not set +# CONFIG_HID_QUANTA is not set +# CONFIG_HID_ROCCAT is not set +# CONFIG_HID_SAMSUNG is not set +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_HID_WACOM=y +# CONFIG_HID_WACOM_POWER_SUPPLY is not set +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_ZEROPLUS_FF=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +CONFIG_USB_DEVICE_CLASS=y +# CONFIG_USB_DYNAMIC_MINORS is not set +CONFIG_USB_SUSPEND=y +CONFIG_USB_OTG=y +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set +# CONFIG_USB_MON is not set +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set + +# +# USB Host Controller Drivers +# +# CONFIG_USB_C67X00_HCD is not set +# CONFIG_USB_XHCI_HCD is not set +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_USB_EHCI_TT_NEWSCHED=y +CONFIG_USB_EHCI_TEGRA=y +# CONFIG_USB_OXU210HP_HCD is not set +# 
CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set +# CONFIG_USB_ISP1362_HCD is not set +# CONFIG_USB_OHCI_HCD is not set +# CONFIG_USB_UHCI_HCD is not set +# CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_WHCI_HCD is not set +# CONFIG_USB_HWA_HCD is not set +# CONFIG_USB_EHCI_ONOFF_FEATURE is not set +# CONFIG_USB_MUSB_HDRC is not set + +# +# USB Device Class drivers +# +CONFIG_USB_ACM=y +# CONFIG_USB_PRINTER is not set +CONFIG_USB_WDM=y +# CONFIG_USB_TMC is not set + +# +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may +# + +# +# also be needed; see USB_STORAGE Help for more info +# +CONFIG_USB_STORAGE=y +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_REALTEK is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set +# CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +# CONFIG_USB_STORAGE_ENE_UB6250 is not set +# CONFIG_USB_UAS is not set +CONFIG_USB_LIBUSUAL=y + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB port drivers +# +CONFIG_USB_SERIAL=y +# CONFIG_USB_SERIAL_CONSOLE is not set +# CONFIG_USB_EZUSB is not set +# CONFIG_USB_SERIAL_GENERIC is not set +# CONFIG_USB_SERIAL_AIRCABLE is not set +# CONFIG_USB_SERIAL_ARK3116 is not set +# CONFIG_USB_SERIAL_BELKIN is not set +# CONFIG_USB_SERIAL_CH341 is not set +# CONFIG_USB_SERIAL_WHITEHEAT is not set +# CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set +# CONFIG_USB_SERIAL_CP210X is not set +# CONFIG_USB_SERIAL_CYPRESS_M8 is not set +# CONFIG_USB_SERIAL_EMPEG is not set +# CONFIG_USB_SERIAL_FTDI_SIO is not set +# CONFIG_USB_SERIAL_FUNSOFT is not set +# CONFIG_USB_SERIAL_VISOR is not set +# CONFIG_USB_SERIAL_IPAQ is not set +# CONFIG_USB_SERIAL_IR is not set +# CONFIG_USB_SERIAL_EDGEPORT is not set +# CONFIG_USB_SERIAL_EDGEPORT_TI is not set +# CONFIG_USB_SERIAL_GARMIN is not set +# CONFIG_USB_SERIAL_IPW is not set +# CONFIG_USB_SERIAL_IUU is not set +# CONFIG_USB_SERIAL_KEYSPAN_PDA is not set +# CONFIG_USB_SERIAL_KEYSPAN is not set +# CONFIG_USB_SERIAL_KLSI is not set +# CONFIG_USB_SERIAL_KOBIL_SCT is not set +# CONFIG_USB_SERIAL_MCT_U232 is not set +# CONFIG_USB_SERIAL_MOS7720 is not set +# CONFIG_USB_SERIAL_MOS7840 is not set +# CONFIG_USB_SERIAL_MOTOROLA is not set +# CONFIG_USB_SERIAL_NAVMAN is not set +CONFIG_USB_SERIAL_PL2303=y +# CONFIG_USB_SERIAL_OTI6858 is not set +# CONFIG_USB_SERIAL_QCAUX is not set +# CONFIG_USB_SERIAL_QUALCOMM is not set +# CONFIG_USB_SERIAL_SPCP8X5 is not set +# CONFIG_USB_SERIAL_HP4X is not set +# CONFIG_USB_SERIAL_SAFE is not set +# CONFIG_USB_SERIAL_SIEMENS_MPI is not set +# CONFIG_USB_SERIAL_SIERRAWIRELESS is not set +# CONFIG_USB_SERIAL_SYMBOL is not set +# CONFIG_USB_SERIAL_TI is not set +# CONFIG_USB_SERIAL_CYBERJACK is not set +# CONFIG_USB_SERIAL_XIRCOM is not set +CONFIG_USB_SERIAL_WWAN=y +CONFIG_USB_SERIAL_OPTION=y +# CONFIG_USB_SERIAL_OMNINET is not set +# CONFIG_USB_SERIAL_OPTICON is not set +# CONFIG_USB_SERIAL_VIVOPAY_SERIAL is not set +# CONFIG_USB_SERIAL_ZIO is not set +# CONFIG_USB_SERIAL_SSU100 is not set +# CONFIG_USB_SERIAL_DEBUG is not set +# CONFIG_USB_SERIAL_BASEBAND is not set + +# +# USB Miscellaneous 
drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_ADUTUX is not set +# CONFIG_USB_SEVSEG is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set +# CONFIG_USB_IOWARRIOR is not set +# CONFIG_USB_TEST is not set +# CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_YUREX is not set +CONFIG_USB_GADGET=y +# CONFIG_USB_GADGET_DEBUG is not set +# CONFIG_USB_GADGET_DEBUG_FILES is not set +# CONFIG_USB_GADGET_DEBUG_FS is not set +CONFIG_USB_GADGET_VBUS_DRAW=500 +CONFIG_USB_FSL_USB2=y +# CONFIG_USB_FUSB300 is not set +# CONFIG_USB_R8A66597 is not set +# CONFIG_USB_M66592 is not set +# CONFIG_USB_AMD5536UDC is not set +# CONFIG_USB_CI13XXX_PCI is not set +# CONFIG_USB_NET2272 is not set +# CONFIG_USB_NET2280 is not set +# CONFIG_USB_GOKU is not set +# CONFIG_USB_LANGWELL is not set +# CONFIG_USB_EG20T is not set +# CONFIG_USB_DUMMY_HCD is not set +CONFIG_USB_GADGET_DUALSPEED=y +# CONFIG_USB_ZERO is not set +# CONFIG_USB_AUDIO is not set +# CONFIG_USB_ETH is not set +# CONFIG_USB_G_NCM is not set +# CONFIG_USB_GADGETFS is not set +# CONFIG_USB_FUNCTIONFS is not set +# CONFIG_USB_FILE_STORAGE is not set +# CONFIG_USB_MASS_STORAGE is not set +# CONFIG_USB_G_SERIAL is not set +# CONFIG_USB_MIDI_GADGET is not set +# CONFIG_USB_G_PRINTER is not set +CONFIG_USB_G_ANDROID=y +# CONFIG_USB_CDC_COMPOSITE is not set +# CONFIG_USB_G_MULTI is not set +# CONFIG_USB_G_HID is not set +# CONFIG_USB_G_DBGP is not set +# CONFIG_USB_G_WEBCAM is not set + +# +# OTG and related infrastructure +# +CONFIG_USB_OTG_UTILS=y +# CONFIG_USB_OTG_WAKELOCK is not set +# CONFIG_USB_GPIO_VBUS is not set +CONFIG_USB_ULPI=y +CONFIG_USB_ULPI_VIEWPORT=y +# CONFIG_NOP_USB_XCEIV is not set +CONFIG_USB_TEGRA_OTG=y +# CONFIG_UWB is not set +CONFIG_MMC=y +# CONFIG_MMC_DEBUG is not set +CONFIG_MMC_UNSAFE_RESUME=y +# CONFIG_MMC_CLKGATE is not set +CONFIG_MMC_EMBEDDED_SDIO=y +# CONFIG_MMC_PARANOID_SD_INIT is not set + +# +# MMC/SD/SDIO Card Drivers +# +CONFIG_MMC_BLOCK=y +CONFIG_MMC_BLOCK_MINORS=16 +CONFIG_MMC_BLOCK_BOUNCE=y +CONFIG_MMC_BLOCK_DEFERRED_RESUME=y +# CONFIG_SDIO_UART is not set +CONFIG_MMC_TEST=y + +# +# MMC/SD/SDIO Host Controller Drivers +# +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_IO_ACCESSORS=y +# CONFIG_MMC_SDHCI_PCI is not set +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_SDHCI_TEGRA=y +# CONFIG_MMC_SDHCI_PXAV3 is not set +# CONFIG_MMC_SDHCI_PXAV2 is not set +# CONFIG_MMC_TIFM_SD is not set +# CONFIG_MMC_CB710 is not set +# CONFIG_MMC_VIA_SDMMC is not set +# CONFIG_MMC_DW is not set +# CONFIG_MMC_VUB300 is not set +# CONFIG_MMC_USHC is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y + +# +# LED drivers +# +# CONFIG_LEDS_LM3530 is not set +# CONFIG_LEDS_PCA9532 is not set +CONFIG_LEDS_GPIO=y +# CONFIG_LEDS_LP3944 is not set +# CONFIG_LEDS_LP5521 is not set +# CONFIG_LEDS_LP5523 is not set +# CONFIG_LEDS_PCA955X is not set +# CONFIG_LEDS_DAC124S085 is not set +# CONFIG_LEDS_PWM is not set +# CONFIG_LEDS_REGULATOR is not set +# CONFIG_LEDS_BD2802 is not set +# CONFIG_LEDS_LT3593 is not set +# CONFIG_LEDS_TRIGGERS is not set + +# +# LED Triggers +# +CONFIG_SWITCH=y +# CONFIG_SWITCH_GPIO is not set +# CONFIG_ACCESSIBILITY is not 
set +# CONFIG_INFINIBAND is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_HCTOSYS_DEVICE="rtc0" +# CONFIG_RTC_DEBUG is not set + +# +# RTC interfaces +# +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +CONFIG_RTC_INTF_ALARM=y +CONFIG_RTC_INTF_ALARM_DEV=y +# CONFIG_RTC_DRV_TEST is not set + +# +# I2C RTC drivers +# +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_DS3232 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +CONFIG_RTC_DRV_MAX77663=y +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_ISL12022 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_BQ32K is not set +CONFIG_RTC_DRV_TPS6586X=y +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set +# CONFIG_RTC_DRV_RX8025 is not set +# CONFIG_RTC_DRV_EM3027 is not set +# CONFIG_RTC_DRV_RV3029C2 is not set + +# +# SPI RTC drivers +# +# CONFIG_RTC_DRV_M41T93 is not set +# CONFIG_RTC_DRV_M41T94 is not set +# CONFIG_RTC_DRV_DS1305 is not set +# CONFIG_RTC_DRV_DS1390 is not set +# CONFIG_RTC_DRV_MAX6902 is not set +# CONFIG_RTC_DRV_R9701 is not set +# CONFIG_RTC_DRV_RS5C348 is not set +# CONFIG_RTC_DRV_DS3234 is not set +# CONFIG_RTC_DRV_PCF2123 is not set + +# +# Platform RTC drivers +# +# CONFIG_RTC_DRV_CMOS is not set +# CONFIG_RTC_DRV_DS1286 is not set +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_MSM6242 is not set +# CONFIG_RTC_DRV_BQ4802 is not set +# CONFIG_RTC_DRV_RP5C01 is not set +# CONFIG_RTC_DRV_V3020 is not set + +# +# on-CPU RTC drivers +# +# CONFIG_RTC_DRV_TEGRA is not set +CONFIG_RTC_DRV_TPS6591x=y +CONFIG_RTC_DRV_TPS80031=y +CONFIG_RTC_DRV_RC5T583=y +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +# CONFIG_UIO is not set + +# +# Virtio drivers +# +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set +CONFIG_STAGING=y +# CONFIG_ET131X is not set +# CONFIG_USBIP_CORE is not set +# CONFIG_PRISM2_USB is not set +# CONFIG_ECHO is not set +# CONFIG_BRCMUTIL is not set +# CONFIG_ASUS_OLED is not set +# CONFIG_R8187SE is not set +# CONFIG_RTL8192U is not set +# CONFIG_RTL8192E is not set +# CONFIG_R8712U is not set +# CONFIG_RTS_PSTOR is not set +# CONFIG_TRANZPORT is not set + +# +# Android +# +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_LOGGER=y +CONFIG_ANDROID_RAM_CONSOLE=y +CONFIG_ANDROID_RAM_CONSOLE_ENABLE_VERBOSE=y +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION=y +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_DATA_SIZE=128 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_ECC_SIZE=16 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE=8 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_POLYNOMIAL=0x11d +# CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT is not set +CONFIG_ANDROID_TIMED_OUTPUT=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +# CONFIG_POHMELFS is not set +# CONFIG_LINE6_USB is not set +# CONFIG_USB_SERIAL_QUATECH2 is not set +# CONFIG_USB_SERIAL_QUATECH_USB2 is not set +# CONFIG_VT6655 is not set +# CONFIG_VT6656 is not set +# CONFIG_VME_BUS is not set +# 
CONFIG_DX_SEP is not set +CONFIG_IIO=y +# CONFIG_IIO_ST_HWMON is not set +CONFIG_IIO_BUFFER=y +# CONFIG_IIO_SW_RING is not set +CONFIG_IIO_KFIFO_BUF=y +CONFIG_IIO_TRIGGER=y +CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 + +# +# Accelerometers +# +# CONFIG_ADIS16201 is not set +# CONFIG_ADIS16203 is not set +# CONFIG_ADIS16204 is not set +# CONFIG_ADIS16209 is not set +# CONFIG_ADIS16220 is not set +# CONFIG_ADIS16240 is not set +# CONFIG_KXSD9 is not set +# CONFIG_LIS3L02DQ is not set + +# +# Analog to digital convertors +# +# CONFIG_AD7150 is not set +# CONFIG_AD7152 is not set +# CONFIG_AD7291 is not set +# CONFIG_AD7298 is not set +# CONFIG_AD7314 is not set +# CONFIG_AD7606 is not set +# CONFIG_AD799X is not set +# CONFIG_AD7476 is not set +# CONFIG_AD7887 is not set +# CONFIG_AD7780 is not set +# CONFIG_AD7793 is not set +# CONFIG_AD7745 is not set +# CONFIG_AD7816 is not set +# CONFIG_ADT75 is not set +# CONFIG_ADT7310 is not set +# CONFIG_ADT7410 is not set +# CONFIG_MAX1363 is not set + +# +# Analog digital bi-direction convertors +# +# CONFIG_ADT7316 is not set + +# +# Digital to analog convertors +# +# CONFIG_AD5624R_SPI is not set +# CONFIG_AD5446 is not set +# CONFIG_AD5504 is not set +# CONFIG_AD5791 is not set +# CONFIG_AD5686 is not set +# CONFIG_MAX517 is not set + +# +# Direct Digital Synthesis +# +# CONFIG_AD5930 is not set +# CONFIG_AD9832 is not set +# CONFIG_AD9834 is not set +# CONFIG_AD9850 is not set +# CONFIG_AD9852 is not set +# CONFIG_AD9910 is not set +# CONFIG_AD9951 is not set + +# +# Digital gyroscope sensors +# +# CONFIG_ADIS16060 is not set +# CONFIG_ADIS16080 is not set +# CONFIG_ADIS16130 is not set +# CONFIG_ADIS16260 is not set +# CONFIG_ADXRS450 is not set + +# +# Inertial measurement units +# +# CONFIG_ADIS16400 is not set +CONFIG_INV_MPU_IIO=y + +# +# Light sensors +# +# CONFIG_SENSORS_ISL29018 is not set +CONFIG_SENSORS_ISL29028=y +# CONFIG_SENSORS_TSL2563 is not set +# CONFIG_TSL2583 is not set +CONFIG_SENSORS_LTR558=y + +# +# Magnetometer sensors +# +# CONFIG_SENSORS_HMC5843 is not set +CONFIG_AMI306=y + +# +# Active energy metering IC +# +# CONFIG_ADE7753 is not set +# CONFIG_ADE7754 is not set +# CONFIG_ADE7758 is not set +# CONFIG_ADE7759 is not set +# CONFIG_ADE7854 is not set + +# +# Resolver to digital converters +# +# CONFIG_AD2S90 is not set +# CONFIG_AD2S120X is not set +# CONFIG_AD2S1210 is not set + +# +# Triggers - standalone +# +# CONFIG_IIO_PERIODIC_RTC_TRIGGER is not set +# CONFIG_IIO_GPIO_TRIGGER is not set +# CONFIG_IIO_SYSFS_TRIGGER is not set +# CONFIG_IIO_SIMPLE_DUMMY is not set +CONFIG_XVMALLOC=y +CONFIG_ZRAM=y +# CONFIG_ZRAM_DEBUG is not set +# CONFIG_FB_SM7XX is not set +# CONFIG_VIDEO_DT3155 is not set +# CONFIG_CRYSTALHD is not set +# CONFIG_FB_XGI is not set +# CONFIG_EASYCAP is not set +# CONFIG_SOLO6X10 is not set +# CONFIG_ATH6K_LEGACY is not set +# CONFIG_USB_ENESTORAGE is not set +# CONFIG_BCM_WIMAX is not set +# CONFIG_FT1000 is not set + +# +# Speakup console speech +# +# CONFIG_TOUCHSCREEN_CLEARPAD_TM1217 is not set +# CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4 is not set +# CONFIG_ALTERA_STAPL is not set +# CONFIG_MFD_NVEC is not set +CONFIG_CLKDEV_LOOKUP=y +CONFIG_CLKSRC_MMIO=y +CONFIG_IOMMU_SUPPORT=y +# CONFIG_TEGRA_IOMMU_SMMU is not set +# CONFIG_VIRT_DRIVERS is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_XATTR=y 
+CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_XATTR=y +CONFIG_EXT4_FS_POSIX_ACL=y +# CONFIG_EXT4_FS_SECURITY is not set +# CONFIG_EXT4_DEBUG is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_JBD2=y +# CONFIG_JBD2_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_FILE_LOCKING=y +CONFIG_FSNOTIFY=y +# CONFIG_DNOTIFY is not set +CONFIG_INOTIFY_USER=y +# CONFIG_FANOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_QUOTACTL is not set +# CONFIG_AUTOFS4_FS is not set +CONFIG_FUSE_FS=y +# CONFIG_CUSE is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +# CONFIG_MSDOS_FS is not set +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +CONFIG_NTFS_FS=y +# CONFIG_NTFS_DEBUG is not set +# CONFIG_NTFS_RW is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_REPORT_PRESENT_CPUS=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_TMPFS_XATTR is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_LOGFS is not set +# CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_PSTORE is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +CONFIG_NFS_V4=y +# CONFIG_NFS_V4_1 is not set +CONFIG_ROOT_NFS=y +# CONFIG_NFS_USE_LEGACY_DNS is not set +CONFIG_NFS_USE_KERNEL_DNS=y +# CONFIG_NFS_USE_NEW_IDMAPPER is not set +# CONFIG_NFSD is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +# CONFIG_CEPH_FS is not set +CONFIG_CIFS=y +CONFIG_CIFS_STATS=y +# CONFIG_CIFS_STATS2 is not set +CONFIG_CIFS_WEAK_PW_HASH=y +# CONFIG_CIFS_UPCALL is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_DEBUG2 is not set +# CONFIG_CIFS_DFS_UPCALL is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# 
CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +CONFIG_NLS_UTF8=y + +# +# Kernel hacking +# +CONFIG_PRINTK_TIME=y +CONFIG_DEFAULT_MESSAGE_LOGLEVEL=4 +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +CONFIG_MAGIC_SYSRQ=y +# CONFIG_STRIP_ASM_SYMS is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +# CONFIG_DEBUG_SECTION_MISMATCH is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_LOCKUP_DETECTOR=y +# CONFIG_HARDLOCKUP_DETECTOR is not set +# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +# CONFIG_DETECT_HUNG_TASK is not set +CONFIG_SCHED_DEBUG=y +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_KMEMLEAK is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_SPARSE_RCU_POINTER is not set +# CONFIG_LOCK_STAT is not set +# CONFIG_DEBUG_ATOMIC_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_INFO_REDUCED is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_TEST_LIST_SORT is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_DEBUG_CREDENTIALS is not set +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_RCU_CPU_STALL_VERBOSE=y +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set +# CONFIG_DEBUG_PER_CPU_MAPS is not set +# CONFIG_LKDTM is not set +# CONFIG_CPU_NOTIFIER_ERROR_INJECT is not set +# CONFIG_FAULT_INJECTION is not set +# 
CONFIG_SYSCTL_SYSCALL_CHECK is not set +# CONFIG_DEBUG_PAGEALLOC is not set +CONFIG_NOP_TRACER=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_HAVE_C_RECORDMCOUNT=y +CONFIG_RING_BUFFER=y +CONFIG_EVENT_TRACING=y +# CONFIG_EVENT_POWER_TRACING_DEPRECATED is not set +CONFIG_CONTEXT_SWITCH_TRACER=y +CONFIG_RING_BUFFER_ALLOW_SWAP=y +CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y +CONFIG_FTRACE=y +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_BRANCH_PROFILE_NONE=y +# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set +# CONFIG_PROFILE_ALL_BRANCHES is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_RING_BUFFER_BENCHMARK is not set +# CONFIG_TRACELEVEL is not set +CONFIG_DYNAMIC_DEBUG=y +# CONFIG_DMA_API_DEBUG is not set +# CONFIG_ATOMIC64_SELFTEST is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +# CONFIG_TEST_KSTRTOX is not set +# CONFIG_STRICT_DEVMEM is not set +CONFIG_ARM_UNWIND=y +# CONFIG_DEBUG_USER is not set +# CONFIG_DEBUG_LL is not set +# CONFIG_OC_ETM is not set + +# +# Security options +# +CONFIG_KEYS=y +# CONFIG_KEYS_DEBUG_PROC_KEYS is not set +# CONFIG_SECURITY_DMESG_RESTRICT is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set +CONFIG_TRUSTED_FOUNDATIONS=y +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEFAULT_SECURITY="" +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP2=y +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y +CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_PCRYPT is not set +CONFIG_CRYPTO_WORKQUEUE=y +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_VMAC is not set + +# +# Digest +# +CONFIG_CRYPTO_CRC32C=y +# CONFIG_CRYPTO_GHASH is not set +CONFIG_CRYPTO_MD4=y +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA256=y +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=y +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_ARC4=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# 
CONFIG_CRYPTO_TEA is not set +CONFIG_CRYPTO_TWOFISH=y +CONFIG_CRYPTO_TWOFISH_COMMON=y + +# +# Compression +# +CONFIG_CRYPTO_DEFLATE=y +# CONFIG_CRYPTO_ZLIB is not set +# CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +# CONFIG_CRYPTO_USER_API_HASH is not set +# CONFIG_CRYPTO_USER_API_SKCIPHER is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_CRYPTO_DEV_TEGRA_AES is not set +CONFIG_CRYPTO_DEV_TEGRA_SE=y +CONFIG_BINARY_PRINTF=y + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_CRC_CCITT=y +CONFIG_CRC16=y +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=y +# CONFIG_CRC8 is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_LZO_COMPRESS=y +CONFIG_LZO_DECOMPRESS=y +# CONFIG_XZ_DEC is not set +# CONFIG_XZ_DEC_BCJ is not set +CONFIG_DECOMPRESS_GZIP=y +CONFIG_REED_SOLOMON=y +CONFIG_REED_SOLOMON_ENC8=y +CONFIG_REED_SOLOMON_DEC8=y +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=y +CONFIG_TEXTSEARCH_BM=y +CONFIG_TEXTSEARCH_FSM=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_CPU_RMAP=y +CONFIG_NLATTR=y +# CONFIG_AVERAGE is not set +# CONFIG_CORDIC is not set From e60aed0762ce45db073f62105004ffec68725b49 Mon Sep 17 00:00:00 2001 From: motley Date: Sun, 15 Jul 2012 12:29:09 -0400 Subject: [PATCH 003/678] Add Simple IO scheduler and set to be the default i/o scheduler --- block/Kconfig.iosched | 14 ++ block/Makefile | 1 + block/sio-iosched.c | 399 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 414 insertions(+) create mode 100644 block/sio-iosched.c diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 3199b76f795..624af8fcfb4 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -43,6 +43,16 @@ config CFQ_GROUP_IOSCHED ---help--- Enable group IO scheduling in CFQ. +config IOSCHED_SIO + tristate "Simple I/O scheduler" + default y + ---help--- + The Simple I/O scheduler is an extremely simple scheduler, + based on noop and deadline, that relies on deadlines to + ensure fairness. The algorithm does not do any sorting but + basic merging, trying to keep a minimum overhead. It is aimed + mainly for aleatory access devices (eg: flash devices). + choice prompt "Default I/O scheduler" default DEFAULT_CFQ @@ -58,6 +68,9 @@ choice config DEFAULT_NOOP bool "No-op" + + config DEFAULT_SIO + bool "SIO" if IOSCHED_SIO=y endchoice @@ -66,6 +79,7 @@ config DEFAULT_IOSCHED default "deadline" if DEFAULT_DEADLINE default "cfq" if DEFAULT_CFQ default "noop" if DEFAULT_NOOP + default "sio" if DEFAULT_SIO endmenu diff --git a/block/Makefile b/block/Makefile index 514c6e4f427..7d657e71c48 100644 --- a/block/Makefile +++ b/block/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o +obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o diff --git a/block/sio-iosched.c b/block/sio-iosched.c new file mode 100644 index 00000000000..c52a67c554c --- /dev/null +++ b/block/sio-iosched.c @@ -0,0 +1,399 @@ +/* + * Simple IO scheduler + * Based on Noop, Deadline and V(R) IO schedulers. + * + * Copyright (C) 2012 Miguel Boton + * + * + * This algorithm does not do any kind of sorting, as it is aimed for + * aleatory access devices, but it does some basic merging. 
We try to + * keep minimum overhead to achieve low latency. + * + * Asynchronous and synchronous requests are not treated separately, but + * we relay on deadlines to ensure fairness. + * + */ +#include +#include +#include +#include +#include +#include + +enum { ASYNC, SYNC }; + +/* Tunables */ +static const int sync_read_expire = HZ / 2; /* max time before a sync read is submitted. */ +static const int sync_write_expire = 2 * HZ; /* max time before a sync write is submitted. */ + +static const int async_read_expire = 4 * HZ; /* ditto for async, these limits are SOFT! */ +static const int async_write_expire = 16 * HZ; /* ditto for async, these limits are SOFT! */ + +static const int writes_starved = 2; /* max times reads can starve a write */ +static const int fifo_batch = 8; /* # of sequential requests treated as one + by the above parameters. For throughput. */ + +/* Elevator data */ +struct sio_data { + /* Request queues */ + struct list_head fifo_list[2][2]; + + /* Attributes */ + unsigned int batched; + unsigned int starved; + + /* Settings */ + int fifo_expire[2][2]; + int fifo_batch; + int writes_starved; +}; + +static void +sio_merged_requests(struct request_queue *q, struct request *rq, + struct request *next) +{ + /* + * If next expires before rq, assign its expire time to rq + * and move into next position (next will be deleted) in fifo. + */ + if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) { + if (time_before(rq_fifo_time(next), rq_fifo_time(rq))) { + list_move(&rq->queuelist, &next->queuelist); + rq_set_fifo_time(rq, rq_fifo_time(next)); + } + } + + /* Delete next request */ + rq_fifo_clear(next); +} + +static void +sio_add_request(struct request_queue *q, struct request *rq) +{ + struct sio_data *sd = q->elevator->elevator_data; + const int sync = rq_is_sync(rq); + const int data_dir = rq_data_dir(rq); + + /* + * Add request to the proper fifo list and set its + * expire time. + */ + rq_set_fifo_time(rq, jiffies + sd->fifo_expire[sync][data_dir]); + list_add_tail(&rq->queuelist, &sd->fifo_list[sync][data_dir]); +} + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38) +static int +sio_queue_empty(struct request_queue *q) +{ + struct sio_data *sd = q->elevator->elevator_data; + + /* Check if fifo lists are empty */ + return list_empty(&sd->fifo_list[SYNC][READ]) && list_empty(&sd->fifo_list[SYNC][WRITE]) && + list_empty(&sd->fifo_list[ASYNC][READ]) && list_empty(&sd->fifo_list[ASYNC][WRITE]); +} +#endif + +static struct request * +sio_expired_request(struct sio_data *sd, int sync, int data_dir) +{ + struct list_head *list = &sd->fifo_list[sync][data_dir]; + struct request *rq; + + if (list_empty(list)) + return NULL; + + /* Retrieve request */ + rq = rq_entry_fifo(list->next); + + /* Request has expired */ + if (time_after(jiffies, rq_fifo_time(rq))) + return rq; + + return NULL; +} + +static struct request * +sio_choose_expired_request(struct sio_data *sd) +{ + struct request *rq; + + /* + * Check expired requests. + * Asynchronous requests have priority over synchronous. + * Write requests have priority over read. 
+ */ + rq = sio_expired_request(sd, ASYNC, WRITE); + if (rq) + return rq; + rq = sio_expired_request(sd, ASYNC, READ); + if (rq) + return rq; + + rq = sio_expired_request(sd, SYNC, WRITE); + if (rq) + return rq; + rq = sio_expired_request(sd, SYNC, READ); + if (rq) + return rq; + + return NULL; +} + +static struct request * +sio_choose_request(struct sio_data *sd, int data_dir) +{ + struct list_head *sync = sd->fifo_list[SYNC]; + struct list_head *async = sd->fifo_list[ASYNC]; + + /* + * Retrieve request from available fifo list. + * Synchronous requests have priority over asynchronous. + * Read requests have priority over write. + */ + if (!list_empty(&sync[data_dir])) + return rq_entry_fifo(sync[data_dir].next); + if (!list_empty(&async[data_dir])) + return rq_entry_fifo(async[data_dir].next); + + if (!list_empty(&sync[!data_dir])) + return rq_entry_fifo(sync[!data_dir].next); + if (!list_empty(&async[!data_dir])) + return rq_entry_fifo(async[!data_dir].next); + + return NULL; +} + +static inline void +sio_dispatch_request(struct sio_data *sd, struct request *rq) +{ + /* + * Remove the request from the fifo list + * and dispatch it. + */ + rq_fifo_clear(rq); + elv_dispatch_add_tail(rq->q, rq); + + sd->batched++; + + if (rq_data_dir(rq)) + sd->starved = 0; + else + sd->starved++; +} + +static int +sio_dispatch_requests(struct request_queue *q, int force) +{ + struct sio_data *sd = q->elevator->elevator_data; + struct request *rq = NULL; + int data_dir = READ; + + /* + * Retrieve any expired request after a batch of + * sequential requests. + */ + if (sd->batched > sd->fifo_batch) { + sd->batched = 0; + rq = sio_choose_expired_request(sd); + } + + /* Retrieve request */ + if (!rq) { + if (sd->starved > sd->writes_starved) + data_dir = WRITE; + + rq = sio_choose_request(sd, data_dir); + if (!rq) + return 0; + } + + /* Dispatch request */ + sio_dispatch_request(sd, rq); + + return 1; +} + +static struct request * +sio_former_request(struct request_queue *q, struct request *rq) +{ + struct sio_data *sd = q->elevator->elevator_data; + const int sync = rq_is_sync(rq); + const int data_dir = rq_data_dir(rq); + + if (rq->queuelist.prev == &sd->fifo_list[sync][data_dir]) + return NULL; + + /* Return former request */ + return list_entry(rq->queuelist.prev, struct request, queuelist); +} + +static struct request * +sio_latter_request(struct request_queue *q, struct request *rq) +{ + struct sio_data *sd = q->elevator->elevator_data; + const int sync = rq_is_sync(rq); + const int data_dir = rq_data_dir(rq); + + if (rq->queuelist.next == &sd->fifo_list[sync][data_dir]) + return NULL; + + /* Return latter request */ + return list_entry(rq->queuelist.next, struct request, queuelist); +} + +static void * +sio_init_queue(struct request_queue *q) +{ + struct sio_data *sd; + + /* Allocate structure */ + sd = kmalloc_node(sizeof(*sd), GFP_KERNEL, q->node); + if (!sd) + return NULL; + + /* Initialize fifo lists */ + INIT_LIST_HEAD(&sd->fifo_list[SYNC][READ]); + INIT_LIST_HEAD(&sd->fifo_list[SYNC][WRITE]); + INIT_LIST_HEAD(&sd->fifo_list[ASYNC][READ]); + INIT_LIST_HEAD(&sd->fifo_list[ASYNC][WRITE]); + + /* Initialize data */ + sd->batched = 0; + sd->fifo_expire[SYNC][READ] = sync_read_expire; + sd->fifo_expire[SYNC][WRITE] = sync_write_expire; + sd->fifo_expire[ASYNC][READ] = async_read_expire; + sd->fifo_expire[ASYNC][WRITE] = async_write_expire; + sd->fifo_batch = fifo_batch; + + return sd; +} + +static void +sio_exit_queue(struct elevator_queue *e) +{ + struct sio_data *sd = e->elevator_data; + + 
BUG_ON(!list_empty(&sd->fifo_list[SYNC][READ])); + BUG_ON(!list_empty(&sd->fifo_list[SYNC][WRITE])); + BUG_ON(!list_empty(&sd->fifo_list[ASYNC][READ])); + BUG_ON(!list_empty(&sd->fifo_list[ASYNC][WRITE])); + + /* Free structure */ + kfree(sd); +} + +/* + * sysfs code + */ + +static ssize_t +sio_var_show(int var, char *page) +{ + return sprintf(page, "%d\n", var); +} + +static ssize_t +sio_var_store(int *var, const char *page, size_t count) +{ + char *p = (char *) page; + + *var = simple_strtol(p, &p, 10); + return count; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct sio_data *sd = e->elevator_data; \ + int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return sio_var_show(__data, (page)); \ +} +SHOW_FUNCTION(sio_sync_read_expire_show, sd->fifo_expire[SYNC][READ], 1); +SHOW_FUNCTION(sio_sync_write_expire_show, sd->fifo_expire[SYNC][WRITE], 1); +SHOW_FUNCTION(sio_async_read_expire_show, sd->fifo_expire[ASYNC][READ], 1); +SHOW_FUNCTION(sio_async_write_expire_show, sd->fifo_expire[ASYNC][WRITE], 1); +SHOW_FUNCTION(sio_fifo_batch_show, sd->fifo_batch, 0); +SHOW_FUNCTION(sio_writes_starved_show, sd->writes_starved, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ + struct sio_data *sd = e->elevator_data; \ + int __data; \ + int ret = sio_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + if (__CONV) \ + *(__PTR) = msecs_to_jiffies(__data); \ + else \ + *(__PTR) = __data; \ + return ret; \ +} +STORE_FUNCTION(sio_sync_read_expire_store, &sd->fifo_expire[SYNC][READ], 0, INT_MAX, 1); +STORE_FUNCTION(sio_sync_write_expire_store, &sd->fifo_expire[SYNC][WRITE], 0, INT_MAX, 1); +STORE_FUNCTION(sio_async_read_expire_store, &sd->fifo_expire[ASYNC][READ], 0, INT_MAX, 1); +STORE_FUNCTION(sio_async_write_expire_store, &sd->fifo_expire[ASYNC][WRITE], 0, INT_MAX, 1); +STORE_FUNCTION(sio_fifo_batch_store, &sd->fifo_batch, 0, INT_MAX, 0); +STORE_FUNCTION(sio_writes_starved_store, &sd->writes_starved, 0, INT_MAX, 0); +#undef STORE_FUNCTION + +#define DD_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, sio_##name##_show, \ + sio_##name##_store) + +static struct elv_fs_entry sio_attrs[] = { + DD_ATTR(sync_read_expire), + DD_ATTR(sync_write_expire), + DD_ATTR(async_read_expire), + DD_ATTR(async_write_expire), + DD_ATTR(fifo_batch), + DD_ATTR(writes_starved), + __ATTR_NULL +}; + +static struct elevator_type iosched_sio = { + .ops = { + .elevator_merge_req_fn = sio_merged_requests, + .elevator_dispatch_fn = sio_dispatch_requests, + .elevator_add_req_fn = sio_add_request, +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38) + .elevator_queue_empty_fn = sio_queue_empty, +#endif + .elevator_former_req_fn = sio_former_request, + .elevator_latter_req_fn = sio_latter_request, + .elevator_init_fn = sio_init_queue, + .elevator_exit_fn = sio_exit_queue, + }, + + .elevator_attrs = sio_attrs, + .elevator_name = "sio", + .elevator_owner = THIS_MODULE, +}; + +static int __init sio_init(void) +{ + /* Register elevator */ + elv_register(&iosched_sio); + + return 0; +} + +static void __exit sio_exit(void) +{ + /* Unregister elevator */ + elv_unregister(&iosched_sio); +} + +module_init(sio_init); +module_exit(sio_exit); + +MODULE_AUTHOR("Miguel Boton"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Simple IO scheduler"); 
+MODULE_VERSION("0.2"); From 4b4fabe645dfe99eef274931b6e611b0ef52c25f Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Sun, 15 Jul 2012 14:10:41 -0400 Subject: [PATCH 004/678] ARM: vfp: only clear vfp state for current cpu in vfp_pm_suspend vfp_pm_suspend runs on each cpu, only clear the hardware state pointer for the current cpu. Prevents a possible crash if one cpu clears the hw state pointer when another cpu has already checked if it is valid. --- arch/arm/vfp/vfpmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index e381dc68505..329eaf456c7 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -461,7 +461,7 @@ static int vfp_pm_suspend(void) } /* clear any information we had about last context state */ - memset(vfp_current_hw_state, 0, sizeof(vfp_current_hw_state)); + last_VFP_context[ti->cpu] = NULL; return 0; } From ea9911129b804c0ccbd63fbfc7b0839b7ed6c178 Mon Sep 17 00:00:00 2001 From: Hyungwoo Yang Date: Sun, 15 Jul 2012 14:15:23 -0400 Subject: [PATCH 005/678] System crashes if there is process migration during vfp_init() call. During vfp_init(), if a process which called vfp_enable() is migrated just after the call, then the process executing the rest of code will access a VFP unit which is not ENABLED and also smp_call_function() will not work as it is expected. This patch prevents accessing VFP unit disabled by preventing migration and also replaces smp_call_function() with on_each_cpu() to make sure that no VFP remains disabled. --- arch/arm/vfp/vfpmodule.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 329eaf456c7..7ffae27c1e6 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -574,6 +574,9 @@ static int __init vfp_init(void) unsigned int vfpsid; unsigned int cpu_arch = cpu_architecture(); +#ifdef CONFIG_SMP + preempt_disable(); +#endif if (cpu_arch >= CPU_ARCH_ARMv6) vfp_enable(NULL); @@ -587,6 +590,9 @@ static int __init vfp_init(void) vfpsid = fmrx(FPSID); barrier(); vfp_vector = vfp_null_entry; +#ifdef CONFIG_SMP + preempt_enable(); +#endif printk(KERN_INFO "VFP support v0.3: "); if (VFP_arch) @@ -596,7 +602,7 @@ static int __init vfp_init(void) } else { hotcpu_notifier(vfp_hotplug, 0); - smp_call_function(vfp_enable, NULL, 1); + on_each_cpu(vfp_enable, NULL, 1); VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT; /* Extract the architecture version */ printk("implementor %02x architecture %d part %02x variant %x rev %x\n", From 74ea80c9529c8bb9cff483dfec97f8b537f3f48e Mon Sep 17 00:00:00 2001 From: motley Date: Sun, 15 Jul 2012 15:06:55 -0400 Subject: [PATCH 006/678] Revert "ARM: vfp: only clear vfp state for current cpu in vfp_pm_suspend vfp_pm_suspend runs on each cpu, only clear the hardware state pointer for the current cpu. Prevents a possible crash if one cpu clears the hw state pointer when another cpu has already checked if it is valid." This reverts commit 31d3897739f39b02af134bc15caa7062cb779a82. 
--- arch/arm/vfp/vfpmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 7ffae27c1e6..07d3a2c6abb 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -461,7 +461,7 @@ static int vfp_pm_suspend(void) } /* clear any information we had about last context state */ - last_VFP_context[ti->cpu] = NULL; + memset(vfp_current_hw_state, 0, sizeof(vfp_current_hw_state)); return 0; } From 0d0ef1924f22b228b8cf61bf27cd3d5b19cf3ba4 Mon Sep 17 00:00:00 2001 From: motley Date: Sun, 15 Jul 2012 15:33:21 -0400 Subject: [PATCH 007/678] Revert "System crashes if there is process migration during vfp_init() call. During vfp_init(), if a process which called vfp_enable() is migrated just after the call, then the process executing the rest of code will access a VFP unit which is not ENABLED and also smp_call_function() will not work as it is expected." This reverts commit 8eea45c16beda8771b1b2f61f03ec005653628a7. --- arch/arm/vfp/vfpmodule.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 07d3a2c6abb..e381dc68505 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -574,9 +574,6 @@ static int __init vfp_init(void) unsigned int vfpsid; unsigned int cpu_arch = cpu_architecture(); -#ifdef CONFIG_SMP - preempt_disable(); -#endif if (cpu_arch >= CPU_ARCH_ARMv6) vfp_enable(NULL); @@ -590,9 +587,6 @@ static int __init vfp_init(void) vfpsid = fmrx(FPSID); barrier(); vfp_vector = vfp_null_entry; -#ifdef CONFIG_SMP - preempt_enable(); -#endif printk(KERN_INFO "VFP support v0.3: "); if (VFP_arch) @@ -602,7 +596,7 @@ static int __init vfp_init(void) } else { hotcpu_notifier(vfp_hotplug, 0); - on_each_cpu(vfp_enable, NULL, 1); + smp_call_function(vfp_enable, NULL, 1); VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT; /* Extract the architecture version */ printk("implementor %02x architecture %d part %02x variant %x rev %x\n", From 322ab586ab9f67013efd5509ec5b954497d87566 Mon Sep 17 00:00:00 2001 From: motley Date: Sun, 15 Jul 2012 15:37:23 -0400 Subject: [PATCH 008/678] Revert "Revert "System crashes if there is process migration during vfp_init() call. During vfp_init(), if a process which called vfp_enable() is migrated just after the call, then the process executing the rest of code will access a VFP unit which is not ENABLED and also smp_call_function() will not work as it is expected."" This reverts commit 2b9361604564d239e5186be16948e7199b3a2113. 
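The distinction the vfp_init() fix (PATCH 005, re-applied by this patch) hinges on: smp_call_function() runs its callback only on the other online CPUs, so the CPU executing vfp_init() is skipped and can be left with its VFP unit still disabled, while on_each_cpu() also invokes the callback locally. A short sketch of the two calling patterns, with a hypothetical enable_unit() standing in for vfp_enable():

#include <linux/smp.h>

/* Hypothetical per-CPU callback; in vfpmodule.c this role is played by vfp_enable(). */
static void enable_unit(void *info)
{
        /* enable the coprocessor on whichever CPU this runs on */
}

static void enable_on_other_cpus(void)
{
        /* Skips the calling CPU: if nothing else enables it, that CPU's
         * unit stays disabled, which is the failure PATCH 005 describes. */
        smp_call_function(enable_unit, NULL, 1);
}

static void enable_on_every_cpu(void)
{
        /* Runs enable_unit() on the other CPUs and on the calling CPU as well. */
        on_each_cpu(enable_unit, NULL, 1);
}

The preempt_disable()/preempt_enable() pair the patch adds around the early vfp_enable(NULL) call serves the same purpose for the boot path: it keeps the thread from migrating away from the CPU whose unit it has just touched before the rest of the init code runs.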
--- arch/arm/vfp/vfpmodule.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index e381dc68505..07d3a2c6abb 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -574,6 +574,9 @@ static int __init vfp_init(void) unsigned int vfpsid; unsigned int cpu_arch = cpu_architecture(); +#ifdef CONFIG_SMP + preempt_disable(); +#endif if (cpu_arch >= CPU_ARCH_ARMv6) vfp_enable(NULL); @@ -587,6 +590,9 @@ static int __init vfp_init(void) vfpsid = fmrx(FPSID); barrier(); vfp_vector = vfp_null_entry; +#ifdef CONFIG_SMP + preempt_enable(); +#endif printk(KERN_INFO "VFP support v0.3: "); if (VFP_arch) @@ -596,7 +602,7 @@ static int __init vfp_init(void) } else { hotcpu_notifier(vfp_hotplug, 0); - smp_call_function(vfp_enable, NULL, 1); + on_each_cpu(vfp_enable, NULL, 1); VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT; /* Extract the architecture version */ printk("implementor %02x architecture %d part %02x variant %x rev %x\n", From a839bdfffd0316188218062a1c6e816066d53528 Mon Sep 17 00:00:00 2001 From: motley Date: Mon, 16 Jul 2012 00:59:55 -0400 Subject: [PATCH 009/678] Stop some kernel logging spam, we can see this in debug mode if we want to. --- arch/arm/mach-tegra/tegra3_clocks.c | 22 +++++++++++++++++++++- kernel/stop_machine.c | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 3650b2268ec..b9e3e25c1b7 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -839,7 +839,7 @@ static int tegra3_cpu_clk_set_rate(struct clk *c, unsigned long rate) return -ENOSYS; else if ((!c->dvfs->dvfs_rail->reg) && (clk_get_rate_locked(c) < rate)) { - WARN(1, "Increasing CPU rate while regulator is not" + pr_debug("Increasing CPU rate while regulator is not" " ready may overclock CPU\n"); return -ENOSYS; } @@ -4614,6 +4614,24 @@ static struct cpufreq_frequency_table freq_table_1p5GHz[] = { {14, CPUFREQ_TABLE_END }, }; +static struct cpufreq_frequency_table freq_table_1p6GHz[] = { + { 0, 51000 }, + { 1, 102000 }, + { 2, 204000 }, + { 3, 370000 }, + { 4, 475000 }, + { 5, 620000 }, + { 6, 760000 }, + { 7, 910000 }, + { 8, 1150000 }, + { 9, 1200000 }, + {10, 1300000 }, + {11, 1400000 }, + {12, 1500000 }, + {13, 1600000 }, + {14, CPUFREQ_TABLE_END }, +}; + static struct cpufreq_frequency_table freq_table_1p7GHz[] = { { 0, 51000 }, { 1, 102000 }, @@ -4638,6 +4656,7 @@ static struct tegra_cpufreq_table_data cpufreq_tables[] = { { freq_table_1p3GHz, 2, 10 }, { freq_table_1p4GHz, 2, 11 }, { freq_table_1p5GHz, 2, 12 }, + { freq_table_1p6GHz, 2, 12 }, { freq_table_1p7GHz, 2, 12 }, }; @@ -4706,6 +4725,7 @@ struct tegra_cpufreq_table_data *tegra_cpufreq_table_get(void) ret = clip_cpu_rate_limits( &cpufreq_tables[i], &policy, cpu_clk_g, cpu_clk_lp); + printk("tegra3_clocks: clip_cpu_rate_limits return code: %u\n", ret); if (!ret) return &cpufreq_tables[i]; } diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 1ca7b0407c9..e45ffcc9190 100755 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -452,7 +452,7 @@ static int stop_machine_cpu_stop(void *data) is_active = cpu == cpumask_first(cpu_online_mask); else is_active = cpumask_test_cpu(cpu, smdata->active_cpus); - printk("stop_machine_cpu_stop smp=%u\n",cpu); + pr_debug("stop_machine_cpu_stop smp=%u\n",cpu); /* Simple state machine */ do { /* Chill out and ensure we re-read stopmachine_state. 
*/ From 336e96ab1f9957a59445154c9806fc4c51c9d256 Mon Sep 17 00:00:00 2001 From: motley Date: Mon, 16 Jul 2012 01:01:06 -0400 Subject: [PATCH 010/678] Dynamic EDP - allows EDP to be enabled (safer) with an added simple temperature throttle (based on Asus Prime) --- arch/arm/mach-tegra/cpu-tegra.c | 38 +++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index 871988dc280..dc992e6dff5 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -58,6 +58,10 @@ static int suspend_index; static bool force_policy_max; +#define TEGRA3_OVERCLOCK +#define TEGRA3_DYNAMIC_EDP_THRES_TEMP (60) +static bool edp_enable = 0; + static int force_policy_max_set(const char *arg, const struct kernel_param *kp) { int ret; @@ -220,6 +224,16 @@ int tegra_edp_update_thermal_zone(int temperature) int nlimits = cpu_edp_limits_size; int index; +#ifdef TEGRA3_OVERCLOCK + if(temperature >= TEGRA3_DYNAMIC_EDP_THRES_TEMP) { + edp_enable = 1; + pr_info("%s: Dynamic EDP enabled, temp: %u\n", __func__, temperature); + } else { + edp_enable = 0; + pr_info("%s: Dynamic EDP disabled, temp: %u\n", __func__, temperature); + } +#endif + if (!cpu_edp_limits) return -EINVAL; @@ -324,7 +338,16 @@ static int tegra_cpu_edp_notify( edp_update_limit(); cpu_speed = tegra_getspeed(0); + +#ifdef TEGRA3_OVERCLOCK + if(edp_enable) { + pr_info("%s: tegra_cpu_edp_notify(). Dynamic EDP is enabled.\n", __func__); + new_speed = edp_governor_speed(new_speed); + pr_info("%s: tegra_cpu_edp_notify(). Dynamic EDP is disabled.\n", __func__); + } +#else new_speed = edp_governor_speed(cpu_speed); +#endif if (new_speed < cpu_speed) { ret = tegra_cpu_set_speed_cap(NULL); if (ret) { @@ -584,7 +607,15 @@ int tegra_cpu_set_speed_cap(unsigned int *speed_cap) return -EBUSY; new_speed = tegra_throttle_governor_speed(new_speed); + +#ifdef TEGRA3_OVERCLOCK + if(edp_enable) { + new_speed = edp_governor_speed(new_speed); + } +#else new_speed = edp_governor_speed(new_speed); +#endif + new_speed = user_cap_speed(new_speed); if (speed_cap) *speed_cap = new_speed; @@ -604,7 +635,14 @@ int tegra_suspended_target(unsigned int target_freq) /* apply only "hard" caps */ new_speed = tegra_throttle_governor_speed(new_speed); +#ifdef TEGRA3_OVERCLOCK + if(edp_enable) { + pr_info("%s : Dynamic EDP is enabled\n", __func__); + new_speed = edp_governor_speed(new_speed); + } +#else new_speed = edp_governor_speed(new_speed); +#endif return tegra_update_cpu_speed(new_speed); } From 22aad3592609e11267af65cf7904203ad06a001f Mon Sep 17 00:00:00 2001 From: motley Date: Mon, 16 Jul 2012 01:06:57 -0400 Subject: [PATCH 011/678] OC to 1.6GHz -Added 1.6GHz frequency table -DVFS tweaks -Speedo ID 7/Process ID 2 voltage tweaks --- arch/arm/mach-tegra/clock.c | 5 ++++- arch/arm/mach-tegra/cpu-tegra3.c | 8 ++++---- arch/arm/mach-tegra/tegra3_dvfs.c | 12 ++++++++---- arch/arm/mach-tegra/tegra3_speedo.c | 5 +++-- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c index 162063a8529..e9ee87534bc 100644 --- a/arch/arm/mach-tegra/clock.c +++ b/arch/arm/mach-tegra/clock.c @@ -684,7 +684,10 @@ void __init tegra_init_max_rate(struct clk *c, unsigned long max_rate) pr_warning("Lowering %s maximum rate from %lu to %lu\n", c->name, c->max_rate, max_rate); - + if(!strncmp(c->name,"cpu_g",strlen("cpu_g"))){ + pr_warning("Keep max_rate of %s as %lu \n",c->name, c->max_rate); + return; + } c->max_rate = max_rate; 
list_for_each_entry(shared_bus_user, &c->shared_bus_list, u.shared_bus_user.node) { diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index cc01ed854a5..9ec18b2c88b 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -291,13 +291,13 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) if (cpu < nr_cpu_ids) { if (up){ - printk("cpu_up(%u)+\n",cpu); + pr_debug("cpu_up(%u)+\n",cpu); cpu_up(cpu); - printk("cpu_up(%u)-\n",cpu); + pr_debug("cpu_up(%u)-\n",cpu); }else{ - printk("cpu_down(%u)+\n",cpu); + pr_debug("cpu_down(%u)+\n",cpu); cpu_down(cpu); - printk("cpu_down(%u)-\n",cpu); + pr_debug("cpu_down(%u)-\n",cpu); } } } diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 48c4384b1aa..1e3dce73564 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -33,7 +33,7 @@ static bool tegra_dvfs_core_disabled; static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1165, 1185, 1200, 1237}; static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50}; @@ -82,8 +82,9 @@ static int tegra3_get_core_floor_mv(int cpu_mv) if ((tegra_cpu_speedo_id() < 2) || (tegra_cpu_speedo_id() == 4) || (tegra_cpu_speedo_id() == 7) || - (tegra_cpu_speedo_id() == 8)) + (tegra_cpu_speedo_id() == 8)) { return 1200; + } if (cpu_mv < 1100) return 1200; if (cpu_mv <= 1250) @@ -173,10 +174,10 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1300), CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300), - CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1300), + CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1240, 1280, 1320, 1360, 1360, 1500, 1540, 1600), CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1300), CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1300), - + CPU_DVFS("cpu_g", 8, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1300), CPU_DVFS("cpu_g", 8, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300), CPU_DVFS("cpu_g", 8, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1300), @@ -511,6 +512,7 @@ static int __init get_cpu_nominal_mv_index( * result to the nominal cpu level for the chips with this speedo_id. 
*/ mv = tegra3_dvfs_rail_vdd_core.nominal_millivolts; + printk("tegra3_speedo: tegra3_dvfs_rail_vdd_core.nominal_millivolts mV for cpu_speedo_id:%u is %umV\n",speedo_id,mv); for (i = 0; i < MAX_DVFS_FREQS; i++) { if ((cpu_millivolts[i] == 0) || tegra3_get_core_floor_mv(cpu_millivolts[i]) > mv) @@ -521,6 +523,8 @@ static int __init get_cpu_nominal_mv_index( BUG_ON(mv < tegra3_dvfs_rail_vdd_cpu.min_millivolts); mv = min(mv, tegra_cpu_speedo_mv()); + printk("tegra3_speedo: nominal mV for cpu_speedo_id:%u is %umV",speedo_id,mv); + /* * Find matching cpu dvfs entry, and use it to determine index to the * final nominal voltage, that satisfies the following requirements: diff --git a/arch/arm/mach-tegra/tegra3_speedo.c b/arch/arm/mach-tegra/tegra3_speedo.c index bd880bc7ca8..0f0d9f7bdd5 100644 --- a/arch/arm/mach-tegra/tegra3_speedo.c +++ b/arch/arm/mach-tegra/tegra3_speedo.c @@ -510,8 +510,9 @@ int tegra_package_id(void) * latter is resolved by the dvfs code) */ static const int cpu_speedo_nominal_millivolts[] = -/* speedo_id 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 */ - { 1125, 1150, 1150, 1150, 1237, 1237, 1237, 1150, 1150, 912, 850, 850, 1237, 1237}; + /* speedo_id + * 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 */ + { 1125, 1150, 1150, 1150, 1237, 1237, 1237, 1200, 1150, 912, 850, 850, 1237, 1237}; int tegra_cpu_speedo_mv(void) { From 76feeeae59db9e320694f05ebd1f256dfae56898 Mon Sep 17 00:00:00 2001 From: motley Date: Mon, 16 Jul 2012 08:34:44 -0400 Subject: [PATCH 012/678] tegra3_dvfs: changes to allow OC for Process ID's 0 and 1. Theoretically, these should be earlier release versions like IO and earlier. --- arch/arm/mach-tegra/tegra3_dvfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 1e3dce73564..2d2d0a8d1da 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -172,8 +172,8 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 6, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), CPU_DVFS("cpu_g", 6, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1240, 1280, 1360, 1390, 1470, 1500, 1520, 1520, 1590, 1700), - CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1300), - CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300), + CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1360, 1500, 1540, 1600), + CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300, 1320, 1360, 1360, 1500, 1540, 1600), CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1240, 1280, 1320, 1360, 1360, 1500, 1540, 1600), CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1300), CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1300), From 2ad3ab05d9ee013db6fe2aa2235d2093c06d52f8 Mon Sep 17 00:00:00 2001 From: motley Date: Tue, 17 Jul 2012 08:37:05 -0400 Subject: [PATCH 013/678] tegra3_dvfs: GPU OC to 446 (will add CONFIG switch later if successful) -also fixed silly remaining brackets from prior debugging --- arch/arm/mach-tegra/tegra3_dvfs.c | 37 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 2d2d0a8d1da..7861e6a9109 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ 
b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -82,9 +82,8 @@ static int tegra3_get_core_floor_mv(int cpu_mv) if ((tegra_cpu_speedo_id() < 2) || (tegra_cpu_speedo_id() == 4) || (tegra_cpu_speedo_id() == 7) || - (tegra_cpu_speedo_id() == 8)) { + (tegra_cpu_speedo_id() == 8)) return 1200; - } if (cpu_mv < 1100) return 1200; if (cpu_mv <= 1250) @@ -236,21 +235,21 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("vi", 2, 1, KHZ, 1, 219000, 267000, 300000, 371000, 409000, 425000, 425000, 425000), CORE_DVFS("vi", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 470000, 470000, 470000), - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("vde", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("mpe", 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), @@ -273,8 +272,8 @@ static struct dvfs core_dvfs_table[] = { 
CORE_DVFS("host1x", 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), CORE_DVFS("host1x", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), From b0339c80ba2ce4d09374ff6f3aa8aa2e3972d3f6 Mon Sep 17 00:00:00 2001 From: Aaron Carroll Date: Tue, 17 Jul 2012 20:59:44 -0400 Subject: [PATCH 014/678] Added V(R) I/O Scheduler --- block/Kconfig.iosched | 14 +- block/Makefile | 1 + block/vr-iosched.c | 452 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 465 insertions(+), 2 deletions(-) create mode 100644 block/vr-iosched.c diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 624af8fcfb4..411f6b59700 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -52,7 +52,14 @@ config IOSCHED_SIO ensure fairness. The algorithm does not do any sorting but basic merging, trying to keep a minimum overhead. It is aimed mainly for aleatory access devices (eg: flash devices). - + +config IOSCHED_VR + tristate "V(R) I/O scheduler" + default n + ---help--- + Requests are chosen according to SSTF with a penalty of rev_penalty + for switching head direction. + choice prompt "Default I/O scheduler" default DEFAULT_CFQ @@ -71,6 +78,9 @@ choice config DEFAULT_SIO bool "SIO" if IOSCHED_SIO=y + + config DEFAULT_VR + bool "V(R)" if IOSCHED_VR=y endchoice @@ -80,7 +90,7 @@ config DEFAULT_IOSCHED default "cfq" if DEFAULT_CFQ default "noop" if DEFAULT_NOOP default "sio" if DEFAULT_SIO - + default "vr" if DEFAULT_VR endmenu endif diff --git a/block/Makefile b/block/Makefile index 7d657e71c48..2f8fb116ac3 100644 --- a/block/Makefile +++ b/block/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o +obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o diff --git a/block/vr-iosched.c b/block/vr-iosched.c new file mode 100644 index 00000000000..459ab721604 --- /dev/null +++ b/block/vr-iosched.c @@ -0,0 +1,452 @@ +/* +* V(R) I/O Scheduler +* +* Copyright (C) 2007 Aaron Carroll +* +* +* The algorithm: +* +* The next request is decided based on its distance from the last +* request, with a multiplicative penalty of `rev_penalty' applied +* for reversing the head direction. A rev_penalty of 1 means SSTF +* behaviour. As this variable is increased, the algorithm approaches +* pure SCAN. Setting rev_penalty to 0 forces SCAN. +* +* Async and synch requests are not treated seperately. Instead we +* rely on deadlines to ensure fairness. +* +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +enum vr_data_dir { +ASYNC, +SYNC, +}; + +enum vr_head_dir { +FORWARD, +BACKWARD, +}; + +static const int sync_expire = HZ / 2; /* max time before a sync is submitted. 
*/ +static const int async_expire = 5 * HZ; /* ditto for async, these limits are SOFT! */ +static const int fifo_batch = 1; +static const int rev_penalty = 1; /* penalty for reversing head direction */ + +struct vr_data { +struct rb_root sort_list; +struct list_head fifo_list[2]; + +struct request *next_rq; +struct request *prev_rq; + +unsigned int nbatched; +sector_t last_sector; /* head position */ +int head_dir; + +/* tunables */ +int fifo_expire[2]; +int fifo_batch; +int rev_penalty; +}; + +static void vr_move_request(struct vr_data *, struct request *); + +static inline struct vr_data * +vr_get_data(struct request_queue *q) +{ +return q->elevator->elevator_data; +} + +static void +vr_add_rq_rb(struct vr_data *vd, struct request *rq) +{ +//struct request *alias = elv_rb_add(&vd->sort_list, rq); +// +//if (unlikely(alias)) { +//vr_move_request(vd, alias); +//alias = elv_rb_add(&vd->sort_list, rq); +//BUG_ON(alias); +//} +elv_rb_add(&vd->sort_list, rq); +if (blk_rq_pos(rq) >= vd->last_sector) { +if (!vd->next_rq || blk_rq_pos(vd->next_rq) > blk_rq_pos(rq)) +vd->next_rq = rq; +} +else { +if (!vd->prev_rq || blk_rq_pos(vd->prev_rq) < blk_rq_pos(rq)) +vd->prev_rq = rq; +} + +BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq); +BUG_ON(vd->next_rq && vd->prev_rq && blk_rq_pos(vd->next_rq) < blk_rq_pos(vd->prev_rq)); +} + +static void +vr_del_rq_rb(struct vr_data *vd, struct request *rq) +{ +/* +* We might be deleting our cached next request. +* If so, find its sucessor. +*/ + +if (vd->next_rq == rq) +vd->next_rq = elv_rb_latter_request(NULL, rq); +else if (vd->prev_rq == rq) +vd->prev_rq = elv_rb_former_request(NULL, rq); + +BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq); +BUG_ON(vd->next_rq && vd->prev_rq && blk_rq_pos(vd->next_rq) < blk_rq_pos(vd->prev_rq)); + +elv_rb_del(&vd->sort_list, rq); +} + +/* +* add rq to rbtree and fifo +*/ +static void +vr_add_request(struct request_queue *q, struct request *rq) +{ +struct vr_data *vd = vr_get_data(q); +const int dir = rq_is_sync(rq); + +vr_add_rq_rb(vd, rq); + +if (vd->fifo_expire[dir]) { +rq_set_fifo_time(rq, jiffies + vd->fifo_expire[dir]); +list_add_tail(&rq->queuelist, &vd->fifo_list[dir]); +} +} + +/* +* remove rq from rbtree and fifo. 
+*/ +static void +vr_remove_request(struct request_queue *q, struct request *rq) +{ +struct vr_data *vd = vr_get_data(q); + +rq_fifo_clear(rq); +vr_del_rq_rb(vd, rq); +} + +static int +vr_merge(struct request_queue *q, struct request **rqp, struct bio *bio) +{ +sector_t sector = bio->bi_sector + bio_sectors(bio); +struct vr_data *vd = vr_get_data(q); +struct request *rq = elv_rb_find(&vd->sort_list, sector); + +if (rq && elv_rq_merge_ok(rq, bio)) { +*rqp = rq; +return ELEVATOR_FRONT_MERGE; +} +return ELEVATOR_NO_MERGE; +} + +static void +vr_merged_request(struct request_queue *q, struct request *req, int type) +{ +struct vr_data *vd = vr_get_data(q); + +/* +* if the merge was a front merge, we need to reposition request +*/ +if (type == ELEVATOR_FRONT_MERGE) { +vr_del_rq_rb(vd, req); +vr_add_rq_rb(vd, req); +} +} + +static void +vr_merged_requests(struct request_queue *q, struct request *rq, +struct request *next) +{ +/* +* if next expires before rq, assign its expire time to rq +* and move into next position (next will be deleted) in fifo +*/ +if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) { +if (time_before(rq_fifo_time(next), rq_fifo_time(rq))) { +list_move(&rq->queuelist, &next->queuelist); +rq_set_fifo_time(rq, rq_fifo_time(next)); +} +} + +vr_remove_request(q, next); +} + +/* +* move an entry to dispatch queue +*/ +static void +vr_move_request(struct vr_data *vd, struct request *rq) +{ +struct request_queue *q = rq->q; + +if (blk_rq_pos(rq) > vd->last_sector) +vd->head_dir = FORWARD; +else +vd->head_dir = BACKWARD; + +vd->last_sector = blk_rq_pos(rq); +vd->next_rq = elv_rb_latter_request(NULL, rq); +vd->prev_rq = elv_rb_former_request(NULL, rq); + +BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq); + +vr_remove_request(q, rq); +elv_dispatch_add_tail(q, rq); +vd->nbatched++; +} + +/* +* get the first expired request in direction ddir +*/ +static struct request * +vr_expired_request(struct vr_data *vd, int ddir) +{ +struct request *rq; + +if (list_empty(&vd->fifo_list[ddir])) +return NULL; + +rq = rq_entry_fifo(vd->fifo_list[ddir].next); +if (time_after(jiffies, rq_fifo_time(rq))) +return rq; + +return NULL; +} + +/* +* Returns the oldest expired request +*/ +static struct request * +vr_check_fifo(struct vr_data *vd) +{ +struct request *rq_sync = vr_expired_request(vd, SYNC); +struct request *rq_async = vr_expired_request(vd, ASYNC); + +if (rq_async && rq_sync) { +if (time_after(rq_fifo_time(rq_async), rq_fifo_time(rq_sync))) +return rq_sync; +} +else if (rq_sync) +return rq_sync; + +return rq_async; +} + +/* +* Return the request with the lowest penalty +*/ +static struct request * +vr_choose_request(struct vr_data *vd) +{ +int penalty = (vd->rev_penalty) ? 
: INT_MAX; +struct request *next = vd->next_rq; +struct request *prev = vd->prev_rq; +sector_t next_pen, prev_pen; + +BUG_ON(prev && prev == next); + +if (!prev) +return next; +else if (!next) +return prev; + +/* At this point both prev and next are defined and distinct */ + +next_pen = blk_rq_pos(next) - vd->last_sector; +prev_pen = vd->last_sector - blk_rq_pos(prev); + +if (vd->head_dir == FORWARD) +next_pen = do_div(next_pen, penalty); +else +prev_pen = do_div(prev_pen, penalty); + +if (next_pen <= prev_pen) +return next; + +return prev; +} + +static int +vr_dispatch_requests(struct request_queue *q, int force) +{ +struct vr_data *vd = vr_get_data(q); +struct request *rq = NULL; + +/* Check for and issue expired requests */ +if (vd->nbatched > vd->fifo_batch) { +vd->nbatched = 0; +rq = vr_check_fifo(vd); +} + +if (!rq) { +rq = vr_choose_request(vd); +if (!rq) +return 0; +} + +vr_move_request(vd, rq); + +return 1; +} + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38) +static int +vr_queue_empty(struct request_queue *q) +{ +struct vr_data *vd = vr_get_data(q); +return RB_EMPTY_ROOT(&vd->sort_list); +} +#endif + +static void +vr_exit_queue(struct elevator_queue *e) +{ +struct vr_data *vd = e->elevator_data; +BUG_ON(!RB_EMPTY_ROOT(&vd->sort_list)); +kfree(vd); +} + +/* +* initialize elevator private data (vr_data). +*/ +static void *vr_init_queue(struct request_queue *q) +{ +struct vr_data *vd; + +vd = kmalloc_node(sizeof(*vd), GFP_KERNEL | __GFP_ZERO, q->node); +if (!vd) +return NULL; + +INIT_LIST_HEAD(&vd->fifo_list[SYNC]); +INIT_LIST_HEAD(&vd->fifo_list[ASYNC]); +vd->sort_list = RB_ROOT; +vd->fifo_expire[SYNC] = sync_expire; +vd->fifo_expire[ASYNC] = async_expire; +vd->fifo_batch = fifo_batch; +vd->rev_penalty = rev_penalty; +return vd; +} + +/* +* sysfs parts below +*/ + +static ssize_t +vr_var_show(int var, char *page) +{ +return sprintf(page, "%d\n", var); +} + +static ssize_t +vr_var_store(int *var, const char *page, size_t count) +{ +*var = simple_strtol(page, NULL, 10); +return count; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ +struct vr_data *vd = e->elevator_data; \ +int __data = __VAR; \ +if (__CONV) \ +__data = jiffies_to_msecs(__data); \ +return vr_var_show(__data, (page)); \ +} +SHOW_FUNCTION(vr_sync_expire_show, vd->fifo_expire[SYNC], 1); +SHOW_FUNCTION(vr_async_expire_show, vd->fifo_expire[ASYNC], 1); +SHOW_FUNCTION(vr_fifo_batch_show, vd->fifo_batch, 0); +SHOW_FUNCTION(vr_rev_penalty_show, vd->rev_penalty, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ +struct vr_data *vd = e->elevator_data; \ +int __data; \ +int ret = vr_var_store(&__data, (page), count); \ +if (__data < (MIN)) \ +__data = (MIN); \ +else if (__data > (MAX)) \ +__data = (MAX); \ +if (__CONV) \ +*(__PTR) = msecs_to_jiffies(__data); \ +else \ +*(__PTR) = __data; \ +return ret; \ +} +STORE_FUNCTION(vr_sync_expire_store, &vd->fifo_expire[SYNC], 0, INT_MAX, 1); +STORE_FUNCTION(vr_async_expire_store, &vd->fifo_expire[ASYNC], 0, INT_MAX, 1); +STORE_FUNCTION(vr_fifo_batch_store, &vd->fifo_batch, 0, INT_MAX, 0); +STORE_FUNCTION(vr_rev_penalty_store, &vd->rev_penalty, 0, INT_MAX, 0); +#undef STORE_FUNCTION + +#define DD_ATTR(name) \ +__ATTR(name, S_IRUGO|S_IWUSR, vr_##name##_show, \ +vr_##name##_store) + +static struct elv_fs_entry vr_attrs[] = { +DD_ATTR(sync_expire), +DD_ATTR(async_expire), 
+DD_ATTR(fifo_batch), +DD_ATTR(rev_penalty), +__ATTR_NULL +}; + +static struct elevator_type iosched_vr = { +.ops = { +.elevator_merge_fn = vr_merge, +.elevator_merged_fn = vr_merged_request, +.elevator_merge_req_fn = vr_merged_requests, +.elevator_dispatch_fn = vr_dispatch_requests, +.elevator_add_req_fn = vr_add_request, +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38) +.elevator_queue_empty_fn = vr_queue_empty, +#endif +.elevator_former_req_fn = elv_rb_former_request, +.elevator_latter_req_fn = elv_rb_latter_request, +.elevator_init_fn = vr_init_queue, +.elevator_exit_fn = vr_exit_queue, +}, + +.elevator_attrs = vr_attrs, +.elevator_name = "vr", +.elevator_owner = THIS_MODULE, +}; + +static int __init vr_init(void) +{ +elv_register(&iosched_vr); + +return 0; +} + +static void __exit vr_exit(void) +{ +elv_unregister(&iosched_vr); +} + +module_init(vr_init); +module_exit(vr_exit); + +MODULE_AUTHOR("Aaron Carroll"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("V(R) IO scheduler"); + From 1821ace24c80a00344f069c50b59bd747d33de7f Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 18 Jul 2012 00:14:51 -0400 Subject: [PATCH 015/678] tegra: custom user voltage control (thansk faux123) -also some voltage tweaks and debugging statement tweaks --- arch/arm/mach-tegra/Kconfig | 7 +++ arch/arm/mach-tegra/tegra3_dvfs.c | 14 ++++-- drivers/cpufreq/cpufreq.c | 71 +++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 9ac04ddf8ed..355aec63db6 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -271,6 +271,13 @@ config TEGRA_EMC_SCALING_ENABLE depends on TEGRA_SILICON_PLATFORM default n +config VOLTAGE_CONTROL + bool "Enable user voltage control on Tegra CPU" + depends on TEGRA_SILICON_PLATFORM + default n + help + User custom voltage control interface + config TEGRA_CPU_DVFS bool "Enable voltage scaling on Tegra CPU" depends on TEGRA_SILICON_PLATFORM diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 7861e6a9109..6a3b8be8365 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -28,12 +28,17 @@ #include "board.h" #include "tegra3_emc.h" +#ifdef CONFIG_VOLTAGE_CONTROL +int user_mv_table[MAX_DVFS_FREQS] = { + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1165, 1185, 1200, 1237}; +#endif + static bool tegra_dvfs_cpu_disabled; static bool tegra_dvfs_core_disabled; static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1165, 1185, 1200, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1190, 1200, 1237}; static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50}; @@ -511,7 +516,7 @@ static int __init get_cpu_nominal_mv_index( * result to the nominal cpu level for the chips with this speedo_id. 
*/ mv = tegra3_dvfs_rail_vdd_core.nominal_millivolts; - printk("tegra3_speedo: tegra3_dvfs_rail_vdd_core.nominal_millivolts mV for cpu_speedo_id:%u is %umV\n",speedo_id,mv); + pr_info("tegra3_dvfs: %s: tegra3_dvfs_rail_vdd_core.nominal_millivolts mV for cpu_speedo_id: %u is %umV\n",__func__,speedo_id,mv); for (i = 0; i < MAX_DVFS_FREQS; i++) { if ((cpu_millivolts[i] == 0) || tegra3_get_core_floor_mv(cpu_millivolts[i]) > mv) @@ -519,10 +524,10 @@ static int __init get_cpu_nominal_mv_index( } BUG_ON(i == 0); mv = cpu_millivolts[i - 1]; + pr_info("tegra3_dvfs: %s: cpu mv: %i\n", __func__, mv); BUG_ON(mv < tegra3_dvfs_rail_vdd_cpu.min_millivolts); mv = min(mv, tegra_cpu_speedo_mv()); - - printk("tegra3_speedo: nominal mV for cpu_speedo_id:%u is %umV",speedo_id,mv); + pr_info("tegra3_dvfs: %s: nominal mV for cpu_speedo_id:%u is %umV\n",__func__,speedo_id,mv); /* * Find matching cpu dvfs entry, and use it to determine index to the @@ -561,6 +566,7 @@ static int __init get_cpu_nominal_mv_index( speedo_id, process_id, d->freqs[i-1] * d->freqs_mult); *cpu_dvfs = d; + pr_info("tegra3_dvfs: %s: cpu_nominal_mv_index: %i\n",__func__, i - 1); return (i - 1); } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 118a94575ca..5ef58048041 100755 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -596,6 +596,70 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) return sprintf(buf, "%u\n", policy->cpuinfo.max_freq); } +#ifdef CONFIG_VOLTAGE_CONTROL +/* + * Tegra3 voltage control via cpufreq by Paul Reioux (faux123) + * inspired by Michael Huang's voltage control code for OMAP44xx + */ + +#include "../../arch/arm/mach-tegra/dvfs.h" +#include "../../arch/arm/mach-tegra/clock.h" + +extern int user_mv_table[MAX_DVFS_FREQS]; + +static ssize_t show_UV_mV_table(struct cpufreq_policy *policy, char *buf) +{ + int i = 0; + char *out = buf; + struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); + + /* find how many actual entries there are */ + i = cpu_clk_g->dvfs->num_freqs; + + for(i--; i >=0; i--) { + out += sprintf(out, "%lumhz: %i mV\n", + cpu_clk_g->dvfs->freqs[i]/1000000, + cpu_clk_g->dvfs->millivolts[i]); + } + + return out - buf; +} + +static ssize_t store_UV_mV_table(struct cpufreq_policy *policy, char *buf, size_t count) +{ + int i = 0; + unsigned long volt_cur; + int ret; + char size_cur[16]; + + struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); + + /* find how many actual entries there are */ + i = cpu_clk_g->dvfs->num_freqs; + + for(i--; i >= 0; i--) { + + if(cpu_clk_g->dvfs->freqs[i]/1000000 != 0) { + ret = sscanf(buf, "%lu", &volt_cur); + if (ret != 1) + return -EINVAL; + + /* TODO: need some robustness checks */ + user_mv_table[i] = volt_cur; + pr_info("user mv tbl[%i]: %lu\n", i, volt_cur); + + /* Non-standard sysfs interface: advance buf */ + ret = sscanf(buf, "%s", size_cur); + buf += (strlen(size_cur)+1); + } + } + /* update dvfs table here */ + cpu_clk_g->dvfs->millivolts = user_mv_table; + + return count; +} +#endif + cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400); cpufreq_freq_attr_ro(cpuinfo_min_freq); cpufreq_freq_attr_ro(cpuinfo_max_freq); @@ -613,6 +677,9 @@ cpufreq_freq_attr_rw(scaling_setspeed); cpufreq_freq_attr_rw(dvfs_test); cpufreq_freq_attr_ro(policy_min_freq); cpufreq_freq_attr_ro(policy_max_freq); +#ifdef CONFIG_VOLTAGE_CONTROL +cpufreq_freq_attr_rw(UV_mV_table); +#endif static struct attribute *default_attrs[] = { &cpuinfo_min_freq.attr, @@ -629,6 +696,10 @@ static struct attribute *default_attrs[] = { 
&dvfs_test.attr, &policy_min_freq.attr, &policy_max_freq.attr, +#ifdef CONFIG_VOLTAGE_CONTROL + &UV_mV_table.attr, +#endif + NULL }; From 50913300ec8acdb7c7e2bae2d6b07d7253728a69 Mon Sep 17 00:00:00 2001 From: motley Date: Thu, 19 Jul 2012 21:57:35 -0400 Subject: [PATCH 016/678] Updated defconfig -voltage control switch -V(R) i/o scheduler -default i/o should still be SIO --- arch/arm/configs/motley_grouper_defconfig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm/configs/motley_grouper_defconfig b/arch/arm/configs/motley_grouper_defconfig index 42eea718501..41aeb869c25 100644 --- a/arch/arm/configs/motley_grouper_defconfig +++ b/arch/arm/configs/motley_grouper_defconfig @@ -180,11 +180,13 @@ CONFIG_IOSCHED_NOOP=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y CONFIG_IOSCHED_SIO=y +CONFIG_IOSCHED_VR=y # CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_NOOP is not set -# CONFIG_DEFAULT_SIO is not set -CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_DEFAULT_SIO=y +# CONFIG_DEFAULT_VR is not set +CONFIG_DEFAULT_IOSCHED="sio" # CONFIG_INLINE_SPIN_TRYLOCK is not set # CONFIG_INLINE_SPIN_TRYLOCK_BH is not set # CONFIG_INLINE_SPIN_LOCK is not set @@ -311,6 +313,7 @@ CONFIG_TEGRA_PWM=y CONFIG_TEGRA_FIQ_DEBUGGER=y # CONFIG_TEGRA_CARDHU_DSI is not set CONFIG_TEGRA_EMC_SCALING_ENABLE=y +CONFIG_VOLTAGE_CONTROL=y CONFIG_TEGRA_CPU_DVFS=y CONFIG_TEGRA_CORE_DVFS=y CONFIG_TEGRA_IOVMM_SMMU=y From d4361c04a8c9d05f1db017a1adb5043fe54b2133 Mon Sep 17 00:00:00 2001 From: motley Date: Sat, 21 Jul 2012 00:24:47 -0400 Subject: [PATCH 017/678] board-grouper-panel: increased panel clock rate - increases fps without further GPU clock --- arch/arm/mach-tegra/board-grouper-panel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index e05433934d7..bc38a170e59 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -389,7 +389,7 @@ static struct resource grouper_disp2_resources[] = { static struct tegra_dc_mode grouper_panel_modes[] = { { /* 1280x800@60Hz */ - .pclk = 68000000, + .pclk = 74180000, .h_ref_to_sync = 1, .v_ref_to_sync = 1, .h_sync_width = 24, From 452d71b74a2ff6b3b90fcf4902aa350e7f7a08ca Mon Sep 17 00:00:00 2001 From: motley Date: Sat, 21 Jul 2012 00:28:38 -0400 Subject: [PATCH 018/678] tegra3 dvfs/clock tweaks -Allow conditional GPU OC compile -Frequency and voltage tweaks -Hope to fix units that can't OC (CPU process_id = 3) -Tweak cputegra to see if we can get the CPU to settle back a bit -Tweak frequencies, some lower frequencies should be used more often for better battery --- arch/arm/mach-tegra/Kconfig | 7 ++++++ arch/arm/mach-tegra/cpu-tegra.c | 9 +------ arch/arm/mach-tegra/tegra3_clocks.c | 22 ++++++++--------- arch/arm/mach-tegra/tegra3_dvfs.c | 38 +++++++++++++++++++++++------ 4 files changed, 50 insertions(+), 26 deletions(-) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 355aec63db6..4cfca6df517 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -278,6 +278,13 @@ config VOLTAGE_CONTROL help User custom voltage control interface +config GPU_OVERCLOCK + bool "Enable GPU overclock for Tegra3" + depends on TEGRA_SILICON_PLATFORM + default n + help + Choose y to overclock the GPU + config TEGRA_CPU_DVFS bool "Enable voltage scaling on Tegra CPU" depends on TEGRA_SILICON_PLATFORM diff --git 
a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index dc992e6dff5..0e7c8156e8e 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -339,15 +339,8 @@ static int tegra_cpu_edp_notify( cpu_speed = tegra_getspeed(0); -#ifdef TEGRA3_OVERCLOCK - if(edp_enable) { - pr_info("%s: tegra_cpu_edp_notify(). Dynamic EDP is enabled.\n", __func__); - new_speed = edp_governor_speed(new_speed); - pr_info("%s: tegra_cpu_edp_notify(). Dynamic EDP is disabled.\n", __func__); - } -#else new_speed = edp_governor_speed(cpu_speed); -#endif + if (new_speed < cpu_speed) { ret = tegra_cpu_set_speed_cap(NULL); if (ret) { diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index b9e3e25c1b7..f65ffd7177d 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4553,11 +4553,11 @@ static struct cpufreq_frequency_table freq_table_1p0GHz[] = { { 0, 51000 }, { 1, 102000 }, { 2, 204000 }, - { 3, 312000 }, - { 4, 456000 }, - { 5, 608000 }, + { 3, 340000 }, + { 4, 475000 }, + { 5, 640000 }, { 6, 760000 }, - { 7, 816000 }, + { 7, 860000 }, { 8, 912000 }, { 9, 1000000 }, {10, CPUFREQ_TABLE_END }, @@ -4585,7 +4585,7 @@ static struct cpufreq_frequency_table freq_table_1p4GHz[] = { { 2, 204000 }, { 3, 370000 }, { 4, 475000 }, - { 5, 620000 }, + { 5, 640000 }, { 6, 760000 }, { 7, 860000 }, { 8, 1000000 }, @@ -4618,12 +4618,12 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { { 0, 51000 }, { 1, 102000 }, { 2, 204000 }, - { 3, 370000 }, + { 3, 340000 }, { 4, 475000 }, - { 5, 620000 }, - { 6, 760000 }, - { 7, 910000 }, - { 8, 1150000 }, + { 5, 640000 }, + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1100000 }, { 9, 1200000 }, {10, 1300000 }, {11, 1400000 }, @@ -4638,7 +4638,7 @@ static struct cpufreq_frequency_table freq_table_1p7GHz[] = { { 2, 204000 }, { 3, 370000 }, { 4, 475000 }, - { 5, 620000 }, + { 5, 640000 }, { 6, 760000 }, { 7, 910000 }, { 8, 1150000 }, diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 6a3b8be8365..f3e285972c1 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -30,7 +30,7 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1165, 1185, 1200, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1190, 1200, 1237}; #endif static bool tegra_dvfs_cpu_disabled; @@ -146,7 +146,7 @@ static struct dvfs_relationship tegra3_dvfs_relationships[] = { } static struct dvfs cpu_dvfs_table[] = { - /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ + /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1190, 1200, 1237 */ CPU_DVFS("cpu_g", 0, 0, MHZ, 1, 1, 684, 684, 817, 817, 1026, 1102, 1149, 1187, 1225, 1282, 1300), CPU_DVFS("cpu_g", 0, 1, MHZ, 1, 1, 807, 807, 948, 948, 1117, 1171, 1206, 1300), CPU_DVFS("cpu_g", 0, 2, MHZ, 1, 1, 883, 883, 1039, 1039, 1178, 1206, 1300), @@ -177,10 +177,10 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 6, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1240, 1280, 1360, 1390, 1470, 1500, 1520, 1520, 1590, 1700), CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1360, 1500, 1540, 1600), - CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 
1300, 1320, 1360, 1360, 1500, 1540, 1600), - CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1240, 1280, 1320, 1360, 1360, 1500, 1540, 1600), - CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1300), - CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1300), + CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300, 1320, 1380, 1420, 1500, 1540, 1600), + CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1240, 1280, 1320, 1380, 1420, 1500, 1540, 1600), + CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1320, 1340, 1360, 1380, 1420, 1500, 1540, 1600), + CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1280, 1320, 1340, 1360, 1380, 1420, 1450, 1500, 1540, 1600), CPU_DVFS("cpu_g", 8, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1300), CPU_DVFS("cpu_g", 8, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300), @@ -240,6 +240,7 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("vi", 2, 1, KHZ, 1, 219000, 267000, 300000, 371000, 409000, 425000, 425000, 425000), CORE_DVFS("vi", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 470000, 470000, 470000), +#ifdef CONFIG_GPU_OVERCLOCK CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), @@ -255,6 +256,23 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), +#else + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), +#endif CORE_DVFS("vde", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("mpe", 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), @@ -276,11 +294,17 @@ static struct dvfs core_dvfs_table[] 
= { CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), CORE_DVFS("host1x", 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), CORE_DVFS("host1x", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), - +#ifdef CONFIG_GPU_OVERCLOCK CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), +#else + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 416000, 416000, 416000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 416000, 416000, 416000), + CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), + CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), +#endif CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1200000), From 9b2c73783fff418eaa82f85ea39b36d980d28651 Mon Sep 17 00:00:00 2001 From: motley Date: Sat, 21 Jul 2012 00:29:19 -0400 Subject: [PATCH 019/678] Updated defconfig Added some VPN/networking capabilities for those that need it (L2TP, IP_GRE_DEMUX,INET_AH, INET_XFRM_MODE_BEET) Some unnecessary debugging options turned off. Should save kernel RAM usage. --- arch/arm/configs/motley_grouper_defconfig | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/arm/configs/motley_grouper_defconfig b/arch/arm/configs/motley_grouper_defconfig index 41aeb869c25..4cca9916f3f 100644 --- a/arch/arm/configs/motley_grouper_defconfig +++ b/arch/arm/configs/motley_grouper_defconfig @@ -314,6 +314,7 @@ CONFIG_TEGRA_FIQ_DEBUGGER=y # CONFIG_TEGRA_CARDHU_DSI is not set CONFIG_TEGRA_EMC_SCALING_ENABLE=y CONFIG_VOLTAGE_CONTROL=y +CONFIG_GPU_OVERCLOCK=y CONFIG_TEGRA_CPU_DVFS=y CONFIG_TEGRA_CORE_DVFS=y CONFIG_TEGRA_IOVMM_SMMU=y @@ -592,18 +593,19 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y # CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE_DEMUX is not set +CONFIG_NET_IPGRE_DEMUX=y +# CONFIG_NET_IPGRE is not set # CONFIG_IP_MROUTE is not set # CONFIG_ARPD is not set # CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set +CONFIG_INET_AH=y CONFIG_INET_ESP=y # CONFIG_INET_IPCOMP is not set # CONFIG_INET_XFRM_TUNNEL is not set CONFIG_INET_TUNNEL=y CONFIG_INET_XFRM_MODE_TRANSPORT=y CONFIG_INET_XFRM_MODE_TUNNEL=y -# CONFIG_INET_XFRM_MODE_BEET is not set +CONFIG_INET_XFRM_MODE_BEET=y # CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_TCP_CONG_ADVANCED is not set @@ -810,7 +812,9 @@ CONFIG_IP6_NF_RAW=y # CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set -# CONFIG_L2TP is not set +CONFIG_L2TP=y +# CONFIG_L2TP_DEBUGFS is not set +# CONFIG_L2TP_V3 is not set # CONFIG_BRIDGE is not set # CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set @@ -1297,6 +1301,8 @@ CONFIG_PPP_DEFLATE=y CONFIG_PPP_BSDCOMP=y CONFIG_PPP_MPPE=y # CONFIG_PPPOE is not set +# CONFIG_PPTP is not set +# CONFIG_PPPOL2TP is not set CONFIG_PPPOLAC=y CONFIG_PPPOPNS=y # CONFIG_SLIP is not set @@ -3106,7 +3112,7 @@ CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 # CONFIG_DETECT_HUNG_TASK is not set CONFIG_SCHED_DEBUG=y 
-CONFIG_SCHEDSTATS=y +# CONFIG_SCHEDSTATS is not set CONFIG_TIMER_STATS=y # CONFIG_DEBUG_OBJECTS is not set # CONFIG_DEBUG_SLAB is not set @@ -3127,8 +3133,7 @@ CONFIG_STACKTRACE=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_HIGHMEM is not set CONFIG_DEBUG_BUGVERBOSE=y -CONFIG_DEBUG_INFO=y -# CONFIG_DEBUG_INFO_REDUCED is not set +# CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_MEMORY_INIT is not set @@ -3140,7 +3145,7 @@ CONFIG_DEBUG_INFO=y # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 -CONFIG_RCU_CPU_STALL_VERBOSE=y +# CONFIG_RCU_CPU_STALL_VERBOSE is not set # CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set From ec0c3be4b1bcce8e101f7a3df1d73af3bdaaba75 Mon Sep 17 00:00:00 2001 From: motley Date: Sat, 21 Jul 2012 17:19:05 -0400 Subject: [PATCH 020/678] cpu-tegra.c: default both force_policy_max and edp_enable to "1" force_policy_max is available in sysfs under the cpu-tegra module if a ROM dev would want to override this for some reason. This was causing the cpu to spike above the frequency limit set from userland or during ramdisk init. edp_enable (a static variable not in sysfs) will be flipped immediately to "0" unless the cpu is very warm on boot. --- arch/arm/mach-tegra/cpu-tegra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index 0e7c8156e8e..0786ec500fe 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -56,11 +56,11 @@ static DEFINE_MUTEX(tegra_cpu_lock); static bool is_suspended; static int suspend_index; -static bool force_policy_max; +static bool force_policy_max = 1; #define TEGRA3_OVERCLOCK #define TEGRA3_DYNAMIC_EDP_THRES_TEMP (60) -static bool edp_enable = 0; +static bool edp_enable = 1; static int force_policy_max_set(const char *arg, const struct kernel_param *kp) { From b84ec81c3f75c48dc1d66a72b64cb7a190037738 Mon Sep 17 00:00:00 2001 From: motley Date: Sun, 22 Jul 2012 23:52:37 -0400 Subject: [PATCH 021/678] Increase GPU clock to 484MHz --- arch/arm/mach-tegra/tegra3_dvfs.c | 38 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index f3e285972c1..6716554a8d0 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -241,21 +241,21 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("vi", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 470000, 470000, 470000), #ifdef CONFIG_GPU_OVERCLOCK - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 
380000, 446000, 446000, 446000, 446000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), #else CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), @@ -295,13 +295,13 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("host1x", 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), CORE_DVFS("host1x", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), #ifdef CONFIG_GPU_OVERCLOCK - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #else - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 416000, 416000, 416000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 416000, 416000, 416000), + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif 
From 32c440bf5e8257b07328f98c7dca13bbcc8f6fee Mon Sep 17 00:00:00 2001 From: motley Date: Mon, 23 Jul 2012 00:06:35 -0400 Subject: [PATCH 022/678] Compiler optimizations -fmodulo-sched, -fmodulo-sched-allow-regmoves, -funswitch-loops, -fpredictive-commoning, -fgcse-after-reload, -ftree-vectorize, -floop-interchange, -floop-strip-mine, -floop-block, -mfpu=neon --- Makefile | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 7c51ce36b64..7e38fdac3cd 100644 --- a/Makefile +++ b/Makefile @@ -365,11 +365,17 @@ LINUXINCLUDE := -I$(srctree)/arch/$(hdr-arch)/include \ KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -fno-common \ - -Werror-implicit-function-declaration \ - -Wno-format-security \ - -fno-delete-null-pointer-checks -KBUILD_AFLAGS_KERNEL := + -fno-strict-aliasing -fno-common \ + -Werror-implicit-function-declaration \ + -Wno-format-security \ + -fno-delete-null-pointer-checks \ + -mtune=cortex-a9 -mfpu=neon \ + -fmodulo-sched -fmodulo-sched-allow-regmoves \ + -funswitch-loops -fpredictive-commoning -fgcse-after-reload \ + -ftree-vectorize -floop-interchange -floop-strip-mine -floop-block + + + KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := KBUILD_AFLAGS := -D__ASSEMBLY__ KBUILD_AFLAGS_MODULE := -DMODULE From f5716e473d4916a4ebc5856a68c39b6fe284b9e7 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Tue, 24 Jul 2012 22:18:16 -0400 Subject: [PATCH 023/678] video: tegra: host: Fix error case memory leaks When a submit fails, the related nvhost_job is not freed. Add an explicit free. Also, 3D is mapping the save buffer, but it is not unmapped. Bug 991972 --- drivers/video/tegra/host/bus_client.c | 5 +++++ drivers/video/tegra/host/gr3d/gr3d_t30.c | 2 ++ 2 files changed, 7 insertions(+) diff --git a/drivers/video/tegra/host/bus_client.c b/drivers/video/tegra/host/bus_client.c index e56adecedb2..eb6fc1b7aab 100644 --- a/drivers/video/tegra/host/bus_client.c +++ b/drivers/video/tegra/host/bus_client.c @@ -252,6 +252,11 @@ static void reset_submit(struct nvhost_channel_userctx *ctx) ctx->hdr.num_relocs = 0; ctx->num_relocshifts = 0; ctx->hdr.num_waitchks = 0; + + if (ctx->job) { + nvhost_job_put(ctx->job); + ctx->job = NULL; + } } static ssize_t nvhost_channelwrite(struct file *filp, const char __user *buf, diff --git a/drivers/video/tegra/host/gr3d/gr3d_t30.c b/drivers/video/tegra/host/gr3d/gr3d_t30.c index 8ca6b7b44b9..174bbde1124 100644 --- a/drivers/video/tegra/host/gr3d/gr3d_t30.c +++ b/drivers/video/tegra/host/gr3d/gr3d_t30.c @@ -425,6 +425,8 @@ struct nvhost_hwctx_handler *nvhost_gr3d_t30_ctxhandler_init( setup_save(p, save_ptr); + nvmap_munmap(p->save_buf, save_ptr); + p->h.alloc = ctx3d_alloc_v1; p->h.save_push = save_push_v1; p->h.save_service = NULL; From afc1db4d79ca40156a258339f75059844f8c94fd Mon Sep 17 00:00:00 2001 From: Krishna Reddy Date: Wed, 25 Jul 2012 00:46:32 -0400 Subject: [PATCH 024/678] mm: Ensure pte and pmd stores ordering. Bug 974153 --- arch/arm/mm/pageattr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c index 5f8071110e8..cc41a6a0619 100644 --- a/arch/arm/mm/pageattr.c +++ b/arch/arm/mm/pageattr.c @@ -331,6 +331,10 @@ static void __set_pmd_pte(pmd_t *pmd, unsigned long address, pte_t *pte) cpa_debug("__set_pmd_pte %x %x %x\n", pmd, pte, *pte); + /* enforce pte entry stores ordering to avoid pmd writes + * bypassing pte stores. 
+ */ + dsb(); /* change init_mm */ pmd_populate_kernel(&init_mm, pmd, pte); @@ -342,7 +346,10 @@ static void __set_pmd_pte(pmd_t *pmd, unsigned long address, pte_t *pte) pgd_index(address), address); pmd_populate_kernel(NULL, pmd, pte); } - + /* enforce pmd entry stores ordering to avoid tlb flush bypassing + * pmd entry stores. + */ + dsb(); } static int From 778bacea9c4a3044f4d33e1eaf0560db8628f1e4 Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 25 Jul 2012 00:49:04 -0400 Subject: [PATCH 025/678] Get rid of some kernel log spam --- arch/arm/kernel/smp.c | 4 ++-- drivers/power/bq27541_battery.c | 2 +- kernel/time/tick-sched.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index bdfb37c5953..47b09a3ea04 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -215,7 +215,7 @@ void __cpu_die(unsigned int cpu) pr_err("CPU%u: cpu didn't die\n", cpu); return; } - printk(KERN_NOTICE "CPU%u: shutdown\n", cpu); + pr_debug(KERN_NOTICE "CPU%u: shutdown\n", cpu); if (!platform_cpu_kill(cpu)) printk("CPU%u: unable to kill\n", cpu); @@ -280,7 +280,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id(); - printk("CPU%u: Booted secondary processor\n", cpu); + pr_debug("CPU%u: Booted secondary processor\n", cpu); /* * All kernel threads share the same mm context; grab a diff --git a/drivers/power/bq27541_battery.c b/drivers/power/bq27541_battery.c index 736785e5a58..00d35d44c09 100755 --- a/drivers/power/bq27541_battery.c +++ b/drivers/power/bq27541_battery.c @@ -64,7 +64,7 @@ /* Debug Message */ #define BAT_NOTICE(format, arg...) \ - printk(KERN_NOTICE "%s " format , __FUNCTION__ , ## arg) + pr_debug(KERN_NOTICE "%s " format , __FUNCTION__ , ## arg) #define BAT_ERR(format, arg...) \ printk(KERN_ERR format , ## arg) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d5097c44b40..d5f59e37f82 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -641,7 +641,7 @@ static void tick_nohz_switch_to_nohz(void) } local_irq_enable(); - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); + pr_debug(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); } /* @@ -795,7 +795,7 @@ void tick_setup_sched_timer(void) #ifdef CONFIG_NO_HZ if (tick_nohz_enabled) { ts->nohz_mode = NOHZ_MODE_HIGHRES; - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); + pr_debug(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); } #endif } From 607b75a1aff412d935c555ed5a257cd68b3e4a17 Mon Sep 17 00:00:00 2001 From: codeworkx Date: Wed, 25 Jul 2012 08:38:02 -0400 Subject: [PATCH 026/678] HACK: block fbearlysuspend to not break androids crt-off animation Thanks drewis (repo) and aaronpoweruser for pointing it out. Untested with CRT animation (by me), but this may help with ROMs that have this functionality. 
--- kernel/power/fbearlysuspend.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/power/fbearlysuspend.c b/kernel/power/fbearlysuspend.c index 0f970b6b87b..0c547085119 100644 --- a/kernel/power/fbearlysuspend.c +++ b/kernel/power/fbearlysuspend.c @@ -13,6 +13,7 @@ * */ +#include #include #include #include @@ -33,6 +34,10 @@ static void stop_drawing_early_suspend(struct early_suspend *h) int ret; unsigned long irq_flags; + /* FIXME: earlysuspend breaks androids CRT-off animation + * Sleep a little bit to get it played properly */ + msleep(500); + spin_lock_irqsave(&fb_state_lock, irq_flags); fb_state = FB_STATE_REQUEST_STOP_DRAWING; spin_unlock_irqrestore(&fb_state_lock, irq_flags); From f14a2b8bd67663304046b13f6d2106e35df7601d Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 25 Jul 2012 08:41:29 -0400 Subject: [PATCH 027/678] cpu-tegra3: modified the hot-plug governor down_delay to be 1s instead of 2s. --- arch/arm/mach-tegra/cpu-tegra3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index 9ec18b2c88b..ef73d4b4e93 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -41,7 +41,7 @@ #define INITIAL_STATE TEGRA_HP_DISABLED #define UP2G0_DELAY_MS 70 #define UP2Gn_DELAY_MS 100 -#define DOWN_DELAY_MS 2000 +#define DOWN_DELAY_MS 1000 static struct mutex *tegra3_cpu_lock; From c1ebb54adea5be78ea70d52690c62db5d4f09a37 Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 25 Jul 2012 23:47:53 -0400 Subject: [PATCH 028/678] ARM/VFP compiler optimizations --- arch/arm/Makefile | 6 +++--- arch/arm/vfp/Makefile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 70c424eaf7b..f6fbf49bea2 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -57,7 +57,7 @@ comma = , # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) +arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option, -O3 -march=armv7-a -mcpu=cortex-a9 -mtune=cortex-a9 -mfloat-abi=softfp -mfpu=neon -funsafe-math-optimizations -Wa$(comma)-march=armv7-a) arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) # Only override the compiler option if ARMv6. 
The ARMv6K extensions are # always available in ARMv7 @@ -113,8 +113,8 @@ endif endif # Need -Uarm for gcc < 3.x -KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_THUMB2) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm -KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_THUMB2) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float +KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_THUMB2) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -mfpu=neon -Uarm +KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_THUMB2) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float -mfpu=neon CHECKFLAGS += -D__arm__ diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile index 6de73aab019..28d48ff3db7 100644 --- a/arch/arm/vfp/Makefile +++ b/arch/arm/vfp/Makefile @@ -7,7 +7,7 @@ # ccflags-y := -DDEBUG # asflags-y := -DDEBUG -KBUILD_AFLAGS :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=softvfp+vfp) +KBUILD_AFLAGS :=$(KBUILD_AFLAGS: -O3 -msoft-float=-Wa,-mfpu=neon -funsafe-math-optimizations) LDFLAGS +=--no-warn-mismatch obj-y += vfp.o From 65ad0b30cd57ceecf6ca4b9f9ca843874824fb36 Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 25 Jul 2012 23:59:42 -0400 Subject: [PATCH 029/678] Voltage Control tweak - let's ignore the highest freq slot for show and save since it shows 1.6GHz twice in the voltage table in System Tuner. We are are only allowing 1200mV for 1.6, so the top slot is not currently used. --- drivers/cpufreq/cpufreq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5ef58048041..8d14ca7f04b 100755 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -614,7 +614,8 @@ static ssize_t show_UV_mV_table(struct cpufreq_policy *policy, char *buf) struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); /* find how many actual entries there are */ - i = cpu_clk_g->dvfs->num_freqs; + /* don't show the last 1.6 frequency, we don't use it */ + i = cpu_clk_g->dvfs->num_freqs - 1; for(i--; i >=0; i--) { out += sprintf(out, "%lumhz: %i mV\n", @@ -635,7 +636,8 @@ static ssize_t store_UV_mV_table(struct cpufreq_policy *policy, char *buf, size_ struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); /* find how many actual entries there are */ - i = cpu_clk_g->dvfs->num_freqs; + /* don't mess with the last 1.6 frequency, we don't use it */ + i = cpu_clk_g->dvfs->num_freqs - 1; for(i--; i >= 0; i--) { From b52ff62f6165b40054c60597473bf22b4563fef1 Mon Sep 17 00:00:00 2001 From: motley Date: Thu, 26 Jul 2012 00:02:31 -0400 Subject: [PATCH 030/678] cpu-tegra: let's skip the temporary downclock and kernel log spam if the custom EDP throttle is not currently enabled. 
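
Without this change, tegra_cpu_edp_notify() always runs the current speed through edp_governor_speed(); whenever that returns less than cpu_speed, the notifier briefly caps the clock and, per the description above, spams the log, even when the custom EDP throttle is switched off. The guarded version (a sketch of the hunk below, assuming edp_enable is the runtime flag and TEGRA3_OVERCLOCK the build-time switch introduced by the overclock patches) keeps new_speed equal to cpu_speed while the throttle is disabled, so the "new_speed < cpu_speed" path is never taken:

	#ifdef TEGRA3_OVERCLOCK
		if (edp_enable)
			new_speed = edp_governor_speed(new_speed);
		else
			new_speed = cpu_speed;	/* no clamp: no downclock, no log */
	#else
		new_speed = edp_governor_speed(cpu_speed);
	#endif
		if (new_speed < cpu_speed) {
			/* only reached when the custom EDP throttle is enabled */
			ret = tegra_cpu_set_speed_cap(NULL);
		}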
--- arch/arm/mach-tegra/cpu-tegra.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index 0786ec500fe..b14c868a1b0 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -338,9 +338,16 @@ static int tegra_cpu_edp_notify( edp_update_limit(); cpu_speed = tegra_getspeed(0); - + +#ifdef TEGRA3_OVERCLOCK + if(edp_enable) { + new_speed = edp_governor_speed(new_speed); + } else { + new_speed = cpu_speed; + } +#else new_speed = edp_governor_speed(cpu_speed); - +#endif if (new_speed < cpu_speed) { ret = tegra_cpu_set_speed_cap(NULL); if (ret) { From bc5a63c041d25ffb702cb7167bf826c3a9f1de92 Mon Sep 17 00:00:00 2001 From: motley Date: Thu, 26 Jul 2012 22:02:06 -0400 Subject: [PATCH 031/678] Revert "HACK: block fbearlysuspend to not break androids crt-off animation" This reverts commit 81c3785c26827c3e22dd5b269cfeb27dbccedd9f. --- kernel/power/fbearlysuspend.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/power/fbearlysuspend.c b/kernel/power/fbearlysuspend.c index 0c547085119..0f970b6b87b 100644 --- a/kernel/power/fbearlysuspend.c +++ b/kernel/power/fbearlysuspend.c @@ -13,7 +13,6 @@ * */ -#include #include #include #include @@ -34,10 +33,6 @@ static void stop_drawing_early_suspend(struct early_suspend *h) int ret; unsigned long irq_flags; - /* FIXME: earlysuspend breaks androids CRT-off animation - * Sleep a little bit to get it played properly */ - msleep(500); - spin_lock_irqsave(&fb_state_lock, irq_flags); fb_state = FB_STATE_REQUEST_STOP_DRAWING; spin_unlock_irqrestore(&fb_state_lock, irq_flags); From bffc0c384173593eb67cb32d70f20be126b0cce9 Mon Sep 17 00:00:00 2001 From: motley Date: Fri, 27 Jul 2012 00:27:13 -0400 Subject: [PATCH 032/678] cpufreq: governor additions -Added pegasusq governor (thanks Samsung and gokhanmoral) -Added lulzactive governor, but not built-in due to issues (thanks Tegrak) --- arch/arm/configs/motley_grouper_defconfig | 8 +- drivers/cpufreq/Kconfig | 14 + drivers/cpufreq/Makefile | 2 + drivers/cpufreq/cpufreq_lulzactive.c | 1011 +++++++++++++++ drivers/cpufreq/cpufreq_pegasusq.c | 1413 +++++++++++++++++++++ include/linux/cpufreq.h | 3 + 6 files changed, 2446 insertions(+), 5 deletions(-) create mode 100644 drivers/cpufreq/cpufreq_lulzactive.c create mode 100644 drivers/cpufreq/cpufreq_pegasusq.c diff --git a/arch/arm/configs/motley_grouper_defconfig b/arch/arm/configs/motley_grouper_defconfig index 4cca9916f3f..654d5159941 100644 --- a/arch/arm/configs/motley_grouper_defconfig +++ b/arch/arm/configs/motley_grouper_defconfig @@ -1,7 +1,3 @@ -# -# Automatically generated file; DO NOT EDIT. -# Linux/arm 3.1.10 Kernel Configuration -# CONFIG_ARM=y CONFIG_HAVE_PWM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -514,6 +510,8 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_INTERACTIVE=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +# CONFIG_CPU_FREQ_GOV_LULZACTIVE is not set +CONFIG_CPU_FREQ_GOV_PEGASUSQ=y # # ARM CPU frequency scaling drivers @@ -3345,4 +3343,4 @@ CONFIG_HAS_DMA=y CONFIG_CPU_RMAP=y CONFIG_NLATTR=y # CONFIG_AVERAGE is not set -# CONFIG_CORDIC is not set +# CONFIG_CORDIC is not set \ No newline at end of file diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 57f96ebbce4..9859af5b158 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -206,6 +206,20 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. 
+config CPU_FREQ_GOV_LULZACTIVE + tristate "'lulzactive' cpufreq governor" + depends on CPU_FREQ + help + 'lulzactive' - a new interactive governor by Tegrak! + + If in doubt, say N. + +config CPU_FREQ_GOV_PEGASUSQ + tristate "'pegasusq' cpufreq policy governor" + depends on CPU_FREQ + help + 'pegasusq' - governor by Samsung + menu "x86 CPU frequency scaling drivers" depends on X86 source "drivers/cpufreq/Kconfig.x86" diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index d43b39150ef..adc4e22d0ae 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -10,6 +10,8 @@ obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o +obj-$(CONFIG_CPU_FREQ_GOV_PEGASUSQ) += cpufreq_pegasusq.o +obj-$(CONFIG_CPU_FREQ_GOV_LULZACTIVE) += cpufreq_lulzactive.o # CPUfreq cross-arch helpers obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o diff --git a/drivers/cpufreq/cpufreq_lulzactive.c b/drivers/cpufreq/cpufreq_lulzactive.c new file mode 100644 index 00000000000..0f2257a1582 --- /dev/null +++ b/drivers/cpufreq/cpufreq_lulzactive.c @@ -0,0 +1,1011 @@ +/* + * drivers/cpufreq/cpufreq_lulzactive.c + * + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Mike Chan (mike@android.com) + * Edited: Tegrak (luciferanna@gmail.com) + * + * Driver values in /sys/devices/system/cpu/cpufreq/lulzactive + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LULZACTIVE_VERSION (2) +#define LULZACTIVE_AUTHOR "tegrak" + +// if you changed some codes for optimization, just write your name here. +#define LULZACTIVE_TUNER "motley" + +static atomic_t active_count = ATOMIC_INIT(0); + +struct cpufreq_lulzactive_cpuinfo { + struct timer_list cpu_timer; + int timer_idlecancel; + u64 time_in_idle; + u64 idle_exit_time; + u64 timer_run_time; + int idling; + u64 freq_change_time; + u64 freq_change_time_in_idle; + struct cpufreq_policy *policy; + struct cpufreq_frequency_table *freq_table; + struct cpufreq_frequency_table lulzfreq_table[32]; + unsigned int lulzfreq_table_size; + unsigned int target_freq; + int governor_enabled; +}; + +static DEFINE_PER_CPU(struct cpufreq_lulzactive_cpuinfo, cpuinfo); + +/* Workqueues handle frequency scaling */ +static struct task_struct *up_task; +static struct workqueue_struct *down_wq; +static struct work_struct freq_scale_down_work; +static cpumask_t up_cpumask; +static spinlock_t up_cpumask_lock; +static cpumask_t down_cpumask; +static spinlock_t down_cpumask_lock; +static struct mutex set_speed_lock; + +/* + * The minimum amount of time to spend at a frequency before we can step up. + */ +#define DEFAULT_UP_SAMPLE_TIME 24 * USEC_PER_MSEC +static unsigned long up_sample_time; + +/* + * The minimum amount of time to spend at a frequency before we can step down. 
+ */ +#define DEFAULT_DOWN_SAMPLE_TIME 49 * USEC_PER_MSEC +static unsigned long down_sample_time; + +/* + * CPU freq will be increased if measured load > inc_cpu_load; + */ +#define DEFAULT_INC_CPU_LOAD 60 +static unsigned long inc_cpu_load; + +/* + * CPU freq will be decreased if measured load < dec_cpu_load; + * not implemented yet. + */ +#define DEFAULT_DEC_CPU_LOAD 30 +static unsigned long dec_cpu_load; + +/* + * Increasing frequency table index + * zero disables and causes to always jump straight to max frequency. + */ +#define DEFAULT_PUMP_UP_STEP 1 +static unsigned long pump_up_step; + +/* + * Decreasing frequency table index + * zero disables and will calculate frequency according to load heuristic. + */ +#define DEFAULT_PUMP_DOWN_STEP 1 +static unsigned long pump_down_step; + +/* + * Use minimum frequency while suspended. + */ +static unsigned int early_suspended; + +#define SCREEN_OFF_LOWEST_STEP (7) +#define DEFAULT_SCREEN_OFF_MIN_STEP (SCREEN_OFF_LOWEST_STEP) +static unsigned long screen_off_min_step; + +static int cpufreq_governor_lulzactive(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_LULZACTIVE +static +#endif +struct cpufreq_governor cpufreq_gov_lulzactive = { + .name = "lulzactive", + .governor = cpufreq_governor_lulzactive, + .max_transition_latency = 10000000, + .owner = THIS_MODULE, +}; + +static unsigned int get_lulzfreq_table_size(struct cpufreq_lulzactive_cpuinfo *pcpu) { + unsigned int size = 0, i; + for (i = 0; (pcpu->freq_table[i].frequency != CPUFREQ_TABLE_END); i++) { + unsigned int freq = pcpu->freq_table[i].frequency; + if (freq == CPUFREQ_ENTRY_INVALID) continue; + pcpu->lulzfreq_table[size].index = i; //in case we need it later -gm + pcpu->lulzfreq_table[size].frequency = freq; + size++; + } + pcpu->lulzfreq_table[size].index = 0; + pcpu->lulzfreq_table[size].frequency = CPUFREQ_TABLE_END; + return size; +} + +static inline void fix_screen_off_min_step(struct cpufreq_lulzactive_cpuinfo *pcpu) { + if (pcpu->lulzfreq_table_size <= 0) { + screen_off_min_step = 0; + return; + } + + if (DEFAULT_SCREEN_OFF_MIN_STEP == screen_off_min_step) + for(screen_off_min_step=0; + pcpu->lulzfreq_table[screen_off_min_step].frequency != 500000; + screen_off_min_step++); + + if (screen_off_min_step >= pcpu->lulzfreq_table_size) + for(screen_off_min_step=0; + pcpu->lulzfreq_table[screen_off_min_step].frequency != 500000; + screen_off_min_step++); +} + +static inline unsigned int adjust_screen_off_freq( + struct cpufreq_lulzactive_cpuinfo *pcpu, unsigned int freq) { + + if (early_suspended && freq > pcpu->lulzfreq_table[screen_off_min_step].frequency) { + freq = pcpu->lulzfreq_table[screen_off_min_step].frequency; + pcpu->target_freq = pcpu->policy->cur; + + if (freq > pcpu->policy->max) + freq = pcpu->policy->max; + if (freq < pcpu->policy->min) + freq = pcpu->policy->min; + } + + return freq; +} + +static void cpufreq_lulzactive_timer(unsigned long data) +{ + unsigned int delta_idle; + unsigned int delta_time; + int cpu_load; + int load_since_change; + u64 time_in_idle; + u64 idle_exit_time; + struct cpufreq_lulzactive_cpuinfo *pcpu = + &per_cpu(cpuinfo, data); + u64 now_idle; + unsigned int new_freq; + unsigned int index; + unsigned long flags; + int ret; + + smp_rmb(); + + if (!pcpu->governor_enabled) + goto exit; + + /* + * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time, + * this lets idle exit know the current idle time sample has + * been processed, and idle exit can generate a new sample and + * re-arm the 
timer. This prevents a concurrent idle + * exit on that CPU from writing a new set of info at the same time + * the timer function runs (the timer function can't use that info + * until more time passes). + */ + time_in_idle = pcpu->time_in_idle; + idle_exit_time = pcpu->idle_exit_time; + now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time); + smp_wmb(); + + /* If we raced with cancelling a timer, skip. */ + if (!idle_exit_time) + goto exit; + + delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle); + delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, + idle_exit_time); + + /* + * If timer ran less than 1ms after short-term sample started, retry. + */ + if (delta_time < 1000) + goto rearm; + + if (delta_idle > delta_time) + cpu_load = 0; + else + cpu_load = 100 * (delta_time - delta_idle) / delta_time; + + delta_idle = (unsigned int) cputime64_sub(now_idle, + pcpu->freq_change_time_in_idle); + delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, + pcpu->freq_change_time); + + if ((delta_time == 0) || (delta_idle > delta_time)) + load_since_change = 0; + else + load_since_change = + 100 * (delta_time - delta_idle) / delta_time; + + /* + * Choose greater of short-term load (since last idle timer + * started or timer function re-armed itself) or long-term load + * (since last frequency change). + */ + if (load_since_change > cpu_load) + cpu_load = load_since_change; + + /* + * START lulzactive algorithm section + */ + if (cpu_load >= inc_cpu_load) { + if (pump_up_step && pcpu->policy->cur < pcpu->policy->max) { + ret = cpufreq_frequency_table_target( + pcpu->policy, pcpu->lulzfreq_table, + pcpu->policy->cur, CPUFREQ_RELATION_H, + &index); + if (ret < 0) { + goto rearm; + } + + // apply pump_up_step by tegrak + index -= pump_up_step; + if (index < 0) + index = 0; + + new_freq = pcpu->lulzfreq_table[index].frequency; + } + else { + new_freq = pcpu->policy->max; + } + } + else { + if (pump_down_step) { + ret = cpufreq_frequency_table_target( + pcpu->policy, pcpu->lulzfreq_table, + pcpu->policy->cur, CPUFREQ_RELATION_H, + &index); + if (ret < 0) { + goto rearm; + } + + // apply pump_down_step by tegrak + index += pump_down_step; + if (index >= pcpu->lulzfreq_table_size) { + index = pcpu->lulzfreq_table_size - 1; + } + + new_freq = (pcpu->policy->cur > pcpu->policy->min) ? + (pcpu->lulzfreq_table[index].frequency) : + (pcpu->policy->min); + } + else { + new_freq = pcpu->policy->max * cpu_load / 100; + ret = cpufreq_frequency_table_target( + pcpu->policy, pcpu->lulzfreq_table, + new_freq, CPUFREQ_RELATION_H, + &index); + if (ret < 0) { + goto rearm; + } + new_freq = pcpu->lulzfreq_table[index].frequency; + } + } + + // adjust freq when screen off + new_freq = adjust_screen_off_freq(pcpu, new_freq); + + if (pcpu->target_freq == new_freq) + goto rearm_if_notmax; + + /* + * Do not scale down unless we have been at this frequency for the + * minimum sample time. 
+ */ + if (new_freq < pcpu->target_freq) { + if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time) + < down_sample_time) + goto rearm; + } + else { + if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time) < + up_sample_time) { + /* don't reset timer */ + goto rearm; + } + } + + if (new_freq < pcpu->target_freq) { + pcpu->target_freq = new_freq; + spin_lock_irqsave(&down_cpumask_lock, flags); + cpumask_set_cpu(data, &down_cpumask); + spin_unlock_irqrestore(&down_cpumask_lock, flags); + queue_work(down_wq, &freq_scale_down_work); + } else { + pcpu->target_freq = new_freq; + spin_lock_irqsave(&up_cpumask_lock, flags); + cpumask_set_cpu(data, &up_cpumask); + spin_unlock_irqrestore(&up_cpumask_lock, flags); + wake_up_process(up_task); + } + +rearm_if_notmax: + /* + * Already set max speed and don't see a need to change that, + * wait until next idle to re-evaluate, don't need timer. + */ + if (pcpu->target_freq == pcpu->policy->max) + goto exit; + +rearm: + if (!timer_pending(&pcpu->cpu_timer)) { + /* + * If already at min: if that CPU is idle, don't set timer. + * Else cancel the timer if that CPU goes idle. We don't + * need to re-evaluate speed until the next idle exit. + */ + if (pcpu->target_freq == pcpu->policy->min) { + smp_rmb(); + + if (pcpu->idling) + goto exit; + + pcpu->timer_idlecancel = 1; + } + + pcpu->time_in_idle = get_cpu_idle_time_us( + data, &pcpu->idle_exit_time); + mod_timer(&pcpu->cpu_timer, + jiffies + 4); + } + +exit: + return; +} + +static void cpufreq_lulzactive_idle_start(void) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu = + &per_cpu(cpuinfo, smp_processor_id()); + int pending; + + if (!pcpu->governor_enabled) + return; + + pcpu->idling = 1; + smp_wmb(); + pending = timer_pending(&pcpu->cpu_timer); + + if (pcpu->target_freq != pcpu->policy->min) { +#ifdef CONFIG_SMP + /* + * Entering idle while not at lowest speed. On some + * platforms this can hold the other CPU(s) at that speed + * even though the CPU is idle. Set a timer to re-evaluate + * speed so this idle CPU doesn't hold the other CPUs above + * min indefinitely. This should probably be a quirk of + * the CPUFreq driver. + */ + if (!pending) { + pcpu->time_in_idle = get_cpu_idle_time_us( + smp_processor_id(), &pcpu->idle_exit_time); + pcpu->timer_idlecancel = 0; + mod_timer(&pcpu->cpu_timer, + jiffies + 4); + } +#endif + } else { + /* + * If at min speed and entering idle after load has + * already been evaluated, and a timer has been set just in + * case the CPU suddenly goes busy, cancel that timer. The + * CPU didn't go busy; we'll recheck things upon idle exit. + */ + if (pending && pcpu->timer_idlecancel) { + del_timer(&pcpu->cpu_timer); + /* + * Ensure last timer run time is after current idle + * sample start time, so next idle exit will always + * start a new idle sampling period. + */ + pcpu->idle_exit_time = 0; + pcpu->timer_idlecancel = 0; + } + } + +} + +static void cpufreq_lulzactive_idle_end(void) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu = + &per_cpu(cpuinfo, smp_processor_id()); + + pcpu->idling = 0; + smp_wmb(); + + /* + * Arm the timer for 1-2 ticks later if not already, and if the timer + * function has already processed the previous load sampling + * interval. (If the timer is not pending but has not processed + * the previous interval, it is probably racing with us on another + * CPU. 
Let it compute load based on the previous sample and then + * re-arm the timer for another interval when it's done, rather + * than updating the interval start time to be "now", which doesn't + * give the timer function enough time to make a decision on this + * run.) + */ + if (timer_pending(&pcpu->cpu_timer) == 0 && + pcpu->timer_run_time >= pcpu->idle_exit_time && + pcpu->governor_enabled) { + pcpu->time_in_idle = + get_cpu_idle_time_us(smp_processor_id(), + &pcpu->idle_exit_time); + pcpu->timer_idlecancel = 0; + mod_timer(&pcpu->cpu_timer, + jiffies + 4); + } + +} + +static int cpufreq_lulzactive_up_task(void *data) +{ + unsigned int cpu; + cpumask_t tmp_mask; + unsigned long flags; + struct cpufreq_lulzactive_cpuinfo *pcpu; + + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + spin_lock_irqsave(&up_cpumask_lock, flags); + + if (cpumask_empty(&up_cpumask)) { + spin_unlock_irqrestore(&up_cpumask_lock, flags); + schedule(); + + if (kthread_should_stop()) + break; + + spin_lock_irqsave(&up_cpumask_lock, flags); + } + + set_current_state(TASK_RUNNING); + tmp_mask = up_cpumask; + cpumask_clear(&up_cpumask); + spin_unlock_irqrestore(&up_cpumask_lock, flags); + + for_each_cpu(cpu, &tmp_mask) { + unsigned int j; + unsigned int max_freq = 0; + + pcpu = &per_cpu(cpuinfo, cpu); + smp_rmb(); + + if (!pcpu->governor_enabled) + continue; + + mutex_lock(&set_speed_lock); + + for_each_cpu(j, pcpu->policy->cpus) { + struct cpufreq_lulzactive_cpuinfo *pjcpu = + &per_cpu(cpuinfo, j); + + if (pjcpu->target_freq > max_freq) + max_freq = pjcpu->target_freq; + } + + if (max_freq != pcpu->policy->cur) + __cpufreq_driver_target(pcpu->policy, + max_freq, + CPUFREQ_RELATION_H); + mutex_unlock(&set_speed_lock); + } + } + + return 0; +} + +static void cpufreq_lulzactive_freq_down(struct work_struct *work) +{ + unsigned int cpu; + cpumask_t tmp_mask; + unsigned long flags; + struct cpufreq_lulzactive_cpuinfo *pcpu; + + spin_lock_irqsave(&down_cpumask_lock, flags); + tmp_mask = down_cpumask; + cpumask_clear(&down_cpumask); + spin_unlock_irqrestore(&down_cpumask_lock, flags); + + for_each_cpu(cpu, &tmp_mask) { + unsigned int j; + unsigned int max_freq = 0; + + pcpu = &per_cpu(cpuinfo, cpu); + smp_rmb(); + + if (!pcpu->governor_enabled) + continue; + + mutex_lock(&set_speed_lock); + + for_each_cpu(j, pcpu->policy->cpus) { + struct cpufreq_lulzactive_cpuinfo *pjcpu = + &per_cpu(cpuinfo, j); + + if (pjcpu->target_freq > max_freq) + max_freq = pjcpu->target_freq; + } + + if (max_freq != pcpu->policy->cur) + __cpufreq_driver_target(pcpu->policy, max_freq, + CPUFREQ_RELATION_H); + + mutex_unlock(&set_speed_lock); + } +} + +// inc_cpu_load +static ssize_t show_inc_cpu_load(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", inc_cpu_load); +} + +static ssize_t store_inc_cpu_load(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + if(strict_strtoul(buf, 0, &inc_cpu_load)==-EINVAL) return -EINVAL; + + if (inc_cpu_load > 100) { + inc_cpu_load = 100; + } + else if (inc_cpu_load < 10) { + inc_cpu_load = 10; + } + return count; +} + +static struct global_attr inc_cpu_load_attr = __ATTR(inc_cpu_load, 0666, + show_inc_cpu_load, store_inc_cpu_load); + +// down_sample_time +static ssize_t show_down_sample_time(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", down_sample_time); +} + +static ssize_t store_down_sample_time(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + 
if(strict_strtoul(buf, 0, &down_sample_time)==-EINVAL) return -EINVAL; + return count; +} + +static struct global_attr down_sample_time_attr = __ATTR(down_sample_time, 0666, + show_down_sample_time, store_down_sample_time); + +// up_sample_time +static ssize_t show_up_sample_time(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", up_sample_time); +} + +static ssize_t store_up_sample_time(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + if(strict_strtoul(buf, 0, &up_sample_time)==-EINVAL) return -EINVAL; + return count; +} + +static struct global_attr up_sample_time_attr = __ATTR(up_sample_time, 0666, + show_up_sample_time, store_up_sample_time); + +// debug_mode +static ssize_t show_debug_mode(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "0\n"); +} + +static ssize_t store_debug_mode(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + return count; +} + +static struct global_attr debug_mode_attr = __ATTR(debug_mode, 0666, + show_debug_mode, store_debug_mode); + +// pump_up_step +static ssize_t show_pump_up_step(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", pump_up_step); +} + +static ssize_t store_pump_up_step(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + if(strict_strtoul(buf, 0, &pump_up_step)==-EINVAL) return -EINVAL; + return count; +} + +static struct global_attr pump_up_step_attr = __ATTR(pump_up_step, 0666, + show_pump_up_step, store_pump_up_step); + +// pump_down_step +static ssize_t show_pump_down_step(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", pump_down_step); +} + +static ssize_t store_pump_down_step(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu; + + if(strict_strtoul(buf, 0, &pump_down_step)==-EINVAL) return -EINVAL; + + pcpu = &per_cpu(cpuinfo, 0); + // fix out of bound + if (pcpu->lulzfreq_table_size <= pump_down_step) { + pump_down_step = pcpu->lulzfreq_table_size - 1; + } + return count; +} + +static struct global_attr pump_down_step_attr = __ATTR(pump_down_step, 0666, + show_pump_down_step, store_pump_down_step); + +// screen_off_min_step +static ssize_t show_screen_off_min_step(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu; + + pcpu = &per_cpu(cpuinfo, 0); + fix_screen_off_min_step(pcpu); + + return sprintf(buf, "%lu\n", screen_off_min_step); +} + +static ssize_t store_screen_off_min_step(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu; + + if(strict_strtoul(buf, 0, &screen_off_min_step)==-EINVAL) return -EINVAL; + + pcpu = &per_cpu(cpuinfo, 0); + fix_screen_off_min_step(pcpu); + + return count; +} + +static struct global_attr screen_off_min_step_attr = __ATTR(screen_off_min_step, 0666, + show_screen_off_min_step, store_screen_off_min_step); + +// author +static ssize_t show_author(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", LULZACTIVE_AUTHOR); +} + +static struct global_attr author_attr = __ATTR(author, 0444, + show_author, NULL); + +// tuner +static ssize_t show_tuner(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", LULZACTIVE_TUNER); +} + +static struct global_attr tuner_attr = __ATTR(tuner, 
0444, + show_tuner, NULL); + +// version +static ssize_t show_version(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", LULZACTIVE_VERSION); +} + +static struct global_attr version_attr = __ATTR(version, 0444, + show_version, NULL); + +// freq_table +static ssize_t show_freq_table(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpufreq_lulzactive_cpuinfo *pcpu; + char temp[64]; + int i; + + pcpu = &per_cpu(cpuinfo, 0); + + for (i = 0; i < pcpu->lulzfreq_table_size; i++) { + sprintf(temp, "%u\n", pcpu->lulzfreq_table[i].frequency); + strcat(buf, temp); + } + + return strlen(buf); +} + +static struct global_attr freq_table_attr = __ATTR(freq_table, 0444, + show_freq_table, NULL); + +static struct attribute *lulzactive_attributes[] = { + &inc_cpu_load_attr.attr, + &up_sample_time_attr.attr, + &down_sample_time_attr.attr, + &pump_up_step_attr.attr, + &pump_down_step_attr.attr, + &screen_off_min_step_attr.attr, + &debug_mode_attr.attr, + &author_attr.attr, + &tuner_attr.attr, + &version_attr.attr, + &freq_table_attr.attr, + NULL, +}; + +void start_lulzactive(void); +void stop_lulzactive(void); + +static struct attribute_group lulzactive_attr_group = { + .attrs = lulzactive_attributes, + .name = "lulzactive", +}; + +static int cpufreq_governor_lulzactive(struct cpufreq_policy *policy, + unsigned int event) +{ + int rc; + unsigned int j; + struct cpufreq_lulzactive_cpuinfo *pcpu; + struct cpufreq_frequency_table *freq_table; + + switch (event) { + case CPUFREQ_GOV_START: + if (!cpu_online(policy->cpu)) + return -EINVAL; + + freq_table = + cpufreq_frequency_get_table(policy->cpu); + + for_each_cpu(j, policy->cpus) { + pcpu = &per_cpu(cpuinfo, j); + pcpu->policy = policy; + pcpu->target_freq = policy->cur; + pcpu->freq_table = freq_table; + pcpu->freq_change_time_in_idle = + get_cpu_idle_time_us(j, + &pcpu->freq_change_time); + pcpu->governor_enabled = 1; + smp_wmb(); + pcpu->lulzfreq_table_size = get_lulzfreq_table_size(pcpu); + + // fix invalid screen_off_min_step + fix_screen_off_min_step(pcpu); + } + + /* + * Do not register the idle hook and create sysfs + * entries if we have already done so. + */ + if (atomic_inc_return(&active_count) > 1) + return 0; + start_lulzactive(); + + rc = sysfs_create_group(cpufreq_global_kobject, + &lulzactive_attr_group); + if (rc) + return rc; + + break; + + case CPUFREQ_GOV_STOP: + for_each_cpu(j, policy->cpus) { + pcpu = &per_cpu(cpuinfo, j); + pcpu->governor_enabled = 0; + smp_wmb(); + del_timer_sync(&pcpu->cpu_timer); + + /* + * Reset idle exit time since we may cancel the timer + * before it can run after the last idle exit time, + * to avoid tripping the check in idle exit for a timer + * that is trying to run. 
+ */ + pcpu->idle_exit_time = 0; + } + + flush_work(&freq_scale_down_work); + if (atomic_dec_return(&active_count) > 0) + return 0; + + sysfs_remove_group(cpufreq_global_kobject, + &lulzactive_attr_group); + stop_lulzactive(); + break; + + case CPUFREQ_GOV_LIMITS: + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, + policy->min, CPUFREQ_RELATION_L); + break; + } + return 0; +} + +static int cpufreq_lulzactive_idle_notifier(struct notifier_block *nb, + unsigned long val, + void *data) +{ + switch (val) { + case IDLE_START: + cpufreq_lulzactive_idle_start(); + break; + case IDLE_END: + cpufreq_lulzactive_idle_end(); + break; + } + + return 0; +} + +static struct notifier_block cpufreq_lulzactive_idle_nb = { + .notifier_call = cpufreq_lulzactive_idle_notifier, +}; + +static void lulzactive_early_suspend(struct early_suspend *handler) { + early_suspended = 1; +} + +static void lulzactive_late_resume(struct early_suspend *handler) { + early_suspended = 0; +} + +static struct early_suspend lulzactive_power_suspend = { + .suspend = lulzactive_early_suspend, + .resume = lulzactive_late_resume, + .level = EARLY_SUSPEND_LEVEL_DISABLE_FB + 1, +}; + +void start_lulzactive(void) +{ + //it is more appropriate to start the up_task thread after starting the governor -gm + unsigned int i, index500, index800; + struct cpufreq_lulzactive_cpuinfo *pcpu; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + + if( pump_up_step == 0 ) + { + pcpu = &per_cpu(cpuinfo, 0); + cpufreq_frequency_table_target( + pcpu->policy, pcpu->lulzfreq_table, + 500000, CPUFREQ_RELATION_H, + &index500); + cpufreq_frequency_table_target( + pcpu->policy, pcpu->lulzfreq_table, + 800000, CPUFREQ_RELATION_H, + &index800); + for(i=index800;ilulzfreq_table[i].frequency==CPUFREQ_ENTRY_INVALID) continue; + pump_up_step++; + } + } + if( pump_down_step == 0 ) + { + pump_down_step = pump_up_step; + } + + up_task = kthread_create(cpufreq_lulzactive_up_task, NULL, + "klulzactiveup"); + + sched_setscheduler_nocheck(up_task, SCHED_FIFO, ¶m); + get_task_struct(up_task); + + idle_notifier_register(&cpufreq_lulzactive_idle_nb); + register_early_suspend(&lulzactive_power_suspend); +} + +void stop_lulzactive(void) +{ + //cleanup the thread after stopping the governor -gm + kthread_stop(up_task); + put_task_struct(up_task); + + idle_notifier_unregister(&cpufreq_lulzactive_idle_nb); + unregister_early_suspend(&lulzactive_power_suspend); + pump_up_step = DEFAULT_PUMP_UP_STEP; + pump_down_step = DEFAULT_PUMP_DOWN_STEP; +} + +static int __init cpufreq_lulzactive_init(void) +{ + unsigned int i; + struct cpufreq_lulzactive_cpuinfo *pcpu; + up_sample_time = DEFAULT_UP_SAMPLE_TIME; + down_sample_time = DEFAULT_DOWN_SAMPLE_TIME; + inc_cpu_load = DEFAULT_INC_CPU_LOAD; + dec_cpu_load = DEFAULT_DEC_CPU_LOAD; + pump_up_step = DEFAULT_PUMP_UP_STEP; + pump_down_step = DEFAULT_PUMP_DOWN_STEP; + early_suspended = 0; + screen_off_min_step = DEFAULT_SCREEN_OFF_MIN_STEP; + + + /* Initalize per-cpu timers */ + for_each_possible_cpu(i) { + pcpu = &per_cpu(cpuinfo, i); + init_timer(&pcpu->cpu_timer); + pcpu->cpu_timer.function = cpufreq_lulzactive_timer; + pcpu->cpu_timer.data = i; + } + + /* No rescuer thread, bind to CPU queuing the work for possibly + warm cache (probably doesn't matter much). 
*/ + down_wq = alloc_workqueue("knteractive_down", 0, 1); + + if (!down_wq) + goto err_freeuptask; + + INIT_WORK(&freq_scale_down_work, + cpufreq_lulzactive_freq_down); + + spin_lock_init(&up_cpumask_lock); + spin_lock_init(&down_cpumask_lock); + mutex_init(&set_speed_lock); + + return cpufreq_register_governor(&cpufreq_gov_lulzactive); + +err_freeuptask: + put_task_struct(up_task); + return -ENOMEM; +} + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_LULZACTIVE +fs_initcall(cpufreq_lulzactive_init); +#else +module_init(cpufreq_lulzactive_init); +#endif + +static void __exit cpufreq_lulzactive_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_lulzactive); + kthread_stop(up_task); + put_task_struct(up_task); + destroy_workqueue(down_wq); +} + +module_exit(cpufreq_lulzactive_exit); + +MODULE_AUTHOR("Tegrak "); +MODULE_DESCRIPTION("'lulzactive' - improved interactive governor inspired by smartass"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq_pegasusq.c b/drivers/cpufreq/cpufreq_pegasusq.c new file mode 100644 index 00000000000..dbca0a62288 --- /dev/null +++ b/drivers/cpufreq/cpufreq_pegasusq.c @@ -0,0 +1,1413 @@ +/* + * drivers/cpufreq/cpufreq_pegasusq.c + * + * Copyright (C) 2011 Samsung Electronics co. ltd + * ByungChang Cha + * + * Based on ondemand governor + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_HAS_EARLYSUSPEND +#include +#endif + +/* + * runqueue average + */ + +#define RQ_AVG_TIMER_RATE 10 + +struct runqueue_data { + unsigned int nr_run_avg; + unsigned int update_rate; + int64_t last_time; + int64_t total_time; + struct delayed_work work; + struct workqueue_struct *nr_run_wq; + spinlock_t lock; +}; + +static struct runqueue_data *rq_data; +static void rq_work_fn(struct work_struct *work); + +static void start_rq_work(void) +{ + rq_data->nr_run_avg = 0; + rq_data->last_time = 0; + rq_data->total_time = 0; + if (rq_data->nr_run_wq == NULL) + rq_data->nr_run_wq = + create_singlethread_workqueue("nr_run_avg"); + + queue_delayed_work(rq_data->nr_run_wq, &rq_data->work, + msecs_to_jiffies(rq_data->update_rate)); + return; +} + +static void stop_rq_work(void) +{ + if (rq_data->nr_run_wq) + cancel_delayed_work(&rq_data->work); + return; +} + +static int __init init_rq_avg(void) +{ + rq_data = kzalloc(sizeof(struct runqueue_data), GFP_KERNEL); + if (rq_data == NULL) { + pr_err("%s cannot allocate memory\n", __func__); + return -ENOMEM; + } + spin_lock_init(&rq_data->lock); + rq_data->update_rate = RQ_AVG_TIMER_RATE; + INIT_DELAYED_WORK_DEFERRABLE(&rq_data->work, rq_work_fn); + + return 0; +} + +static void rq_work_fn(struct work_struct *work) +{ + int64_t time_diff = 0; + int64_t nr_run = 0; + unsigned long flags = 0; + int64_t cur_time = ktime_to_ns(ktime_get()); + + spin_lock_irqsave(&rq_data->lock, flags); + + if (rq_data->last_time == 0) + rq_data->last_time = cur_time; + if (rq_data->nr_run_avg == 0) + rq_data->total_time = 0; + + nr_run = nr_running() * 100; + time_diff = cur_time - rq_data->last_time; + do_div(time_diff, 1000 * 1000); + + if (time_diff != 0 && rq_data->total_time != 0) { + nr_run = (nr_run * time_diff) + + (rq_data->nr_run_avg * 
rq_data->total_time); + do_div(nr_run, rq_data->total_time + time_diff); + } + rq_data->nr_run_avg = nr_run; + rq_data->total_time += time_diff; + rq_data->last_time = cur_time; + + if (rq_data->update_rate != 0) + queue_delayed_work(rq_data->nr_run_wq, &rq_data->work, + msecs_to_jiffies(rq_data->update_rate)); + + spin_unlock_irqrestore(&rq_data->lock, flags); +} + +static unsigned int get_nr_run_avg(void) +{ + unsigned int nr_run_avg; + unsigned long flags = 0; + + spin_lock_irqsave(&rq_data->lock, flags); + nr_run_avg = rq_data->nr_run_avg; + rq_data->nr_run_avg = 0; + spin_unlock_irqrestore(&rq_data->lock, flags); + + return nr_run_avg; +} + + +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_SAMPLING_DOWN_FACTOR (2) +#define MAX_SAMPLING_DOWN_FACTOR (100000) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5) +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_MIN_SAMPLE_RATE (10000) +#define MIN_FREQUENCY_UP_THRESHOLD (11) +#define MAX_FREQUENCY_UP_THRESHOLD (100) +#define DEF_SAMPLING_RATE (50000) +#define MIN_SAMPLING_RATE (10000) +#define MAX_HOTPLUG_RATE (40u) + +#define DEF_MAX_CPU_LOCK (0) +#define DEF_UP_NR_CPUS (1) +#define DEF_CPU_UP_RATE (10) +#define DEF_CPU_DOWN_RATE (20) +#define DEF_FREQ_STEP (40) +#define DEF_START_DELAY (0) + +#define UP_THRESHOLD_AT_MIN_FREQ (40) +#define FREQ_FOR_RESPONSIVENESS (500000) + +#define HOTPLUG_DOWN_INDEX (0) +#define HOTPLUG_UP_INDEX (1) + +#ifdef CONFIG_MACH_MIDAS +static int hotplug_rq[4][2] = { + {0, 200}, {200, 300}, {300, 400}, {400, 0} +}; + +static int hotplug_freq[4][2] = { + {0, 500000}, + {400000, 500000}, + {400000, 800000}, + {600000, 0} +}; +#else +static int hotplug_rq[4][2] = { + {0, 100}, {100, 200}, {200, 300}, {300, 0} +}; + +static int hotplug_freq[4][2] = { + {0, 500000}, + {200000, 500000}, + {200000, 500000}, + {200000, 0} +}; +#endif + +static unsigned int min_sampling_rate; + +static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_PEGASUSQ +static +#endif +struct cpufreq_governor cpufreq_gov_pegasusq = { + .name = "pegasusq", + .governor = cpufreq_governor_dbs, + .owner = THIS_MODULE, +}; + +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + +struct cpu_dbs_info_s { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_iowait; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + struct work_struct up_work; + struct work_struct down_work; + struct cpufreq_frequency_table *freq_table; + unsigned int rate_mult; + int cpu; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. + */ + struct mutex timer_mutex; +}; +static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); + +struct workqueue_struct *dvfs_workqueue; + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +/* + * dbs_mutex protects dbs_enable in governor start/stop. 
+ */ +static DEFINE_MUTEX(dbs_mutex); + +static struct dbs_tuners { + unsigned int sampling_rate; + unsigned int up_threshold; + unsigned int down_differential; + unsigned int ignore_nice; + unsigned int sampling_down_factor; + unsigned int io_is_busy; + /* pegasusq tuners */ + unsigned int freq_step; + unsigned int cpu_up_rate; + unsigned int cpu_down_rate; + unsigned int up_nr_cpus; + unsigned int max_cpu_lock; + atomic_t hotplug_lock; + unsigned int dvfs_debug; + unsigned int max_freq; + unsigned int min_freq; +#ifdef CONFIG_HAS_EARLYSUSPEND + int early_suspend; +#endif + unsigned int up_threshold_at_min_freq; + unsigned int freq_for_responsiveness; +} dbs_tuners_ins = { + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, + .ignore_nice = 0, + .freq_step = DEF_FREQ_STEP, + .cpu_up_rate = DEF_CPU_UP_RATE, + .cpu_down_rate = DEF_CPU_DOWN_RATE, + .up_nr_cpus = DEF_UP_NR_CPUS, + .max_cpu_lock = DEF_MAX_CPU_LOCK, + .hotplug_lock = ATOMIC_INIT(0), + .dvfs_debug = 0, +#ifdef CONFIG_HAS_EARLYSUSPEND + .early_suspend = -1, +#endif + .up_threshold_at_min_freq = UP_THRESHOLD_AT_MIN_FREQ, + .freq_for_responsiveness = FREQ_FOR_RESPONSIVENESS, +}; + + +/* + * CPU hotplug lock interface + */ + +static atomic_t g_hotplug_count = ATOMIC_INIT(0); +static atomic_t g_hotplug_lock = ATOMIC_INIT(0); + +static void apply_hotplug_lock(void) +{ + int online, possible, lock, flag; + struct work_struct *work; + struct cpu_dbs_info_s *dbs_info; + + /* do turn_on/off cpus */ + dbs_info = &per_cpu(od_cpu_dbs_info, 0); /* from CPU0 */ + online = num_online_cpus(); + possible = num_possible_cpus(); + lock = atomic_read(&g_hotplug_lock); + flag = lock - online; + + if (flag == 0) + return; + + work = flag > 0 ? 
&dbs_info->up_work : &dbs_info->down_work; + + pr_debug("%s online %d possible %d lock %d flag %d %d\n", + __func__, online, possible, lock, flag, (int)abs(flag)); + + queue_work_on(dbs_info->cpu, dvfs_workqueue, work); +} + +int cpufreq_pegasusq_cpu_lock(int num_core) +{ + int prev_lock; + + if (num_core < 1 || num_core > num_possible_cpus()) + return -EINVAL; + + prev_lock = atomic_read(&g_hotplug_lock); + + if (prev_lock != 0 && prev_lock < num_core) + return -EINVAL; + else if (prev_lock == num_core) + atomic_inc(&g_hotplug_count); + + atomic_set(&g_hotplug_lock, num_core); + atomic_set(&g_hotplug_count, 1); + apply_hotplug_lock(); + + return 0; +} + +int cpufreq_pegasusq_cpu_unlock(int num_core) +{ + int prev_lock = atomic_read(&g_hotplug_lock); + + if (prev_lock < num_core) + return 0; + else if (prev_lock == num_core) + atomic_dec(&g_hotplug_count); + + if (atomic_read(&g_hotplug_count) == 0) + atomic_set(&g_hotplug_lock, 0); + + return 0; +} + + +/* + * History of CPU usage + */ +struct cpu_usage { + unsigned int freq; + unsigned int load[NR_CPUS]; + unsigned int rq_avg; +}; + +struct cpu_usage_history { + struct cpu_usage usage[MAX_HOTPLUG_RATE]; + unsigned int num_hist; +}; + +struct cpu_usage_history *hotplug_history; + +static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, + cputime64_t *wall) +{ + cputime64_t idle_time; + cputime64_t cur_wall_time; + cputime64_t busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user, + kstat_cpu(cpu).cpustat.system); + + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice); + + idle_time = cputime64_sub(cur_wall_time, busy_time); + if (wall) + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); + + return (cputime64_t)jiffies_to_usecs(idle_time); +} + +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, wall); + + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); + + return idle_time; +} + +static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, + cputime64_t *wall) +{ + u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + + if (iowait_time == -1ULL) + return 0; + + return iowait_time; +} + +/************************** sysfs interface ************************/ + +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", min_sampling_rate); +} + +define_one_global_ro(sampling_rate_min); + +/* cpufreq_pegasusq Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(io_is_busy, io_is_busy); +show_one(up_threshold, up_threshold); +show_one(sampling_down_factor, sampling_down_factor); +show_one(ignore_nice_load, ignore_nice); +show_one(down_differential, down_differential); +show_one(freq_step, freq_step); +show_one(cpu_up_rate, cpu_up_rate); +show_one(cpu_down_rate, cpu_down_rate); +show_one(up_nr_cpus, up_nr_cpus); +show_one(max_cpu_lock, max_cpu_lock); +show_one(dvfs_debug, dvfs_debug); +show_one(up_threshold_at_min_freq, up_threshold_at_min_freq); 
+show_one(freq_for_responsiveness, freq_for_responsiveness); +static ssize_t show_hotplug_lock(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", atomic_read(&g_hotplug_lock)); +} + +#define show_hotplug_param(file_name, num_core, up_down) \ +static ssize_t show_##file_name##_##num_core##_##up_down \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", file_name[num_core - 1][up_down]); \ +} + +#define store_hotplug_param(file_name, num_core, up_down) \ +static ssize_t store_##file_name##_##num_core##_##up_down \ +(struct kobject *kobj, struct attribute *attr, \ + const char *buf, size_t count) \ +{ \ + unsigned int input; \ + int ret; \ + ret = sscanf(buf, "%u", &input); \ + if (ret != 1) \ + return -EINVAL; \ + file_name[num_core - 1][up_down] = input; \ + return count; \ +} + +show_hotplug_param(hotplug_freq, 1, 1); +show_hotplug_param(hotplug_freq, 2, 0); +show_hotplug_param(hotplug_freq, 2, 1); +show_hotplug_param(hotplug_freq, 3, 0); +show_hotplug_param(hotplug_freq, 3, 1); +show_hotplug_param(hotplug_freq, 4, 0); + +show_hotplug_param(hotplug_rq, 1, 1); +show_hotplug_param(hotplug_rq, 2, 0); +show_hotplug_param(hotplug_rq, 2, 1); +show_hotplug_param(hotplug_rq, 3, 0); +show_hotplug_param(hotplug_rq, 3, 1); +show_hotplug_param(hotplug_rq, 4, 0); + +store_hotplug_param(hotplug_freq, 1, 1); +store_hotplug_param(hotplug_freq, 2, 0); +store_hotplug_param(hotplug_freq, 2, 1); +store_hotplug_param(hotplug_freq, 3, 0); +store_hotplug_param(hotplug_freq, 3, 1); +store_hotplug_param(hotplug_freq, 4, 0); + +store_hotplug_param(hotplug_rq, 1, 1); +store_hotplug_param(hotplug_rq, 2, 0); +store_hotplug_param(hotplug_rq, 2, 1); +store_hotplug_param(hotplug_rq, 3, 0); +store_hotplug_param(hotplug_rq, 3, 1); +store_hotplug_param(hotplug_rq, 4, 0); + +define_one_global_rw(hotplug_freq_1_1); +define_one_global_rw(hotplug_freq_2_0); +define_one_global_rw(hotplug_freq_2_1); +define_one_global_rw(hotplug_freq_3_0); +define_one_global_rw(hotplug_freq_3_1); +define_one_global_rw(hotplug_freq_4_0); + +define_one_global_rw(hotplug_rq_1_1); +define_one_global_rw(hotplug_rq_2_0); +define_one_global_rw(hotplug_rq_2_1); +define_one_global_rw(hotplug_rq_3_0); +define_one_global_rw(hotplug_rq_3_1); +define_one_global_rw(hotplug_rq_4_0); + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + return count; +} + +static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.io_is_busy = !!input; + return count; +} + +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + return -EINVAL; + } + dbs_tuners_ins.up_threshold = input; + return count; +} + +static ssize_t store_sampling_down_factor(struct kobject *a, + struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input, j; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + 
dbs_tuners_ins.sampling_down_factor = input; + + /* Reset down sampling multiplier in case it was active */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->rate_mult = 1; + } + return count; +} + +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->prev_cpu_idle = + get_cpu_idle_time(j, &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + } + return count; +} + +static ssize_t store_down_differential(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.down_differential = min(input, 100u); + return count; +} + +static ssize_t store_freq_step(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.freq_step = min(input, 100u); + return count; +} + +static ssize_t store_cpu_up_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.cpu_up_rate = min(input, MAX_HOTPLUG_RATE); + return count; +} + +static ssize_t store_cpu_down_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.cpu_down_rate = min(input, MAX_HOTPLUG_RATE); + return count; +} + + +static ssize_t store_up_nr_cpus(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.up_nr_cpus = min(input, num_possible_cpus()); + return count; +} + +static ssize_t store_max_cpu_lock(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.max_cpu_lock = min(input, num_possible_cpus()); + return count; +} + +static ssize_t store_hotplug_lock(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + int prev_lock; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + input = min(input, num_possible_cpus()); + prev_lock = atomic_read(&dbs_tuners_ins.hotplug_lock); + + if (prev_lock) + cpufreq_pegasusq_cpu_unlock(prev_lock); + + if (input == 0) { + atomic_set(&dbs_tuners_ins.hotplug_lock, 0); + return count; + } + + ret = cpufreq_pegasusq_cpu_lock(input); + if (ret) { + printk(KERN_ERR "[HOTPLUG] already locked with smaller value %d < %d\n", + atomic_read(&g_hotplug_lock), input); + return ret; + } + + atomic_set(&dbs_tuners_ins.hotplug_lock, input); + + return count; +} + +static ssize_t store_dvfs_debug(struct 
kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.dvfs_debug = input > 0; + return count; +} + +static ssize_t store_up_threshold_at_min_freq(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + return -EINVAL; + } + dbs_tuners_ins.up_threshold_at_min_freq = input; + return count; +} + +static ssize_t store_freq_for_responsiveness(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.freq_for_responsiveness = input; + return count; +} + +define_one_global_rw(sampling_rate); +define_one_global_rw(io_is_busy); +define_one_global_rw(up_threshold); +define_one_global_rw(sampling_down_factor); +define_one_global_rw(ignore_nice_load); +define_one_global_rw(down_differential); +define_one_global_rw(freq_step); +define_one_global_rw(cpu_up_rate); +define_one_global_rw(cpu_down_rate); +define_one_global_rw(up_nr_cpus); +define_one_global_rw(max_cpu_lock); +define_one_global_rw(hotplug_lock); +define_one_global_rw(dvfs_debug); +define_one_global_rw(up_threshold_at_min_freq); +define_one_global_rw(freq_for_responsiveness); + +static struct attribute *dbs_attributes[] = { + &sampling_rate_min.attr, + &sampling_rate.attr, + &up_threshold.attr, + &sampling_down_factor.attr, + &ignore_nice_load.attr, + &io_is_busy.attr, + &down_differential.attr, + &freq_step.attr, + &cpu_up_rate.attr, + &cpu_down_rate.attr, + &up_nr_cpus.attr, + /* priority: hotplug_lock > max_cpu_lock */ + &max_cpu_lock.attr, + &hotplug_lock.attr, + &dvfs_debug.attr, + &hotplug_freq_1_1.attr, + &hotplug_freq_2_0.attr, + &hotplug_freq_2_1.attr, + &hotplug_freq_3_0.attr, + &hotplug_freq_3_1.attr, + &hotplug_freq_4_0.attr, + &hotplug_rq_1_1.attr, + &hotplug_rq_2_0.attr, + &hotplug_rq_2_1.attr, + &hotplug_rq_3_0.attr, + &hotplug_rq_3_1.attr, + &hotplug_rq_4_0.attr, + &up_threshold_at_min_freq.attr, + &freq_for_responsiveness.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "pegasusq", +}; + +/************************** sysfs end ************************/ + +static void cpu_up_work(struct work_struct *work) +{ + int cpu; + int online = num_online_cpus(); + int nr_up = dbs_tuners_ins.up_nr_cpus; + int hotplug_lock = atomic_read(&g_hotplug_lock); + if (hotplug_lock) + nr_up = hotplug_lock - online; + + if (online == 1) { + printk(KERN_ERR "CPU_UP 3\n"); + cpu_up(num_possible_cpus() - 1); + nr_up -= 1; + } + + for_each_cpu_not(cpu, cpu_online_mask) { + if (nr_up-- == 0) + break; + if (cpu == 0) + continue; + printk(KERN_ERR "CPU_UP %d\n", cpu); + cpu_up(cpu); + } +} + +static void cpu_down_work(struct work_struct *work) +{ + int cpu; + int online = num_online_cpus(); + int nr_down = 1; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock) + nr_down = online - hotplug_lock; + + for_each_online_cpu(cpu) { + if (cpu == 0) + continue; + printk(KERN_ERR "CPU_DOWN %d\n", cpu); + cpu_down(cpu); + if (--nr_down == 0) + break; + } +} + +static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) +{ +#ifndef CONFIG_ARCH_EXYNOS4 + if (p->cur == p->max) + return; +#endif + + __cpufreq_driver_target(p, 
freq, CPUFREQ_RELATION_L); +} + +/* + * print hotplug debugging info. + * which 1 : UP, 0 : DOWN + */ +static void debug_hotplug_check(int which, int rq_avg, int freq, + struct cpu_usage *usage) +{ + int cpu; + printk(KERN_ERR "CHECK %s rq %d.%02d freq %d [", which ? "up" : "down", + rq_avg / 100, rq_avg % 100, freq); + for_each_online_cpu(cpu) { + printk(KERN_ERR "(%d, %d), ", cpu, usage->load[cpu]); + } + printk(KERN_ERR "]\n"); +} + +static int check_up(void) +{ + int num_hist = hotplug_history->num_hist; + struct cpu_usage *usage; + int freq, rq_avg; + int i; + int up_rate = dbs_tuners_ins.cpu_up_rate; + int up_freq, up_rq; + int min_freq = INT_MAX; + int min_rq_avg = INT_MAX; + int online; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock > 0) + return 0; + + online = num_online_cpus(); + up_freq = hotplug_freq[online - 1][HOTPLUG_UP_INDEX]; + up_rq = hotplug_rq[online - 1][HOTPLUG_UP_INDEX]; + + if (online == num_possible_cpus()) + return 0; + if (dbs_tuners_ins.max_cpu_lock != 0 + && online >= dbs_tuners_ins.max_cpu_lock) + return 0; + + if (num_hist == 0 || num_hist % up_rate) + return 0; + + for (i = num_hist - 1; i >= num_hist - up_rate; --i) { + usage = &hotplug_history->usage[i]; + + freq = usage->freq; + rq_avg = usage->rq_avg; + + min_freq = min(min_freq, freq); + min_rq_avg = min(min_rq_avg, rq_avg); + + if (dbs_tuners_ins.dvfs_debug) + debug_hotplug_check(1, rq_avg, freq, usage); + } + + if (min_freq >= up_freq && min_rq_avg > up_rq) { + printk(KERN_ERR "[HOTPLUG IN] %s %d>=%d && %d>%d\n", + __func__, min_freq, up_freq, min_rq_avg, up_rq); + hotplug_history->num_hist = 0; + return 1; + } + return 0; +} + +static int check_down(void) +{ + int num_hist = hotplug_history->num_hist; + struct cpu_usage *usage; + int freq, rq_avg; + int i; + int down_rate = dbs_tuners_ins.cpu_down_rate; + int down_freq, down_rq; + int max_freq = 0; + int max_rq_avg = 0; + int online; + int hotplug_lock = atomic_read(&g_hotplug_lock); + + if (hotplug_lock > 0) + return 0; + + online = num_online_cpus(); + down_freq = hotplug_freq[online - 1][HOTPLUG_DOWN_INDEX]; + down_rq = hotplug_rq[online - 1][HOTPLUG_DOWN_INDEX]; + + if (online == 1) + return 0; + + if (dbs_tuners_ins.max_cpu_lock != 0 + && online > dbs_tuners_ins.max_cpu_lock) + return 1; + + if (num_hist == 0 || num_hist % down_rate) + return 0; + + for (i = num_hist - 1; i >= num_hist - down_rate; --i) { + usage = &hotplug_history->usage[i]; + + freq = usage->freq; + rq_avg = usage->rq_avg; + + max_freq = max(max_freq, freq); + max_rq_avg = max(max_rq_avg, rq_avg); + + if (dbs_tuners_ins.dvfs_debug) + debug_hotplug_check(0, rq_avg, freq, usage); + } + + if (max_freq <= down_freq && max_rq_avg <= down_rq) { + printk(KERN_ERR "[HOTPLUG OUT] %s %d<=%d && %d<%d\n", + __func__, max_freq, down_freq, max_rq_avg, down_rq); + hotplug_history->num_hist = 0; + return 1; + } + + return 0; +} + +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) +{ + unsigned int max_load_freq; + + struct cpufreq_policy *policy; + unsigned int j; + int num_hist = hotplug_history->num_hist; + int max_hotplug_rate = max(dbs_tuners_ins.cpu_up_rate, + dbs_tuners_ins.cpu_down_rate); + int up_threshold = dbs_tuners_ins.up_threshold; + + policy = this_dbs_info->cur_policy; + + hotplug_history->usage[num_hist].freq = policy->cur; + hotplug_history->usage[num_hist].rq_avg = get_nr_run_avg(); + ++hotplug_history->num_hist; + + /* Get Absolute Load - in terms of freq */ + max_load_freq = 0; + + for_each_cpu(j, policy->cpus) { + struct 
cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; + cputime64_t prev_wall_time, prev_idle_time, prev_iowait_time; + unsigned int idle_time, wall_time, iowait_time; + unsigned int load, load_freq; + int freq_avg; + + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + prev_wall_time = j_dbs_info->prev_cpu_wall; + prev_idle_time = j_dbs_info->prev_cpu_idle; + prev_iowait_time = j_dbs_info->prev_cpu_iowait; + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); + + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + prev_wall_time); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) cputime64_sub(cur_idle_time, + prev_idle_time); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + iowait_time = (unsigned int) cputime64_sub(cur_iowait_time, + prev_iowait_time); + j_dbs_info->prev_cpu_iowait = cur_iowait_time; + + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice, + j_dbs_info->prev_cpu_nice); + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + + if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time) + idle_time -= iowait_time; + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + hotplug_history->usage[num_hist].load[j] = load; + + freq_avg = __cpufreq_driver_getavg(policy, j); + if (freq_avg <= 0) + freq_avg = policy->cur; + + load_freq = load * freq_avg; + if (load_freq > max_load_freq) + max_load_freq = load_freq; + } + + /* Check for CPU hotplug */ + if (check_up()) { + queue_work_on(this_dbs_info->cpu, dvfs_workqueue, + &this_dbs_info->up_work); + } else if (check_down()) { + queue_work_on(this_dbs_info->cpu, dvfs_workqueue, + &this_dbs_info->down_work); + } + if (hotplug_history->num_hist == max_hotplug_rate) + hotplug_history->num_hist = 0; + + /* Check for frequency increase */ + if (policy->cur < dbs_tuners_ins.freq_for_responsiveness) { + up_threshold = dbs_tuners_ins.up_threshold_at_min_freq; + } + + if (max_load_freq > up_threshold * policy->cur) { + int inc = (policy->max * dbs_tuners_ins.freq_step) / 100; + int target = min(policy->max, policy->cur + inc); + /* If switching to max speed, apply sampling_down_factor */ + if (policy->cur < policy->max && target == policy->max) + this_dbs_info->rate_mult = + dbs_tuners_ins.sampling_down_factor; + dbs_freq_increase(policy, target); + return; + } + + /* Check for frequency decrease */ +#ifndef CONFIG_ARCH_EXYNOS4 + /* if we cannot reduce the frequency anymore, break out early */ + if (policy->cur == policy->min) + return; +#endif + + /* + * The optimal frequency is the frequency that is the lowest that + * can support the current CPU usage without triggering the up + * policy. To be safe, we focus DOWN_DIFFERENTIAL points under + * the threshold. 
+ */ + if (max_load_freq < + (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * + policy->cur) { + unsigned int freq_next; + unsigned int down_thres; + + freq_next = max_load_freq / + (dbs_tuners_ins.up_threshold - + dbs_tuners_ins.down_differential); + + /* No longer fully busy, reset rate_mult */ + this_dbs_info->rate_mult = 1; + + if (freq_next < policy->min) + freq_next = policy->min; + + + down_thres = dbs_tuners_ins.up_threshold_at_min_freq + - dbs_tuners_ins.down_differential; + + if (freq_next < dbs_tuners_ins.freq_for_responsiveness + && (max_load_freq / freq_next) > down_thres) + freq_next = dbs_tuners_ins.freq_for_responsiveness; + + if (policy->cur == freq_next) + return; + + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + } +} + +static void do_dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info_s *dbs_info = + container_of(work, struct cpu_dbs_info_s, work.work); + unsigned int cpu = dbs_info->cpu; + int delay; + + mutex_lock(&dbs_info->timer_mutex); + + dbs_check_cpu(dbs_info); + /* We want all CPUs to do sampling nearly on + * same jiffy + */ + delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate + * dbs_info->rate_mult); + + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + queue_delayed_work_on(cpu, dvfs_workqueue, &dbs_info->work, delay); + mutex_unlock(&dbs_info->timer_mutex); +} + +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) +{ + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(DEF_START_DELAY * 1000 * 1000 + + dbs_tuners_ins.sampling_rate); + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); + INIT_WORK(&dbs_info->up_work, cpu_up_work); + INIT_WORK(&dbs_info->down_work, cpu_down_work); + + queue_delayed_work_on(dbs_info->cpu, dvfs_workqueue, + &dbs_info->work, delay + 2 * HZ); +} + +static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) +{ + cancel_delayed_work_sync(&dbs_info->work); + cancel_work_sync(&dbs_info->up_work); + cancel_work_sync(&dbs_info->down_work); +} + +static int pm_notifier_call(struct notifier_block *this, + unsigned long event, void *ptr) +{ + static unsigned int prev_hotplug_lock; + switch (event) { + case PM_SUSPEND_PREPARE: + prev_hotplug_lock = atomic_read(&g_hotplug_lock); + atomic_set(&g_hotplug_lock, 1); + apply_hotplug_lock(); + pr_debug("%s enter suspend\n", __func__); + return NOTIFY_OK; + case PM_POST_RESTORE: + case PM_POST_SUSPEND: + atomic_set(&g_hotplug_lock, prev_hotplug_lock); + if (prev_hotplug_lock) + apply_hotplug_lock(); + prev_hotplug_lock = 0; + pr_debug("%s exit suspend\n", __func__); + return NOTIFY_OK; + } + return NOTIFY_DONE; +} + +static struct notifier_block pm_notifier = { + .notifier_call = pm_notifier_call, +}; + +static int reboot_notifier_call(struct notifier_block *this, + unsigned long code, void *_cmd) +{ + atomic_set(&g_hotplug_lock, 1); + return NOTIFY_DONE; +} + +static struct notifier_block reboot_notifier = { + .notifier_call = reboot_notifier_call, +}; + +#ifdef CONFIG_HAS_EARLYSUSPEND +static struct early_suspend early_suspend; +unsigned int prev_freq_step; +unsigned int prev_sampling_rate; +static void cpufreq_pegasusq_early_suspend(struct early_suspend *h) +{ + dbs_tuners_ins.early_suspend = + atomic_read(&g_hotplug_lock); + prev_freq_step = dbs_tuners_ins.freq_step; + prev_sampling_rate = dbs_tuners_ins.sampling_rate; + dbs_tuners_ins.freq_step = 20; + dbs_tuners_ins.sampling_rate *= 4; + 
atomic_set(&g_hotplug_lock, 1); + apply_hotplug_lock(); + stop_rq_work(); +} +static void cpufreq_pegasusq_late_resume(struct early_suspend *h) +{ + atomic_set(&g_hotplug_lock, dbs_tuners_ins.early_suspend); + dbs_tuners_ins.early_suspend = -1; + dbs_tuners_ins.freq_step = prev_freq_step; + dbs_tuners_ins.sampling_rate = prev_sampling_rate; + apply_hotplug_lock(); + start_rq_work(); +} +#endif + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int j; + int rc; + + this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + dbs_tuners_ins.max_freq = policy->max; + dbs_tuners_ins.min_freq = policy->min; + hotplug_history->num_hist = 0; + start_rq_work(); + + mutex_lock(&dbs_mutex); + + dbs_enable++; + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kstat_cpu(j).cpustat.nice; + } + } + this_dbs_info->cpu = cpu; + this_dbs_info->rate_mult = 1; + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + + min_sampling_rate = MIN_SAMPLING_RATE; + dbs_tuners_ins.sampling_rate = DEF_SAMPLING_RATE; + dbs_tuners_ins.io_is_busy = 0; + } + mutex_unlock(&dbs_mutex); + + register_reboot_notifier(&reboot_notifier); + + mutex_init(&this_dbs_info->timer_mutex); + dbs_timer_init(this_dbs_info); + +#ifdef CONFIG_HAS_EARLYSUSPEND + register_early_suspend(&early_suspend); +#endif + break; + + case CPUFREQ_GOV_STOP: +#ifdef CONFIG_HAS_EARLYSUSPEND + unregister_early_suspend(&early_suspend); +#endif + + dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); + mutex_destroy(&this_dbs_info->timer_mutex); + + unregister_reboot_notifier(&reboot_notifier); + + dbs_enable--; + mutex_unlock(&dbs_mutex); + + stop_rq_work(); + + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); + + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&this_dbs_info->timer_mutex); + + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->max, + CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->min, + CPUFREQ_RELATION_L); + + mutex_unlock(&this_dbs_info->timer_mutex); + break; + } + return 0; +} + +static int __init cpufreq_gov_dbs_init(void) +{ + int ret; + + ret = init_rq_avg(); + if (ret) + return ret; + + hotplug_history = kzalloc(sizeof(struct cpu_usage_history), GFP_KERNEL); + if (!hotplug_history) { + pr_err("%s cannot create hotplug history array\n", __func__); + ret = -ENOMEM; + goto err_hist; + } + + dvfs_workqueue = create_workqueue("kpegasusq"); + if (!dvfs_workqueue) { + pr_err("%s cannot create workqueue\n", __func__); + ret = -ENOMEM; + goto err_queue; + } + + ret = cpufreq_register_governor(&cpufreq_gov_pegasusq); + if (ret) + goto err_reg; + +#ifdef CONFIG_HAS_EARLYSUSPEND + early_suspend.level = EARLY_SUSPEND_LEVEL_DISABLE_FB; + early_suspend.suspend = cpufreq_pegasusq_early_suspend; + 
early_suspend.resume = cpufreq_pegasusq_late_resume; +#endif + + return ret; + +err_reg: + destroy_workqueue(dvfs_workqueue); +err_queue: + kfree(hotplug_history); +err_hist: + kfree(rq_data); + return ret; +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_pegasusq); + destroy_workqueue(dvfs_workqueue); + kfree(hotplug_history); + kfree(rq_data); +} + +MODULE_AUTHOR("ByungChang Cha "); +MODULE_DESCRIPTION("'cpufreq_pegasusq' - A dynamic cpufreq/cpuhotplug governor"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PEGASUSQ +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 03a7c6f5786..45ff4905d1b 100755 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -370,6 +370,9 @@ extern struct cpufreq_governor cpufreq_gov_conservative; #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE) extern struct cpufreq_governor cpufreq_gov_interactive; #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_interactive) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_PEGASUSQ) +extern struct cpufreq_governor cpufreq_gov_pegasusq; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_pegasusq) #endif From c405544cfecce878fd9be5bda922e8c73892a659 Mon Sep 17 00:00:00 2001 From: motley Date: Sun, 29 Jul 2012 22:09:35 -0400 Subject: [PATCH 033/678] Only log temp when it is getting closer to Dynamic EDP limit to reduce kernel log spam Conflicts: drivers/misc/nct1008.c --- drivers/misc/nct1008.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/misc/nct1008.c b/drivers/misc/nct1008.c index 735df920055..98f2abfed15 100755 --- a/drivers/misc/nct1008.c +++ b/drivers/misc/nct1008.c @@ -129,7 +129,10 @@ static int nct1008_get_temp(struct device *dev, long *pTemp) /* Return max between Local and External Temp */ *pTemp = max(temp_local_milli, temp_ext_milli); - printk("%s: ret temp=%dC \n", __func__, MILLICELSIUS_TO_CELSIUS(*pTemp)); + + /* Only log when temp is getting closer to Dynamic EDP limit */ + if (MILLICELSIUS_TO_CELSIUS(*pTemp) > 56) + pr_info("%s: ret temp=%dC \n", __func__, MILLICELSIUS_TO_CELSIUS(*pTemp)); return 0; error: dev_err(&client->dev, "\n error in file=: %s %s() line=%d: " From f93453f4db91d8410018551d3107a3f1c4aa6b8f Mon Sep 17 00:00:00 2001 From: motley Date: Sun, 29 Jul 2012 23:03:15 -0400 Subject: [PATCH 034/678] CPU OC to 1.624GHz - higher end CPU frequencies are now at 1408, 1504, 1600, and 1624 (old 1400, 1500, 1600, N/A) DVFS table tweaks Added a GPU OC kernel config choice switch to allow compile time selection of GPU speed (446, 484, 500, or 520MHz). 
defconfig: -NTFS r/w enabled -PegasusQ governor no longer built in, but code remains if we want to look further into it later -Default GPU OC is 484MHz --- arch/arm/configs/motley_grouper_defconfig | 10 ++- arch/arm/mach-tegra/Kconfig | 25 ++++++- arch/arm/mach-tegra/cpu-tegra.c | 5 +- arch/arm/mach-tegra/tegra3_clocks.c | 14 ++-- arch/arm/mach-tegra/tegra3_dvfs.c | 85 +++++++++++++++++++++-- arch/arm/mach-tegra/tegra3_speedo.c | 2 +- drivers/cpufreq/cpufreq.c | 6 +- 7 files changed, 123 insertions(+), 24 deletions(-) diff --git a/arch/arm/configs/motley_grouper_defconfig b/arch/arm/configs/motley_grouper_defconfig index 654d5159941..c94cdc707a4 100644 --- a/arch/arm/configs/motley_grouper_defconfig +++ b/arch/arm/configs/motley_grouper_defconfig @@ -311,6 +311,10 @@ CONFIG_TEGRA_FIQ_DEBUGGER=y CONFIG_TEGRA_EMC_SCALING_ENABLE=y CONFIG_VOLTAGE_CONTROL=y CONFIG_GPU_OVERCLOCK=y +# CONFIG_GPU_OC_446 is not set +CONFIG_GPU_OC_484=y +# CONFIG_GPU_OC_500 is not set +# CONFIG_GPU_OC_520 is not set CONFIG_TEGRA_CPU_DVFS=y CONFIG_TEGRA_CORE_DVFS=y CONFIG_TEGRA_IOVMM_SMMU=y @@ -511,7 +515,7 @@ CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_INTERACTIVE=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y # CONFIG_CPU_FREQ_GOV_LULZACTIVE is not set -CONFIG_CPU_FREQ_GOV_PEGASUSQ=y +# CONFIG_CPU_FREQ_GOV_PEGASUSQ is not set # # ARM CPU frequency scaling drivers @@ -2959,7 +2963,7 @@ CONFIG_FAT_DEFAULT_CODEPAGE=437 CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_NTFS_FS=y # CONFIG_NTFS_DEBUG is not set -# CONFIG_NTFS_RW is not set +CONFIG_NTFS_RW=y # # Pseudo filesystems @@ -3343,4 +3347,4 @@ CONFIG_HAS_DMA=y CONFIG_CPU_RMAP=y CONFIG_NLATTR=y # CONFIG_AVERAGE is not set -# CONFIG_CORDIC is not set \ No newline at end of file +# CONFIG_CORDIC is not set diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 4cfca6df517..8c0cae34b59 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -283,8 +283,31 @@ config GPU_OVERCLOCK depends on TEGRA_SILICON_PLATFORM default n help - Choose y to overclock the GPU + Choose y to overclock the GPU. + If Off, clock speed is 416MHz. + If On, GPU clock speed can be selected. +choice + + depends on GPU_OVERCLOCK + prompt "Maximum GPU Rate" + default GPU_OC_484 + ---help--- + Select the desired GPU overclock rate. + + If you are not sure what you are doing, leave this + option alone! + config GPU_OC_446 + bool "446 MHz" + config GPU_OC_484 + bool "484 MHz" + config GPU_OC_500 + bool "500 MHz" + config GPU_OC_520 + bool "520 MHz" + +endchoice + config TEGRA_CPU_DVFS bool "Enable voltage scaling on Tegra CPU" depends on TEGRA_SILICON_PLATFORM diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index b14c868a1b0..f17a15025f4 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -354,9 +354,10 @@ static int tegra_cpu_edp_notify( cpu_clear(cpu, edp_cpumask); edp_update_limit(); } + if (new_speed > 1000000) + printk(KERN_DEBUG "tegra CPU:%sforce EDP limit %u kHz" + "\n", ret ? " failed to " : " ", new_speed); - printk(KERN_DEBUG "tegra CPU:%sforce EDP limit %u kHz" - "\n", ret ? 
" failed to " : " ", new_speed); } mutex_unlock(&tegra_cpu_lock); break; diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index f65ffd7177d..1188f52d5df 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4636,17 +4636,17 @@ static struct cpufreq_frequency_table freq_table_1p7GHz[] = { { 0, 51000 }, { 1, 102000 }, { 2, 204000 }, - { 3, 370000 }, + { 3, 340000 }, { 4, 475000 }, { 5, 640000 }, - { 6, 760000 }, - { 7, 910000 }, - { 8, 1150000 }, + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1200000 }, { 9, 1300000 }, - {10, 1400000 }, - {11, 1500000 }, + {10, 1408000 }, + {11, 1504000 }, {12, 1600000 }, - {13, 1700000 }, + {13, 1624000 }, {14, CPUFREQ_TABLE_END }, }; diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 6716554a8d0..a339aab2a6a 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -41,7 +41,7 @@ static const int cpu_millivolts[MAX_DVFS_FREQS] = { 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1190, 1200, 1237}; static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { - 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50}; + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25}; static const int core_millivolts[MAX_DVFS_FREQS] = { 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350}; @@ -176,11 +176,11 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 6, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), CPU_DVFS("cpu_g", 6, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1240, 1280, 1360, 1390, 1470, 1500, 1520, 1520, 1590, 1700), - CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1360, 1500, 1540, 1600), - CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300, 1320, 1380, 1420, 1500, 1540, 1600), - CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1240, 1280, 1320, 1380, 1420, 1500, 1540, 1600), - CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1320, 1340, 1360, 1380, 1420, 1500, 1540, 1600), - CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1280, 1320, 1340, 1360, 1380, 1420, 1450, 1500, 1540, 1600), + CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 480, 550, 570, 680, 700, 820, 970, 1040, 1080, 1150, 1200, 1280, 1360, 1480, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 500, 650, 670, 780, 800, 990, 1040, 1100, 1200, 1300, 1320, 1380, 1400, 1480, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1170, 1200, 1240, 1300, 1340, 1380, 1400, 1480, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 750, 770, 910, 940, 1150, 1230, 1280, 1320, 1340, 1360, 1380, 1400, 1480, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 750, 770, 940, 940, 1160, 1280, 1300, 1340, 1360, 1380, 1400, 1400, 1480, 1540, 1600, 1624), CPU_DVFS("cpu_g", 8, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1300), CPU_DVFS("cpu_g", 8, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300), @@ -241,6 +241,24 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("vi", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 470000, 470000, 470000), #ifdef CONFIG_GPU_OVERCLOCK +#ifdef GPU_OC_446 + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + 
CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), +#endif +#ifdef GPU_OC_484 CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), @@ -256,6 +274,41 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), +#endif +#ifdef GPU_OC_500 + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), +#endif +#ifdef GPU_OC_520 + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + 
CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), +#endif #else CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), @@ -295,10 +348,30 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("host1x", 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), CORE_DVFS("host1x", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), #ifdef CONFIG_GPU_OVERCLOCK +#ifdef GPU_OC_446 + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), + CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), +#endif +#ifdef GPU_OC_484 CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), +#endif +#ifdef GPU_OC_500 + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), + CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), +#endif +#ifdef GPU_OC_520 + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), + CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), +#endif #else CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), diff --git 
a/arch/arm/mach-tegra/tegra3_speedo.c b/arch/arm/mach-tegra/tegra3_speedo.c index 0f0d9f7bdd5..8d450dd6734 100644 --- a/arch/arm/mach-tegra/tegra3_speedo.c +++ b/arch/arm/mach-tegra/tegra3_speedo.c @@ -512,7 +512,7 @@ int tegra_package_id(void) static const int cpu_speedo_nominal_millivolts[] = /* speedo_id * 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 */ - { 1125, 1150, 1150, 1150, 1237, 1237, 1237, 1200, 1150, 912, 850, 850, 1237, 1237}; + { 1125, 1150, 1150, 1150, 1237, 1237, 1237, 1237, 1150, 912, 850, 850, 1237, 1237}; int tegra_cpu_speedo_mv(void) { diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8d14ca7f04b..0e3074089b9 100755 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -614,8 +614,7 @@ static ssize_t show_UV_mV_table(struct cpufreq_policy *policy, char *buf) struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); /* find how many actual entries there are */ - /* don't show the last 1.6 frequency, we don't use it */ - i = cpu_clk_g->dvfs->num_freqs - 1; + i = cpu_clk_g->dvfs->num_freqs; for(i--; i >=0; i--) { out += sprintf(out, "%lumhz: %i mV\n", @@ -636,8 +635,7 @@ static ssize_t store_UV_mV_table(struct cpufreq_policy *policy, char *buf, size_ struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); /* find how many actual entries there are */ - /* don't mess with the last 1.6 frequency, we don't use it */ - i = cpu_clk_g->dvfs->num_freqs - 1; + i = cpu_clk_g->dvfs->num_freqs; for(i--; i >= 0; i--) { From 6e4652adff78efc11c5f4f3e0a0a4fd7164a6e51 Mon Sep 17 00:00:00 2001 From: motley Date: Mon, 30 Jul 2012 11:15:44 -0400 Subject: [PATCH 035/678] tegra3 dvfs/clocks: -DVFS table tweaks -Frequency table fix fix for 1624MHz (missing multiplier) -CPU frequencies back to 1400, 1500, 1600, and 1624 (leaving the new highest setting) --- arch/arm/mach-tegra/tegra3_clocks.c | 11 +++++++++-- arch/arm/mach-tegra/tegra3_dvfs.c | 14 +++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 1188f52d5df..63a16c72db7 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -3531,6 +3531,13 @@ static struct clk_pll_freq_table tegra_pll_x_freq_table[] = { { 19200000, 1700000000, 885, 10, 1, 8}, /* actual: 1699.2 MHz */ { 26000000, 1700000000, 850, 13, 1, 8}, + /* 1.624 GHz */ + { 12000000, 1624000000, 812, 6, 1, 8}, + { 13000000, 1624000000, 999, 8, 1, 8}, /* actual 1623.4 MHz */ + { 16800000, 1624000000, 870, 9, 1, 8}, + { 19200000, 1624000000, 930, 11, 1, 8}, /* actual 1623.2 MHz */ + { 26000000, 1624000000, 812, 13, 1, 8}, + /* 1.6 GHz */ { 12000000, 1600000000, 800, 6, 1, 8}, { 13000000, 1600000000, 738, 6, 1, 8}, /* actual: 1599.0 MHz */ @@ -4643,8 +4650,8 @@ static struct cpufreq_frequency_table freq_table_1p7GHz[] = { { 7, 1000000 }, { 8, 1200000 }, { 9, 1300000 }, - {10, 1408000 }, - {11, 1504000 }, + {10, 1400000 }, + {11, 1500000 }, {12, 1600000 }, {13, 1624000 }, {14, CPUFREQ_TABLE_END }, diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index a339aab2a6a..f182d90c072 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -30,7 +30,7 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1190, 1200, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1190, 1212, 1237}; #endif static bool 
tegra_dvfs_cpu_disabled; @@ -38,7 +38,7 @@ static bool tegra_dvfs_core_disabled; static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1190, 1200, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25}; @@ -176,11 +176,11 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 6, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), CPU_DVFS("cpu_g", 6, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1240, 1280, 1360, 1390, 1470, 1500, 1520, 1520, 1590, 1700), - CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 480, 550, 570, 680, 700, 820, 970, 1040, 1080, 1150, 1200, 1280, 1360, 1480, 1540, 1600, 1624), - CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 500, 650, 670, 780, 800, 990, 1040, 1100, 1200, 1300, 1320, 1380, 1400, 1480, 1540, 1600, 1624), - CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1170, 1200, 1240, 1300, 1340, 1380, 1400, 1480, 1540, 1600, 1624), - CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 750, 770, 910, 940, 1150, 1230, 1280, 1320, 1340, 1360, 1380, 1400, 1480, 1540, 1600, 1624), - CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 750, 770, 940, 940, 1160, 1280, 1300, 1340, 1360, 1380, 1400, 1400, 1480, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 480, 550, 570, 680, 700, 820, 970, 1040, 1080, 1150, 1200, 1280, 1360, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 500, 650, 670, 780, 800, 990, 1040, 1100, 1200, 1300, 1320, 1380, 1400, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1150, 1200, 1240, 1300, 1340, 1380, 1400, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 750, 770, 910, 940, 1150, 1230, 1280, 1320, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 750, 770, 940, 940, 1160, 1280, 1300, 1330, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1624), CPU_DVFS("cpu_g", 8, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1300), CPU_DVFS("cpu_g", 8, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300), From 22c2e642cb8e825f41ad864feb1aea9ae175b29d Mon Sep 17 00:00:00 2001 From: motley Date: Tue, 31 Jul 2012 22:21:02 -0400 Subject: [PATCH 036/678] Changed highest frequency from 1.624 to 1.7GHz Tweaked DVFS table for the GPU. It should now scale a bit better and still bring the same performance and the top end. Increased core voltage for the highest frequency to 1250mV. Lowest brightness setting increased from 13 to 18 (thanks to clemsyn). Lets give this a try and we can increase it further if need be. 
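For reference, the 1.624 GHz PLL X rows added in the previous patch (and the pre-existing 1.7 GHz rows they sit next to) can be sanity-checked outside the kernel. Below is a minimal sketch in plain C; it assumes the effective rate is parent_rate * n / (m * p), an inference from the "actual: ..." comments in tegra3_clocks.c rather than anything these patches state, and it drops the cpcon column.

/* Stand-alone check of the PLL X frequency table rows shown in the diffs above. */
#include <stdio.h>

struct pllx_row { double parent_hz, target_hz; int n, m, p; };

int main(void)
{
        struct pllx_row rows[] = {
                { 12000000, 1624000000, 812,  6, 1 },
                { 13000000, 1624000000, 999,  8, 1 },   /* actual 1623.4 MHz */
                { 16800000, 1624000000, 870,  9, 1 },
                { 19200000, 1624000000, 930, 11, 1 },   /* actual 1623.2 MHz */
                { 26000000, 1624000000, 812, 13, 1 },
                { 19200000, 1700000000, 885, 10, 1 },   /* actual 1699.2 MHz */
                { 26000000, 1700000000, 850, 13, 1 },
        };
        for (unsigned int i = 0; i < sizeof(rows) / sizeof(rows[0]); i++) {
                double rate = rows[i].parent_hz * rows[i].n / (rows[i].m * rows[i].p);
                printf("parent %5.1f MHz -> %7.1f MHz (target %6.1f MHz)\n",
                       rows[i].parent_hz / 1e6, rate / 1e6, rows[i].target_hz / 1e6);
        }
        return 0;
}

The 13 MHz and 19.2 MHz reference clocks land slightly below their nominal targets, which is why those rows carry "actual" comments in the table.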
--- arch/arm/mach-tegra/board-grouper-panel.c | 2 +- arch/arm/mach-tegra/common.c | 3 +- arch/arm/mach-tegra/tegra3_clocks.c | 21 +++- arch/arm/mach-tegra/tegra3_dvfs.c | 126 +++++++++++----------- arch/arm/mach-tegra/tegra3_speedo.c | 2 + 5 files changed, 89 insertions(+), 65 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index bc38a170e59..09efa40ef59 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -410,7 +410,7 @@ static struct tegra_dc_sd_settings grouper_sd_settings = { .bin_width = -1, .aggressiveness = 1, .phase_in_adjustments = true, - .panel_min_brightness = 13, + .panel_min_brightness = 18, .use_vid_luma = false, /* Default video coefficients */ .coeff = {5, 9, 2}, diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index d91ad83fd6a..f002b51ef6c 100755 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -103,7 +103,7 @@ static struct board_info pmu_board_info; static struct board_info display_board_info; static struct board_info camera_board_info; -static int pmu_core_edp = 1200; /* default 1.2V EDP limit */ +static int pmu_core_edp = 1250; /* default 1.2V EDP limit */ static int board_panel_type; static enum power_supply_type pow_supply_type = POWER_SUPPLY_TYPE_MAINS; @@ -560,6 +560,7 @@ static int __init tegra_pmu_core_edp(char *options) { char *p = options; int core_edp = memparse(p, &p); + printk("tegra common core_edp: %u\n",core_edp); if (core_edp != 0) pmu_core_edp = core_edp; return 0; diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 63a16c72db7..70d2a3d8570 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4639,7 +4639,7 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { {14, CPUFREQ_TABLE_END }, }; -static struct cpufreq_frequency_table freq_table_1p7GHz[] = { +static struct cpufreq_frequency_table freq_table_1p624GHz[] = { { 0, 51000 }, { 1, 102000 }, { 2, 204000 }, @@ -4657,6 +4657,24 @@ static struct cpufreq_frequency_table freq_table_1p7GHz[] = { {14, CPUFREQ_TABLE_END }, }; +static struct cpufreq_frequency_table freq_table_1p7GHz[] = { + { 0, 51000 }, + { 1, 102000 }, + { 2, 204000 }, + { 3, 340000 }, + { 4, 475000 }, + { 5, 640000 }, + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1200000 }, + { 9, 1300000 }, + {10, 1400000 }, + {11, 1500000 }, + {12, 1600000 }, + {13, 1700000 }, + {14, CPUFREQ_TABLE_END }, +}; + static struct tegra_cpufreq_table_data cpufreq_tables[] = { { freq_table_300MHz, 0, 1 }, { freq_table_1p0GHz, 2, 8 }, @@ -4664,6 +4682,7 @@ static struct tegra_cpufreq_table_data cpufreq_tables[] = { { freq_table_1p4GHz, 2, 11 }, { freq_table_1p5GHz, 2, 12 }, { freq_table_1p6GHz, 2, 12 }, + { freq_table_1p624GHz, 2, 12 }, { freq_table_1p7GHz, 2, 12 }, }; diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index f182d90c072..22855e08c40 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -30,7 +30,7 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1190, 1212, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; #endif static bool tegra_dvfs_cpu_disabled; @@ -86,11 +86,13 @@ static int tegra3_get_core_floor_mv(int cpu_mv) return 1100; if ((tegra_cpu_speedo_id() < 2) || 
(tegra_cpu_speedo_id() == 4) || - (tegra_cpu_speedo_id() == 7) || +// (tegra_cpu_speedo_id() == 7) || (tegra_cpu_speedo_id() == 8)) return 1200; if (cpu_mv < 1100) return 1200; + if (tegra_cpu_speedo_id() == 7) + return 1250; if (cpu_mv <= 1250) return 1300; BUG(); @@ -176,11 +178,11 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 6, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), CPU_DVFS("cpu_g", 6, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1240, 1280, 1360, 1390, 1470, 1500, 1520, 1520, 1590, 1700), - CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 480, 550, 570, 680, 700, 820, 970, 1040, 1080, 1150, 1200, 1280, 1360, 1500, 1540, 1600, 1624), - CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 500, 650, 670, 780, 800, 990, 1040, 1100, 1200, 1300, 1320, 1380, 1400, 1500, 1540, 1600, 1624), - CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1150, 1200, 1240, 1300, 1340, 1380, 1400, 1500, 1540, 1600, 1624), - CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 750, 770, 910, 940, 1150, 1230, 1280, 1320, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1624), - CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 750, 770, 940, 940, 1160, 1280, 1300, 1330, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 480, 550, 570, 680, 700, 820, 970, 1040, 1080, 1150, 1200, 1280, 1360, 1500, 1540, 1600, 1700), + CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 500, 650, 670, 780, 800, 990, 1040, 1100, 1200, 1300, 1320, 1380, 1400, 1500, 1540, 1600, 1700), + CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1150, 1200, 1240, 1300, 1340, 1380, 1400, 1500, 1540, 1600, 1700), + CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 750, 770, 910, 940, 1150, 1230, 1280, 1320, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1700), + CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 750, 770, 940, 940, 1160, 1280, 1300, 1330, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1700), CPU_DVFS("cpu_g", 8, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1300), CPU_DVFS("cpu_g", 8, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300), @@ -242,7 +244,7 @@ static struct dvfs core_dvfs_table[] = { #ifdef CONFIG_GPU_OVERCLOCK #ifdef GPU_OC_446 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 446000, 446000, 446000), CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), @@ -250,7 +252,7 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 446000, 446000, 446000), CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), @@ -259,55 +261,55 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 
332000, 380000, 446000, 446000, 446000, 446000), #endif #ifdef GPU_OC_484 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 484000, 484000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 484000, 484000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), #endif #ifdef GPU_OC_500 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 
228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 500000, 500000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 500000, 500000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), #endif #ifdef GPU_OC_520 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 520000, 520000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 
285000, 332000, 380000, 446000, 520000, 520000, 520000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 520000, 520000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), #endif #else CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), @@ -349,26 +351,26 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("host1x", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), #ifdef CONFIG_GPU_OVERCLOCK #ifdef GPU_OC_446 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 446000, 446000, 446000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 446000, 446000, 446000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif #ifdef GPU_OC_484 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 484000, 484000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 484000, 484000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif #ifdef GPU_OC_500 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 500000, 500000, 500000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 500000, 500000, 500000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif #ifdef GPU_OC_520 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 
520000), + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 520000, 520000, 520000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 520000, 520000, 520000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif diff --git a/arch/arm/mach-tegra/tegra3_speedo.c b/arch/arm/mach-tegra/tegra3_speedo.c index 8d450dd6734..62f5f246172 100644 --- a/arch/arm/mach-tegra/tegra3_speedo.c +++ b/arch/arm/mach-tegra/tegra3_speedo.c @@ -526,6 +526,8 @@ int tegra_core_speedo_mv(void) case 0: return 1200; case 1: + if (cpu_speedo_id == 7) + return 1250; if ((cpu_speedo_id != 7) && (cpu_speedo_id != 8)) return 1200; /* fall thru for T30L or T30SL */ From b301004a704880e528aef6fc7332b2b42fd444ec Mon Sep 17 00:00:00 2001 From: Antti P Miettinen Date: Wed, 1 Aug 2012 12:26:47 -0400 Subject: [PATCH 037/678] ARM: tegra: power: Cancel hotplug work upon disable Cancel hotplug work when auto hotplug gets disabled to prevent e.g. cpu_up() getting called in LP cluster. Bug 965777 --- arch/arm/mach-tegra/cpu-tegra3.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index ef73d4b4e93..b5e0d4e8959 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -147,9 +147,12 @@ static int hp_state_set(const char *arg, const struct kernel_param *kp) if (ret == 0) { if ((hp_state == TEGRA_HP_DISABLED) && - (old_state != TEGRA_HP_DISABLED)) - pr_info("Tegra auto-hotplug disabled\n"); - else if (hp_state != TEGRA_HP_DISABLED) { + (old_state != TEGRA_HP_DISABLED)) { + mutex_unlock(tegra3_cpu_lock); + cancel_delayed_work_sync(&hotplug_work); + mutex_lock(tegra3_cpu_lock); + pr_info("Tegra auto-hotplug disabled\n"); + } else if (hp_state != TEGRA_HP_DISABLED) { if (old_state == TEGRA_HP_DISABLED) { pr_info("Tegra auto-hotplug enabled\n"); hp_init_stats(); From 10f6f5c1cfe8a0fb62e345f43343d0e6457e0c79 Mon Sep 17 00:00:00 2001 From: Alex Frid Date: Wed, 1 Aug 2012 12:46:54 -0400 Subject: [PATCH 038/678] ARM: tegra: power: Use runnable threads average for hotplug Sample scheduler runnable threads average in auto-hotplug work function and use it to determine the auto-hotplug target for number of on-line cores. Use cpu up delay as sampling period, and enforce down delay by checking last cpu configuration change time stamp. Bug 958978 Note: this commit had two lines that were already merged in the Google source, now the entire patch should be complete. 
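The thresholds in this patch are fixed point: avg_nr_running() is scaled by 1 << FSHIFT (the 11-bit shift from include/linux/sched.h), while nr_run_thresholds uses NR_FSHIFT = 2, so 5, 9 and 13 stand for 1.25, 2.25 and 3.25 runnable threads. Below is a minimal user-space sketch of the selection loop this patch adds to tegra_cpu_speed_balance(); the names and shifts come from the diff, the sample value in main() is made up, and the EDP and frequency checks that also gate the decision are omitted.

/* Pick a target on-line core count from the averaged runnable-thread count. */
#include <stdio.h>
#include <limits.h>

#define FSHIFT          11      /* scale of avg_nr_running() */
#define NR_FSHIFT       2       /* scale of the thresholds below */
#define ARRAY_SIZE(a)   (sizeof(a) / sizeof((a)[0]))

static unsigned int nr_run_thresholds[] = { 5, 9, 13, UINT_MAX };
static unsigned int nr_run_hysteresis = 2;      /* 0.5 thread */
static unsigned int nr_run_last = 1;

static unsigned int target_cores(unsigned int avg_nr_run)
{
        unsigned int nr_run;

        for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) {
                unsigned int threshold = nr_run_thresholds[nr_run - 1];
                if (nr_run_last <= nr_run)
                        threshold += nr_run_hysteresis;  /* sticky near the current target */
                if (avg_nr_run <= (threshold << (FSHIFT - NR_FSHIFT)))
                        break;
        }
        nr_run_last = nr_run;
        return nr_run;
}

int main(void)
{
        /* roughly 2.5 runnable threads on average: 2.5 * (1 << FSHIFT) */
        printf("target cores: %u\n", target_cores(5 * (1 << FSHIFT) / 2));
        return 0;
}

With 2.5 average runnable threads and one core currently targeted this settles on two cores (threshold 2.25 plus 0.5 hysteresis gives 2.75, which covers 2.5); in the driver the result is still combined with the EDP, idle-frequency and min/max on-line CPU checks shown in the diff.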
--- arch/arm/mach-tegra/cpu-tegra3.c | 44 +++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index b5e0d4e8959..a61ea21a981 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -73,6 +73,8 @@ static struct clk *cpu_clk; static struct clk *cpu_g_clk; static struct clk *cpu_lp_clk; +static unsigned long last_change_time; + static struct { cputime64_t time_up_total; u64 last_update; @@ -186,6 +188,14 @@ enum { TEGRA_CPU_SPEED_SKEWED, }; +#define NR_FSHIFT 2 +static unsigned int nr_run_thresholds[] = { +/* 1, 2, 3, 4 - on-line cpus target */ + 5, 9, 13, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ +}; +static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ +static unsigned int nr_run_last; + static noinline int tegra_cpu_speed_balance(void) { unsigned long highest_speed = tegra_cpu_highest_speed(); @@ -194,17 +204,36 @@ static noinline int tegra_cpu_speed_balance(void) unsigned int nr_cpus = num_online_cpus(); unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4; unsigned int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); + unsigned int avg_nr_run = avg_nr_running(); + unsigned int nr_run; + + /* Evaluate: + * - distribution of freq targets for already on-lined CPUs + * - average number of runnable threads + * - effective MIPS available within EDP frequency limits, + * and return: + * TEGRA_CPU_SPEED_BALANCED to bring one more CPU core on-line + * TEGRA_CPU_SPEED_BIASED to keep CPU core composition unchanged + * TEGRA_CPU_SPEED_SKEWED to remove CPU core off-line + */ + for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) { + unsigned int nr_threshold = nr_run_thresholds[nr_run - 1]; + if (nr_run_last <= nr_run) + nr_threshold += nr_run_hysteresis; + if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT))) + break; + } + nr_run_last = nr_run; - /* balanced: freq targets for all CPUs are above 50% of highest speed - biased: freq target for at least one CPU is below 50% threshold - skewed: freq targets for at least 2 CPUs are below 25% threshold */ if (((tegra_count_slow_cpus(skewed_speed) >= 2) || + (nr_run < nr_cpus) || tegra_cpu_edp_favor_down(nr_cpus, mp_overhead) || (highest_speed <= idle_bottom_freq) || (nr_cpus > max_cpus)) && (nr_cpus > min_cpus)) return TEGRA_CPU_SPEED_SKEWED; if (((tegra_count_slow_cpus(balanced_speed) >= 1) || + (nr_run <= nr_cpus) || (!tegra_cpu_edp_favor_up(nr_cpus, mp_overhead)) || (highest_speed <= idle_bottom_freq) || (nr_cpus == max_cpus)) && (nr_cpus >= min_cpus)) @@ -222,7 +251,6 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) bool up = false; unsigned int cpu = nr_cpu_ids; unsigned long now = jiffies; - static unsigned long last_change_time; mutex_lock(tegra3_cpu_lock); @@ -235,7 +263,7 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) if (cpu < nr_cpu_ids) { up = false; } else if (!is_lp_cluster() && !no_lp && - !pm_qos_request(PM_QOS_MIN_ONLINE_CPUS)) { + ((now - last_change_time) >= down_delay)) { if(!clk_set_parent(cpu_clk, cpu_lp_clk)) { hp_stats_update(CONFIG_NR_CPUS, true); hp_stats_update(0, false); @@ -245,11 +273,12 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) } } queue_delayed_work( - hotplug_wq, &hotplug_work, down_delay); + hotplug_wq, &hotplug_work, up2gn_delay); break; case TEGRA_HP_UP: if (is_lp_cluster() && !no_lp) { if(!clk_set_parent(cpu_clk, cpu_g_clk)) { + last_change_time = 
now; hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); /* catch-up with governor target speed */ @@ -316,6 +345,7 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) tegra_update_cpu_speed(speed); if (!clk_set_parent(cpu_clk, cpu_g_clk)) { + last_change_time = jiffies; hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); } @@ -381,7 +411,7 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) } else if (cpu_freq <= bottom_freq) { hp_state = TEGRA_HP_DOWN; queue_delayed_work( - hotplug_wq, &hotplug_work, down_delay); + hotplug_wq, &hotplug_work, up_delay); } break; case TEGRA_HP_DOWN: From c64887dfea5fbee5d259a8c83df57e8fa042c429 Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 1 Aug 2012 12:50:03 -0400 Subject: [PATCH 039/678] cpu-tegra2: Tuned the runnable threads threshold from 5/9/13 to 5/9/10 in order to improve performance --- arch/arm/mach-tegra/cpu-tegra3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index a61ea21a981..72c5bf838e5 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -191,7 +191,7 @@ enum { #define NR_FSHIFT 2 static unsigned int nr_run_thresholds[] = { /* 1, 2, 3, 4 - on-line cpus target */ - 5, 9, 13, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ + 5, 9, 10, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ }; static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ static unsigned int nr_run_last; From 14640d9aa2915bcb04bc87d0d75175ecd0201616 Mon Sep 17 00:00:00 2001 From: Alex Frid Date: Wed, 1 Aug 2012 20:08:49 -0400 Subject: [PATCH 040/678] scheduler changes from Nvidia scheduler: compute time-average nr_running per run-queue Compute the time-average number of running tasks per run-queue for a trailing window of a fixed time period. The detla add/sub to the average value is weighted by the amount of time per nr_running value relative to the total measurement period. scheduler: Re-compute time-average nr_running on read Re-compute time-average nr_running when it is read. This would prevent reading stalled average value if there were no run-queue changes for a long time. New average value is returned to the reader, but not stored to avoid concurrent writes. Light-weight sequential counter synchronization is used to assure data consistency for re-computing average. 
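To make the fixed-point arithmetic in this patch (and in the cpu-tegra3 runnable-threads thresholds it feeds) easier to follow, here is a small user-space sketch, not part of the patch series, of the averaging step from do_avg_nr_running() and the threshold comparison from tegra_cpu_speed_balance(). FSHIFT/FIXED_1 are the kernel's load-average scaling (1 << 11); the sample input in main() and the hysteresis-free target_cores() helper are illustrative assumptions, not code from the tree.

/*
 * Standalone sketch (user space, not kernel code) of the fixed-point math
 * used by the time-average nr_running change and the hotplug thresholds.
 * FSHIFT matches the kernel's load-average scaling; the sample input in
 * main() and the lack of hysteresis are simplifications.
 */
#include <stdio.h>

#define FSHIFT            11                     /* load-average fixed point  */
#define FIXED_1           (1 << FSHIFT)
#define NR_AVE_PERIOD_EXP 27                     /* ~134 ms trailing window   */
#define NR_AVE_PERIOD     (1LL << NR_AVE_PERIOD_EXP)
#define NR_AVE_SCALE(x)   ((long long)(x) << FSHIFT)
#define NR_FSHIFT         2                      /* thresholds are threads*4  */

static unsigned int nr_run_thresholds[] = { 5, 9, 13 }; /* 1.25/2.25/3.25 threads */

/* One averaging step: delta_ns is the time spent at nr_running tasks. */
static unsigned int update_avg(unsigned int ave, unsigned int nr_running,
                               long long delta_ns)
{
        long long nr = NR_AVE_SCALE(nr_running);

        if (delta_ns > NR_AVE_PERIOD)            /* window expired: snap      */
                return (unsigned int)nr;
        /* weight the change by delta_ns / NR_AVE_PERIOD, all in integers    */
        return ave + (unsigned int)((delta_ns * (nr - (long long)ave))
                                    >> NR_AVE_PERIOD_EXP);
}

/* On-line CPU target for a FSHIFT-scaled average (hysteresis omitted). */
static unsigned int target_cores(unsigned int avg_nr_run)
{
        unsigned int i;

        for (i = 0; i < 3; i++)
                /* shift thresholds from the *4 domain into the *2048 domain */
                if (avg_nr_run <= (nr_run_thresholds[i] << (FSHIFT - NR_FSHIFT)))
                        break;
        return i + 1;                            /* 1..4 on-line CPUs         */
}

int main(void)
{
        unsigned int ave = 1 * FIXED_1;          /* start at 1.0 threads      */

        ave = update_avg(ave, 2, 50000000LL);    /* 2 tasks held for 50 ms    */
        printf("avg = %u.%03u threads -> %u cores\n",
               ave / FIXED_1, (ave % FIXED_1) * 1000 / FIXED_1,
               target_cores(ave));
        return 0;
}

Running this for two runnable tasks held for 50 ms, starting from an average of 1.0, prints roughly 1.37 threads, which lands between the 5 (1.25-thread) and 9 (2.25-thread) thresholds and therefore targets two on-line cores.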
--- include/linux/sched.h | 1 + kernel/sched.c | 69 ++++++++++++++++++++++++++++++++++++++++++- kernel/sched_debug.c | 3 ++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5bb4dd2e4c5..c9e03a9aa95 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -139,6 +139,7 @@ extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_iowait(void); +extern unsigned long avg_nr_running(void); extern unsigned long nr_iowait_cpu(int cpu); extern unsigned long this_cpu_load(void); diff --git a/kernel/sched.c b/kernel/sched.c index 6121c2ce14b..e8f879f48e7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -472,6 +472,11 @@ struct rq { #endif int skip_clock_update; + /* time-based average load */ + u64 nr_last_stamp; + unsigned int ave_nr_running; + seqcount_t ave_seqcnt; + /* capture load from *all* tasks on this cpu: */ struct load_weight load; unsigned long nr_load_updates; @@ -1756,14 +1761,48 @@ static const struct sched_class rt_sched_class; #include "sched_stats.h" +/* 27 ~= 134217728ns = 134.2ms + * 26 ~= 67108864ns = 67.1ms + * 25 ~= 33554432ns = 33.5ms + * 24 ~= 16777216ns = 16.8ms */ +#define NR_AVE_PERIOD_EXP 27 +#define NR_AVE_SCALE(x) ((x) << FSHIFT) +#define NR_AVE_PERIOD (1 << NR_AVE_PERIOD_EXP) +#define NR_AVE_DIV_PERIOD(x) ((x) >> NR_AVE_PERIOD_EXP) + +static inline unsigned int do_avg_nr_running(struct rq *rq) +{ + s64 nr, deltax; + unsigned int ave_nr_running = rq->ave_nr_running; + + deltax = rq->clock_task - rq->nr_last_stamp; + nr = NR_AVE_SCALE(rq->nr_running); + + if (deltax > NR_AVE_PERIOD) + ave_nr_running = nr; + else + ave_nr_running += + NR_AVE_DIV_PERIOD(deltax * (nr - ave_nr_running)); + + return ave_nr_running; +} + static void inc_nr_running(struct rq *rq) { + write_seqcount_begin(&rq->ave_seqcnt); + rq->ave_nr_running = do_avg_nr_running(rq); + rq->nr_last_stamp = rq->clock_task; rq->nr_running++; + write_seqcount_end(&rq->ave_seqcnt); } static void dec_nr_running(struct rq *rq) { + write_seqcount_begin(&rq->ave_seqcnt); + rq->ave_nr_running = do_avg_nr_running(rq); + rq->nr_last_stamp = rq->clock_task; rq->nr_running--; + write_seqcount_end(&rq->ave_seqcnt); } static void set_load_weight(struct task_struct *p) @@ -3255,6 +3294,34 @@ unsigned long nr_iowait(void) return sum; } +unsigned long avg_nr_running(void) +{ + unsigned long i, sum = 0; + unsigned int seqcnt, ave_nr_running; + + for_each_online_cpu(i) { + struct rq *q = cpu_rq(i); + + /* + * Update average to avoid reading stalled value if there were + * no run-queue changes for a long time. On the other hand if + * the changes are happening right now, just read current value + * directly. 
+ */ + seqcnt = read_seqcount_begin(&q->ave_seqcnt); + ave_nr_running = do_avg_nr_running(q); + if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) { + read_seqcount_begin(&q->ave_seqcnt); + ave_nr_running = q->ave_nr_running; + } + + sum += ave_nr_running; + } + + return sum; + +} + unsigned long nr_iowait_cpu(int cpu) { struct rq *this = cpu_rq(cpu); @@ -8192,7 +8259,7 @@ void __init sched_init(void) atomic_set(&nohz.load_balancer, nr_cpu_ids); atomic_set(&nohz.first_pick_cpu, nr_cpu_ids); atomic_set(&nohz.second_pick_cpu, nr_cpu_ids); - //nohz.next_balance = jiffies; + nohz.next_balance = jiffies; #endif /* May be allocated at isolcpus cmdline parse time */ if (cpu_isolated_map == NULL) diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index a6710a112b4..145566ebd99 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -264,6 +264,9 @@ static void print_cpu(struct seq_file *m, int cpu) SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) P(nr_running); + SEQ_printf(m, " .%-30s: %d.%03d \n", "ave_nr_running", + rq->ave_nr_running / FIXED_1, + ((rq->ave_nr_running % FIXED_1) * 1000) / FIXED_1); SEQ_printf(m, " .%-30s: %lu\n", "load", rq->load.weight); P(nr_switches); From 30b4a8062871352f265427b1d02adc6a236e4e37 Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 1 Aug 2012 20:11:01 -0400 Subject: [PATCH 041/678] cpu-tegra: Let's set the stock frequency at boot-time since the anykernel method disallows it from always being set in the ramdisk. --- arch/arm/mach-tegra/cpu-tegra.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index f17a15025f4..bafb1c58e91 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -736,6 +736,9 @@ static int tegra_cpu_init(struct cpufreq_policy *policy) register_pm_notifier(&tegra_cpu_pm_notifier); } + /* set to 1.3GHz stock freq on init */ + policy->max = 1300000; + return 0; } From c6b25be0b95261fb2ecb5c5d36f4f84bb8940092 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Aug 2012 22:29:42 -0400 Subject: [PATCH 042/678] ARM: fix rcu stalls on SMP platforms We can stall RCU processing on SMP platforms if a CPU sits in its idle loop for a long time. This happens because we don't call irq_enter() and irq_exit() around generic_smp_call_function_interrupt() and friends. Add the necessary calls, and remove the one from within ipi_timer(), so that they're all in a common place. 
Note: this patch was already partially in place --- arch/arm/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 47b09a3ea04..e5d953ba4db 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -457,9 +457,7 @@ static DEFINE_PER_CPU(struct clock_event_device, percpu_clockevent); static void ipi_timer(void) { struct clock_event_device *evt = &__get_cpu_var(percpu_clockevent); - irq_enter(); evt->event_handler(evt); - irq_exit(); } #ifdef CONFIG_LOCAL_TIMERS @@ -632,7 +630,9 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) switch (ipinr) { case IPI_TIMER: + irq_enter(); ipi_timer(); + irq_exit(); break; case IPI_RESCHEDULE: From 06ba742c37a8c86f730b4de2d4933ae43019326b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Aug 2012 22:32:35 -0400 Subject: [PATCH 043/678] ARM: SMP: wait for CPU to be marked active When we bring a CPU online, we should wait for it to become active before entering the idle thread, so we know that the scheduler and thread migration is going to work. --- arch/arm/kernel/smp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index e5d953ba4db..19420aeb30e 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -315,6 +315,9 @@ asmlinkage void __cpuinit secondary_start_kernel(void) */ set_cpu_online(cpu, true); + while (!cpu_active(cpu)) + cpu_relax(); + /* * Setup the percpu timer for this CPU. */ From 8acb01bbdc699c338302b74929663220b36d324e Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 1 Aug 2012 22:38:21 -0400 Subject: [PATCH 044/678] Revert "ARM: SMP: wait for CPU to be marked active" This reverts commit 30057f972d5c97fe3f4c24899c139a0b47ee5786. --- arch/arm/kernel/smp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 19420aeb30e..e5d953ba4db 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -315,9 +315,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) */ set_cpu_online(cpu, true); - while (!cpu_active(cpu)) - cpu_relax(); - /* * Setup the percpu timer for this CPU. */ From 904bc093fa4ede4cf89f91a4cb9d4f41b10b853b Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 1 Aug 2012 22:47:06 -0400 Subject: [PATCH 045/678] Revert "ARM: fix rcu stalls on SMP platforms" This reverts commit 0dc67836c228665f1384342e90cec1655263c1b1. --- arch/arm/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index e5d953ba4db..47b09a3ea04 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -457,7 +457,9 @@ static DEFINE_PER_CPU(struct clock_event_device, percpu_clockevent); static void ipi_timer(void) { struct clock_event_device *evt = &__get_cpu_var(percpu_clockevent); + irq_enter(); evt->event_handler(evt); + irq_exit(); } #ifdef CONFIG_LOCAL_TIMERS @@ -630,9 +632,7 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) switch (ipinr) { case IPI_TIMER: - irq_enter(); ipi_timer(); - irq_exit(); break; case IPI_RESCHEDULE: From 60f26372779294ccc25d3a708f60a7c0da9a4c20 Mon Sep 17 00:00:00 2001 From: motley Date: Thu, 2 Aug 2012 09:51:14 -0400 Subject: [PATCH 046/678] Revert "scheduler changes from Nvidia" This reverts commit 81b9284f801bcc8ebd321fc1c92bedd6deeea51b. 
--- include/linux/sched.h | 1 - kernel/sched.c | 69 +------------------------------------------ kernel/sched_debug.c | 3 -- 3 files changed, 1 insertion(+), 72 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index c9e03a9aa95..5bb4dd2e4c5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -139,7 +139,6 @@ extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_iowait(void); -extern unsigned long avg_nr_running(void); extern unsigned long nr_iowait_cpu(int cpu); extern unsigned long this_cpu_load(void); diff --git a/kernel/sched.c b/kernel/sched.c index e8f879f48e7..6121c2ce14b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -472,11 +472,6 @@ struct rq { #endif int skip_clock_update; - /* time-based average load */ - u64 nr_last_stamp; - unsigned int ave_nr_running; - seqcount_t ave_seqcnt; - /* capture load from *all* tasks on this cpu: */ struct load_weight load; unsigned long nr_load_updates; @@ -1761,48 +1756,14 @@ static const struct sched_class rt_sched_class; #include "sched_stats.h" -/* 27 ~= 134217728ns = 134.2ms - * 26 ~= 67108864ns = 67.1ms - * 25 ~= 33554432ns = 33.5ms - * 24 ~= 16777216ns = 16.8ms */ -#define NR_AVE_PERIOD_EXP 27 -#define NR_AVE_SCALE(x) ((x) << FSHIFT) -#define NR_AVE_PERIOD (1 << NR_AVE_PERIOD_EXP) -#define NR_AVE_DIV_PERIOD(x) ((x) >> NR_AVE_PERIOD_EXP) - -static inline unsigned int do_avg_nr_running(struct rq *rq) -{ - s64 nr, deltax; - unsigned int ave_nr_running = rq->ave_nr_running; - - deltax = rq->clock_task - rq->nr_last_stamp; - nr = NR_AVE_SCALE(rq->nr_running); - - if (deltax > NR_AVE_PERIOD) - ave_nr_running = nr; - else - ave_nr_running += - NR_AVE_DIV_PERIOD(deltax * (nr - ave_nr_running)); - - return ave_nr_running; -} - static void inc_nr_running(struct rq *rq) { - write_seqcount_begin(&rq->ave_seqcnt); - rq->ave_nr_running = do_avg_nr_running(rq); - rq->nr_last_stamp = rq->clock_task; rq->nr_running++; - write_seqcount_end(&rq->ave_seqcnt); } static void dec_nr_running(struct rq *rq) { - write_seqcount_begin(&rq->ave_seqcnt); - rq->ave_nr_running = do_avg_nr_running(rq); - rq->nr_last_stamp = rq->clock_task; rq->nr_running--; - write_seqcount_end(&rq->ave_seqcnt); } static void set_load_weight(struct task_struct *p) @@ -3294,34 +3255,6 @@ unsigned long nr_iowait(void) return sum; } -unsigned long avg_nr_running(void) -{ - unsigned long i, sum = 0; - unsigned int seqcnt, ave_nr_running; - - for_each_online_cpu(i) { - struct rq *q = cpu_rq(i); - - /* - * Update average to avoid reading stalled value if there were - * no run-queue changes for a long time. On the other hand if - * the changes are happening right now, just read current value - * directly. 
- */ - seqcnt = read_seqcount_begin(&q->ave_seqcnt); - ave_nr_running = do_avg_nr_running(q); - if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) { - read_seqcount_begin(&q->ave_seqcnt); - ave_nr_running = q->ave_nr_running; - } - - sum += ave_nr_running; - } - - return sum; - -} - unsigned long nr_iowait_cpu(int cpu) { struct rq *this = cpu_rq(cpu); @@ -8259,7 +8192,7 @@ void __init sched_init(void) atomic_set(&nohz.load_balancer, nr_cpu_ids); atomic_set(&nohz.first_pick_cpu, nr_cpu_ids); atomic_set(&nohz.second_pick_cpu, nr_cpu_ids); - nohz.next_balance = jiffies; + //nohz.next_balance = jiffies; #endif /* May be allocated at isolcpus cmdline parse time */ if (cpu_isolated_map == NULL) diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 145566ebd99..a6710a112b4 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -264,9 +264,6 @@ static void print_cpu(struct seq_file *m, int cpu) SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) P(nr_running); - SEQ_printf(m, " .%-30s: %d.%03d \n", "ave_nr_running", - rq->ave_nr_running / FIXED_1, - ((rq->ave_nr_running % FIXED_1) * 1000) / FIXED_1); SEQ_printf(m, " .%-30s: %lu\n", "load", rq->load.weight); P(nr_switches); From ded31352a4046ee10ac686ccc3ca60c2bdf63099 Mon Sep 17 00:00:00 2001 From: motley Date: Thu, 2 Aug 2012 09:51:22 -0400 Subject: [PATCH 047/678] Revert "cpu-tegra2: Tuned the runnable threads threshold from 5/9/13 to 5/9/10 in order to improve performance" This reverts commit 3727a7b3e97dd0573ac93a76d1f025c37cf9765a. --- arch/arm/mach-tegra/cpu-tegra3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index 72c5bf838e5..a61ea21a981 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -191,7 +191,7 @@ enum { #define NR_FSHIFT 2 static unsigned int nr_run_thresholds[] = { /* 1, 2, 3, 4 - on-line cpus target */ - 5, 9, 10, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ + 5, 9, 13, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ }; static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ static unsigned int nr_run_last; From 8c6cd3e60662c4c494f07b6f406726a69d7bd49d Mon Sep 17 00:00:00 2001 From: motley Date: Thu, 2 Aug 2012 09:51:37 -0400 Subject: [PATCH 048/678] Revert "ARM: tegra: power: Use runnable threads average for hotplug" This reverts commit 2cdbd719c0c1eae5399e392ad8ae7f3c883b7331. 
--- arch/arm/mach-tegra/cpu-tegra3.c | 44 +++++--------------------------- 1 file changed, 7 insertions(+), 37 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index a61ea21a981..b5e0d4e8959 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -73,8 +73,6 @@ static struct clk *cpu_clk; static struct clk *cpu_g_clk; static struct clk *cpu_lp_clk; -static unsigned long last_change_time; - static struct { cputime64_t time_up_total; u64 last_update; @@ -188,14 +186,6 @@ enum { TEGRA_CPU_SPEED_SKEWED, }; -#define NR_FSHIFT 2 -static unsigned int nr_run_thresholds[] = { -/* 1, 2, 3, 4 - on-line cpus target */ - 5, 9, 13, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ -}; -static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ -static unsigned int nr_run_last; - static noinline int tegra_cpu_speed_balance(void) { unsigned long highest_speed = tegra_cpu_highest_speed(); @@ -204,36 +194,17 @@ static noinline int tegra_cpu_speed_balance(void) unsigned int nr_cpus = num_online_cpus(); unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4; unsigned int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); - unsigned int avg_nr_run = avg_nr_running(); - unsigned int nr_run; - - /* Evaluate: - * - distribution of freq targets for already on-lined CPUs - * - average number of runnable threads - * - effective MIPS available within EDP frequency limits, - * and return: - * TEGRA_CPU_SPEED_BALANCED to bring one more CPU core on-line - * TEGRA_CPU_SPEED_BIASED to keep CPU core composition unchanged - * TEGRA_CPU_SPEED_SKEWED to remove CPU core off-line - */ - for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) { - unsigned int nr_threshold = nr_run_thresholds[nr_run - 1]; - if (nr_run_last <= nr_run) - nr_threshold += nr_run_hysteresis; - if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT))) - break; - } - nr_run_last = nr_run; + /* balanced: freq targets for all CPUs are above 50% of highest speed + biased: freq target for at least one CPU is below 50% threshold + skewed: freq targets for at least 2 CPUs are below 25% threshold */ if (((tegra_count_slow_cpus(skewed_speed) >= 2) || - (nr_run < nr_cpus) || tegra_cpu_edp_favor_down(nr_cpus, mp_overhead) || (highest_speed <= idle_bottom_freq) || (nr_cpus > max_cpus)) && (nr_cpus > min_cpus)) return TEGRA_CPU_SPEED_SKEWED; if (((tegra_count_slow_cpus(balanced_speed) >= 1) || - (nr_run <= nr_cpus) || (!tegra_cpu_edp_favor_up(nr_cpus, mp_overhead)) || (highest_speed <= idle_bottom_freq) || (nr_cpus == max_cpus)) && (nr_cpus >= min_cpus)) @@ -251,6 +222,7 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) bool up = false; unsigned int cpu = nr_cpu_ids; unsigned long now = jiffies; + static unsigned long last_change_time; mutex_lock(tegra3_cpu_lock); @@ -263,7 +235,7 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) if (cpu < nr_cpu_ids) { up = false; } else if (!is_lp_cluster() && !no_lp && - ((now - last_change_time) >= down_delay)) { + !pm_qos_request(PM_QOS_MIN_ONLINE_CPUS)) { if(!clk_set_parent(cpu_clk, cpu_lp_clk)) { hp_stats_update(CONFIG_NR_CPUS, true); hp_stats_update(0, false); @@ -273,12 +245,11 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) } } queue_delayed_work( - hotplug_wq, &hotplug_work, up2gn_delay); + hotplug_wq, &hotplug_work, down_delay); break; case TEGRA_HP_UP: if (is_lp_cluster() && !no_lp) { if(!clk_set_parent(cpu_clk, cpu_g_clk)) { - last_change_time = 
now; hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); /* catch-up with governor target speed */ @@ -345,7 +316,6 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) tegra_update_cpu_speed(speed); if (!clk_set_parent(cpu_clk, cpu_g_clk)) { - last_change_time = jiffies; hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); } @@ -411,7 +381,7 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) } else if (cpu_freq <= bottom_freq) { hp_state = TEGRA_HP_DOWN; queue_delayed_work( - hotplug_wq, &hotplug_work, up_delay); + hotplug_wq, &hotplug_work, down_delay); } break; case TEGRA_HP_DOWN: From cc457c29d105cbd79f883c6b4dc40a156dd36483 Mon Sep 17 00:00:00 2001 From: motley Date: Thu, 2 Aug 2012 09:52:40 -0400 Subject: [PATCH 049/678] Revert "Changed highest frequency from 1.624 to 1.7GHz" This reverts commit 7ff8985adb62c0fc355f87626372101aa70e227f. --- arch/arm/mach-tegra/board-grouper-panel.c | 2 +- arch/arm/mach-tegra/common.c | 3 +- arch/arm/mach-tegra/tegra3_clocks.c | 21 +--- arch/arm/mach-tegra/tegra3_dvfs.c | 126 +++++++++++----------- arch/arm/mach-tegra/tegra3_speedo.c | 2 - 5 files changed, 65 insertions(+), 89 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 09efa40ef59..bc38a170e59 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -410,7 +410,7 @@ static struct tegra_dc_sd_settings grouper_sd_settings = { .bin_width = -1, .aggressiveness = 1, .phase_in_adjustments = true, - .panel_min_brightness = 18, + .panel_min_brightness = 13, .use_vid_luma = false, /* Default video coefficients */ .coeff = {5, 9, 2}, diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index f002b51ef6c..d91ad83fd6a 100755 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -103,7 +103,7 @@ static struct board_info pmu_board_info; static struct board_info display_board_info; static struct board_info camera_board_info; -static int pmu_core_edp = 1250; /* default 1.2V EDP limit */ +static int pmu_core_edp = 1200; /* default 1.2V EDP limit */ static int board_panel_type; static enum power_supply_type pow_supply_type = POWER_SUPPLY_TYPE_MAINS; @@ -560,7 +560,6 @@ static int __init tegra_pmu_core_edp(char *options) { char *p = options; int core_edp = memparse(p, &p); - printk("tegra common core_edp: %u\n",core_edp); if (core_edp != 0) pmu_core_edp = core_edp; return 0; diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 70d2a3d8570..63a16c72db7 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4639,24 +4639,6 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { {14, CPUFREQ_TABLE_END }, }; -static struct cpufreq_frequency_table freq_table_1p624GHz[] = { - { 0, 51000 }, - { 1, 102000 }, - { 2, 204000 }, - { 3, 340000 }, - { 4, 475000 }, - { 5, 640000 }, - { 6, 860000 }, - { 7, 1000000 }, - { 8, 1200000 }, - { 9, 1300000 }, - {10, 1400000 }, - {11, 1500000 }, - {12, 1600000 }, - {13, 1624000 }, - {14, CPUFREQ_TABLE_END }, -}; - static struct cpufreq_frequency_table freq_table_1p7GHz[] = { { 0, 51000 }, { 1, 102000 }, @@ -4671,7 +4653,7 @@ static struct cpufreq_frequency_table freq_table_1p7GHz[] = { {10, 1400000 }, {11, 1500000 }, {12, 1600000 }, - {13, 1700000 }, + {13, 1624000 }, {14, CPUFREQ_TABLE_END }, }; @@ -4682,7 +4664,6 @@ static struct tegra_cpufreq_table_data 
cpufreq_tables[] = { { freq_table_1p4GHz, 2, 11 }, { freq_table_1p5GHz, 2, 12 }, { freq_table_1p6GHz, 2, 12 }, - { freq_table_1p624GHz, 2, 12 }, { freq_table_1p7GHz, 2, 12 }, }; diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 22855e08c40..f182d90c072 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -30,7 +30,7 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1190, 1212, 1237}; #endif static bool tegra_dvfs_cpu_disabled; @@ -86,13 +86,11 @@ static int tegra3_get_core_floor_mv(int cpu_mv) return 1100; if ((tegra_cpu_speedo_id() < 2) || (tegra_cpu_speedo_id() == 4) || -// (tegra_cpu_speedo_id() == 7) || + (tegra_cpu_speedo_id() == 7) || (tegra_cpu_speedo_id() == 8)) return 1200; if (cpu_mv < 1100) return 1200; - if (tegra_cpu_speedo_id() == 7) - return 1250; if (cpu_mv <= 1250) return 1300; BUG(); @@ -178,11 +176,11 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 6, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), CPU_DVFS("cpu_g", 6, 4, MHZ, 550, 550, 770, 770, 940, 940, 1160, 1240, 1280, 1360, 1390, 1470, 1500, 1520, 1520, 1590, 1700), - CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 480, 550, 570, 680, 700, 820, 970, 1040, 1080, 1150, 1200, 1280, 1360, 1500, 1540, 1600, 1700), - CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 500, 650, 670, 780, 800, 990, 1040, 1100, 1200, 1300, 1320, 1380, 1400, 1500, 1540, 1600, 1700), - CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1150, 1200, 1240, 1300, 1340, 1380, 1400, 1500, 1540, 1600, 1700), - CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 750, 770, 910, 940, 1150, 1230, 1280, 1320, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1700), - CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 750, 770, 940, 940, 1160, 1280, 1300, 1330, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1700), + CPU_DVFS("cpu_g", 7, 0, MHZ, 460, 480, 550, 570, 680, 700, 820, 970, 1040, 1080, 1150, 1200, 1280, 1360, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 1, MHZ, 480, 500, 650, 670, 780, 800, 990, 1040, 1100, 1200, 1300, 1320, 1380, 1400, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1150, 1200, 1240, 1300, 1340, 1380, 1400, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 3, MHZ, 550, 550, 750, 770, 910, 940, 1150, 1230, 1280, 1320, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1624), + CPU_DVFS("cpu_g", 7, 4, MHZ, 550, 550, 750, 770, 940, 940, 1160, 1280, 1300, 1330, 1340, 1360, 1380, 1400, 1500, 1540, 1600, 1624), CPU_DVFS("cpu_g", 8, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1280, 1300), CPU_DVFS("cpu_g", 8, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1300), @@ -244,7 +242,7 @@ static struct dvfs core_dvfs_table[] = { #ifdef CONFIG_GPU_OVERCLOCK #ifdef GPU_OC_446 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 446000, 446000, 446000), + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), @@ -252,7 +250,7 
@@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 446000, 446000, 446000), + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), @@ -261,55 +259,55 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), #endif #ifdef GPU_OC_484 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 484000, 484000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 484000, 484000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 484000, 484000, 484000), + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + 
CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), #endif #ifdef GPU_OC_500 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 500000, 500000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 500000, 500000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 500000, 500000, 500000), + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), #endif #ifdef GPU_OC_520 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 520000, 520000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 
520000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 520000, 520000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 520000, 520000, 520000), + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), #endif #else CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), @@ -351,26 +349,26 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("host1x", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), #ifdef CONFIG_GPU_OVERCLOCK #ifdef GPU_OC_446 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 446000, 446000, 446000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 446000, 446000, 446000), + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif #ifdef GPU_OC_484 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 484000, 484000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 484000, 484000, 484000), + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 
484000), #endif #ifdef GPU_OC_500 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 500000, 500000, 500000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 500000, 500000, 500000), + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif #ifdef GPU_OC_520 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 520000, 520000, 520000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 437000, 520000, 520000, 520000), + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif diff --git a/arch/arm/mach-tegra/tegra3_speedo.c b/arch/arm/mach-tegra/tegra3_speedo.c index 62f5f246172..8d450dd6734 100644 --- a/arch/arm/mach-tegra/tegra3_speedo.c +++ b/arch/arm/mach-tegra/tegra3_speedo.c @@ -526,8 +526,6 @@ int tegra_core_speedo_mv(void) case 0: return 1200; case 1: - if (cpu_speedo_id == 7) - return 1250; if ((cpu_speedo_id != 7) && (cpu_speedo_id != 8)) return 1200; /* fall thru for T30L or T30SL */ From df3b3c515fd2524102042352b3a3d66e25fde26f Mon Sep 17 00:00:00 2001 From: motley Date: Fri, 3 Aug 2012 17:53:09 -0400 Subject: [PATCH 050/678] v1.0.12 changes Changed highest frequency back to 1.624GHz and core voltage back to 1200mV. Refresh rate now adjusted with GPU OC clock at compile time. Higher FPS should be realized at 484 and 520 for most (thanks to clemsyn for sharing his research and findings) Adjustments to the tegra 3 algorirthim for bring cpus online and offline, especially for the OC frequencies. Fixed GPU clock compile time switch. Removed 500MHz choice. Moved the frequency policy to 1.3GHz on startup to cpu 0 only. Minor adjustment to the interactive governor to make it slightly more responsive when demand increases. 
--- arch/arm/mach-tegra/Kconfig | 2 - arch/arm/mach-tegra/board-grouper-panel.c | 19 +++++- arch/arm/mach-tegra/cpu-tegra.c | 5 +- arch/arm/mach-tegra/cpu-tegra3.c | 19 ++++-- arch/arm/mach-tegra/tegra3_dvfs.c | 71 ++++++++--------------- drivers/cpufreq/cpufreq_interactive.c | 4 +- 6 files changed, 61 insertions(+), 59 deletions(-) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 8c0cae34b59..9f32b0a7412 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -301,8 +301,6 @@ choice bool "446 MHz" config GPU_OC_484 bool "484 MHz" - config GPU_OC_500 - bool "500 MHz" config GPU_OC_520 bool "520 MHz" diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index bc38a170e59..2c3c43e3788 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -387,9 +387,24 @@ static struct resource grouper_disp2_resources[] = { #endif static struct tegra_dc_mode grouper_panel_modes[] = { - { - /* 1280x800@60Hz */ +{ +#ifdef CONFIG_GPU_OVERCLOCK +#ifdef CONFIG_GPU_OC_446 + /* 1280x800@62Hz */ .pclk = 74180000, +#endif +#ifdef CONFIG_GPU_OC_484 + /* 1280x800@63Hz */ + .pclk = 77395348, +#endif +#ifdef CONFIG_GPU_OC_520 + /* 1280x800@65Hz */ + .pclk = 81170731, +#endif +#else + /* 1280x800@60Hz */ + .pclk = 74180000, +#endif .h_ref_to_sync = 1, .v_ref_to_sync = 1, .h_sync_width = 24, diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index bafb1c58e91..dda18c699d1 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -733,12 +733,11 @@ static int tegra_cpu_init(struct cpufreq_policy *policy) cpumask_copy(policy->related_cpus, cpu_possible_mask); if (policy->cpu == 0) { + /* set to 1.3GHz stock freq on init */ + policy->max = 1300000; register_pm_notifier(&tegra_cpu_pm_notifier); } - /* set to 1.3GHz stock freq on init */ - policy->max = 1300000; - return 0; } diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index b5e0d4e8959..b221ff9353c 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -41,7 +41,7 @@ #define INITIAL_STATE TEGRA_HP_DISABLED #define UP2G0_DELAY_MS 70 #define UP2Gn_DELAY_MS 100 -#define DOWN_DELAY_MS 1000 +#define DOWN_DELAY_MS 750 static struct mutex *tegra3_cpu_lock; @@ -66,7 +66,7 @@ module_param(idle_bottom_freq, uint, 0644); static int mp_overhead = 10; module_param(mp_overhead, int, 0644); -static int balance_level = 75; +static int balance_level = 60; module_param(balance_level, int, 0644); static struct clk *cpu_clk; @@ -189,11 +189,22 @@ enum { static noinline int tegra_cpu_speed_balance(void) { unsigned long highest_speed = tegra_cpu_highest_speed(); - unsigned long balanced_speed = highest_speed * balance_level / 100; - unsigned long skewed_speed = balanced_speed / 2; unsigned int nr_cpus = num_online_cpus(); unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? 
: 4; unsigned int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); + unsigned long balanced_speed; + + switch (highest_speed) { + case 1700000: + case 1600000: + case 1500000: + balanced_speed = 860000; + break; + default: + highest_speed * balance_level / 100; + } + + unsigned long skewed_speed = balanced_speed / 2; /* balanced: freq targets for all CPUs are above 50% of highest speed biased: freq target for at least one CPU is below 50% threshold diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index f182d90c072..3696dfef908 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -240,8 +240,10 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("vi", 2, 1, KHZ, 1, 219000, 267000, 300000, 371000, 409000, 425000, 425000, 425000), CORE_DVFS("vi", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 470000, 470000, 470000), +/* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ + #ifdef CONFIG_GPU_OVERCLOCK -#ifdef GPU_OC_446 +#ifdef CONFIG_GPU_OC_446 CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), @@ -258,7 +260,7 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), #endif -#ifdef GPU_OC_484 +#ifdef CONFIG_GPU_OC_484 CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), @@ -275,39 +277,22 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), #endif -#ifdef GPU_OC_500 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 500000, 500000, 
500000), -#endif -#ifdef GPU_OC_520 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 520000, 520000, 520000), +#ifdef CONFIG_GPU_OC_520 + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), #endif #else CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), @@ -348,27 +333,21 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("host1x", 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), CORE_DVFS("host1x", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), #ifdef CONFIG_GPU_OVERCLOCK -#ifdef GPU_OC_446 +#ifdef CONFIG_GPU_OC_446 CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 
484000, 484000, 484000, 484000, 484000, 484000), #endif -#ifdef GPU_OC_484 +#ifdef CONFIG_GPU_OC_484 CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif -#ifdef GPU_OC_500 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 500000, 500000, 500000), - CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), - CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), -#endif -#ifdef GPU_OC_520 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 520000, 520000, 520000), +#ifdef CONFIG_GPU_OC_520 + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 0a7b6d1c660..f6de88549cc 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -89,11 +89,11 @@ static struct cpufreq_interactive_core_lock core_lock; static u64 hispeed_freq; /* Boost frequency by boost_factor when CPU load at or above this value. */ -#define DEFAULT_GO_MAXSPEED_LOAD 85 +#define DEFAULT_GO_MAXSPEED_LOAD 80 static unsigned long go_maxspeed_load; /* Go to hispeed_freq when CPU load at or above this value. 
*/ -#define DEFAULT_GO_HISPEED_LOAD 85 +#define DEFAULT_GO_HISPEED_LOAD 80 static unsigned long go_hispeed_load; /* Base of exponential raise to max speed; if 0 - jump to maximum */ From fa22f7d48255e3c40dc410a3f26d198b90d70a07 Mon Sep 17 00:00:00 2001 From: Ezekeel Date: Tue, 7 Aug 2012 01:12:19 -0400 Subject: [PATCH 051/678] Added FSync Control version 1 --- drivers/misc/Kconfig | 6 ++ drivers/misc/Makefile | 1 + drivers/misc/fsync_control.c | 110 +++++++++++++++++++++++++++++++++++ fs/sync.c | 45 ++++++++++++++ 4 files changed, 162 insertions(+) create mode 100644 drivers/misc/fsync_control.c diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 85eff542b78..74ef5a57820 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -577,4 +577,10 @@ source "drivers/misc/lis3lv02d/Kconfig" source "drivers/misc/carma/Kconfig" source "drivers/misc/tegra-baseband/Kconfig" +config FSYNC_CONTROL + bool "Support for FSync Control" + default y + help + Say Y here to enable FSync Control + endif # MISC_DEVICES diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 7c26f45f609..ffb00c2a8a1 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -59,3 +59,4 @@ obj-$(CONFIG_BCM4330_RFKILL) += bcm4330_rfkill.o obj-$(CONFIG_TEGRA_CRYPTO_DEV) += tegra-cryptodev.o obj-$(CONFIG_TEGRA_BB_SUPPORT) += tegra-baseband/ obj-$(CONFIG_MAX1749_VIBRATOR) += max1749.o +obj-$(CONFIG_FSYNC_CONTROL) += fsync_control.o \ No newline at end of file diff --git a/drivers/misc/fsync_control.c b/drivers/misc/fsync_control.c new file mode 100644 index 00000000000..eceb8f7cc53 --- /dev/null +++ b/drivers/misc/fsync_control.c @@ -0,0 +1,110 @@ +/* drivers/misc/fsync_control.c + * + * Copyright 2012 Ezekeel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#define FSYNCCONTROL_VERSION 1 + +static bool fsync_enabled = true; + +bool fsynccontrol_fsync_enabled() +{ + return fsync_enabled; +} +EXPORT_SYMBOL(fsynccontrol_fsync_enabled); + +static ssize_t fsynccontrol_status_read(struct device * dev, struct device_attribute * attr, char * buf) +{ + return sprintf(buf, "%u\n", (fsync_enabled ? 
1 : 0)); +} + +static ssize_t fsynccontrol_status_write(struct device * dev, struct device_attribute * attr, const char * buf, size_t size) +{ + unsigned int data; + + if(sscanf(buf, "%u\n", &data) == 1) + { + if (data == 1) + { + pr_info("%s: FSYNCCONTROL fsync enabled\n", __FUNCTION__); + + fsync_enabled = true; + + } + else if (data == 0) + { + pr_info("%s: FSYNCCONTROL fsync disabled\n", __FUNCTION__); + + fsync_enabled = false; + } + else + { + pr_info("%s: invalid input range %u\n", __FUNCTION__, data); + } + } + else + { + pr_info("%s: invalid input\n", __FUNCTION__); + } + + return size; +} + +static ssize_t fsynccontrol_version(struct device * dev, struct device_attribute * attr, char * buf) +{ + return sprintf(buf, "%u\n", FSYNCCONTROL_VERSION); +} + +static DEVICE_ATTR(fsync_enabled, S_IRUGO | S_IWUGO, fsynccontrol_status_read, fsynccontrol_status_write); +static DEVICE_ATTR(version, S_IRUGO , fsynccontrol_version, NULL); + +static struct attribute *fsynccontrol_attributes[] = + { + &dev_attr_fsync_enabled.attr, + &dev_attr_version.attr, + NULL + }; + +static struct attribute_group fsynccontrol_group = + { + .attrs = fsynccontrol_attributes, + }; + +static struct miscdevice fsynccontrol_device = + { + .minor = MISC_DYNAMIC_MINOR, + .name = "fsynccontrol", + }; + +static int __init fsynccontrol_init(void) +{ + int ret; + + pr_info("%s misc_register(%s)\n", __FUNCTION__, fsynccontrol_device.name); + + ret = misc_register(&fsynccontrol_device); + + if (ret) + { + pr_err("%s misc_register(%s) fail\n", __FUNCTION__, fsynccontrol_device.name); + return 1; + } + + if (sysfs_create_group(&fsynccontrol_device.this_device->kobj, &fsynccontrol_group) < 0) + { + pr_err("%s sysfs_create_group fail\n", __FUNCTION__); + pr_err("Failed to create sysfs group for device (%s)!\n", fsynccontrol_device.name); + } + + return 0; +} + +device_initcall(fsynccontrol_init); diff --git a/fs/sync.c b/fs/sync.c index c98a7477edf..b918fb9649e 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -21,6 +21,10 @@ #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) +#ifdef CONFIG_FSYNC_CONTROL +extern bool fsynccontrol_fsync_enabled(); +#endif + /* * Do the filesystem syncing work. 
For simple filesystems * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to @@ -139,6 +143,11 @@ SYSCALL_DEFINE1(syncfs, int, fd) int ret; int fput_needed; +#ifdef CONFIG_FSYNC_CONTROL + if (!fsynccontrol_fsync_enabled()) + return 0; +#endif + file = fget_light(fd, &fput_needed); if (!file) return -EBADF; @@ -165,6 +174,12 @@ SYSCALL_DEFINE1(syncfs, int, fd) */ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) { + +#ifdef CONFIG_FSYNC_CONTROL + if (!fsynccontrol_fsync_enabled()) + return 0; +#endif + if (!file->f_op || !file->f_op->fsync) return -EINVAL; return file->f_op->fsync(file, start, end, datasync); @@ -181,6 +196,11 @@ EXPORT_SYMBOL(vfs_fsync_range); */ int vfs_fsync(struct file *file, int datasync) { +#ifdef CONFIG_FSYNC_CONTROL + if (!fsynccontrol_fsync_enabled()) + return 0; +#endif + return vfs_fsync_range(file, 0, LLONG_MAX, datasync); } EXPORT_SYMBOL(vfs_fsync); @@ -190,6 +210,11 @@ static int do_fsync(unsigned int fd, int datasync) struct file *file; int ret = -EBADF; +#ifdef CONFIG_FSYNC_CONTROL + if (!fsynccontrol_fsync_enabled()) + return 0; +#endif + file = fget(fd); if (file) { ret = vfs_fsync(file, datasync); @@ -200,11 +225,21 @@ static int do_fsync(unsigned int fd, int datasync) SYSCALL_DEFINE1(fsync, unsigned int, fd) { +#ifdef CONFIG_FSYNC_CONTROL + if (!fsynccontrol_fsync_enabled()) + return 0; +#endif + return do_fsync(fd, 0); } SYSCALL_DEFINE1(fdatasync, unsigned int, fd) { +#ifdef CONFIG_FSYNC_CONTROL + if (!fsynccontrol_fsync_enabled()) + return 0; +#endif + return do_fsync(fd, 1); } @@ -218,6 +253,11 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) */ int generic_write_sync(struct file *file, loff_t pos, loff_t count) { +#ifdef CONFIG_FSYNC_CONTROL + if (!fsynccontrol_fsync_enabled()) + return 0; +#endif + if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) return 0; return vfs_fsync_range(file, pos, pos + count - 1, @@ -282,6 +322,11 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, int fput_needed; umode_t i_mode; +#ifdef CONFIG_FSYNC_CONTROL + if (!fsynccontrol_fsync_enabled()) + return 0; +#endif + ret = -EINVAL; if (flags & ~VALID_FLAGS) goto out; From 5e7ea89bfdc45e318cdb8919c6af36a25278c600 Mon Sep 17 00:00:00 2001 From: motley Date: Tue, 7 Aug 2012 19:36:39 -0400 Subject: [PATCH 052/678] mm: increase max and min readahead VM_MAX_READAHEAD 1024 VM_MIN_READAHEAD 32 --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index d1d9840093f..e7a00ac950b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1434,8 +1434,8 @@ int write_one_page(struct page *page, int wait); void task_dirty_inc(struct task_struct *tsk); /* readahead.c */ -#define VM_MAX_READAHEAD 128 /* kbytes */ -#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ +#define VM_MAX_READAHEAD 1024 /* kbytes */ +#define VM_MIN_READAHEAD 32 /* kbytes (includes current page) */ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read); From 30ba6ce3004dbed81a00e8e7af83ddf30085f933 Mon Sep 17 00:00:00 2001 From: motley Date: Tue, 7 Aug 2012 20:23:49 -0400 Subject: [PATCH 053/678] Use glibc memcpy and memmove --- include/linux/memcopy.h | 226 ++++++++++++++++++++++ lib/Makefile | 2 +- lib/memcopy.c | 403 ++++++++++++++++++++++++++++++++++++++++ lib/string.c | 32 ++-- 4 files changed, 644 insertions(+), 19 deletions(-) create mode 100644 
include/linux/memcopy.h create mode 100644 lib/memcopy.c diff --git a/include/linux/memcopy.h b/include/linux/memcopy.h new file mode 100644 index 00000000000..9c65ac847f5 --- /dev/null +++ b/include/linux/memcopy.h @@ -0,0 +1,226 @@ +/* + * memcopy.h -- definitions for memory copy functions. Generic C version. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * The code is derived from the GNU C Library. + * Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc. + */ +#ifndef _LINUX_MEMCOPY_H_ +#define _LINUX_MEMCOPY_H_ + +/* + * The strategy of the memory functions is: + * + * 1. Copy bytes until the destination pointer is aligned. + * + * 2. Copy words in unrolled loops. If the source and destination + * are not aligned in the same way, use word memory operations, + * but shift and merge two read words before writing. + * + * 3. Copy the few remaining bytes. + * + * This is fast on processors that have at least 10 registers for + * allocation by GCC, and that can access memory at reg+const in one + * instruction. + */ + +#include +#include +#include + +/* + * The macros defined in this file are: + * + * BYTE_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy) + * + * BYTE_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy) + * + * WORD_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_remaining, nbytes_to_copy) + * + * WORD_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_remaining, nbytes_to_copy) + * + * MERGE(old_word, sh_1, new_word, sh_2) + * + * MEM_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy) + * + * MEM_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy) + */ + +#define OP_T_THRESHOLD 16 + +/* + * Type to use for aligned memory operations. + * This should normally be the biggest type supported by a single load + * and store. + */ +#define op_t unsigned long int +#define OPSIZ (sizeof(op_t)) + +/* Type to use for unaligned operations. */ +typedef unsigned char byte; + +#ifndef MERGE +# ifdef __LITTLE_ENDIAN +# define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2))) +# elif defined(__BIG_ENDIAN) +# define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2))) +# else +# error "Macro MERGE() hasn't defined!" +# endif +#endif + +/* + * Copy exactly NBYTES bytes from SRC_BP to DST_BP, + * without any assumptions about alignment of the pointers. + */ +#ifndef BYTE_COPY_FWD +#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \ +do { \ + size_t __nbytes = (nbytes); \ + while (__nbytes > 0) { \ + byte __x = ((byte *) src_bp)[0]; \ + src_bp += 1; \ + __nbytes -= 1; \ + ((byte *) dst_bp)[0] = __x; \ + dst_bp += 1; \ + } \ +} while (0) +#endif + +/* + * Copy exactly NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR, + * beginning at the bytes right before the pointers and continuing towards + * smaller addresses. Don't assume anything about alignment of the + * pointers. 
+ */ +#ifndef BYTE_COPY_BWD +#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \ +do { \ + size_t __nbytes = (nbytes); \ + while (__nbytes > 0) { \ + byte __x; \ + src_ep -= 1; \ + __x = ((byte *) src_ep)[0]; \ + dst_ep -= 1; \ + __nbytes -= 1; \ + ((byte *) dst_ep)[0] = __x; \ + } \ +} while (0) +#endif +/* + * Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with + * the assumption that DST_BP is aligned on an OPSIZ multiple. If + * not all bytes could be easily copied, store remaining number of bytes + * in NBYTES_LEFT, otherwise store 0. + */ +extern void _wordcopy_fwd_aligned(long int, long int, size_t); +extern void _wordcopy_fwd_dest_aligned(long int, long int, size_t); +#ifndef WORD_COPY_FWD +#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ +do { \ + if (src_bp % OPSIZ == 0) \ + _wordcopy_fwd_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \ + else \ + _wordcopy_fwd_dest_aligned (dst_bp, src_bp, (nbytes) / OPSIZ);\ + \ + src_bp += (nbytes) & -OPSIZ; \ + dst_bp += (nbytes) & -OPSIZ; \ + (nbytes_left) = (nbytes) % OPSIZ; \ +} while (0) +#endif + +/* + * Copy *up to* NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR, + * beginning at the words (of type op_t) right before the pointers and + * continuing towards smaller addresses. May take advantage of that + * DST_END_PTR is aligned on an OPSIZ multiple. If not all bytes could be + * easily copied, store remaining number of bytes in NBYTES_REMAINING, + * otherwise store 0. + */ +extern void _wordcopy_bwd_aligned(long int, long int, size_t); +extern void _wordcopy_bwd_dest_aligned(long int, long int, size_t); +#ifndef WORD_COPY_BWD +#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \ +do { \ + if (src_ep % OPSIZ == 0) \ + _wordcopy_bwd_aligned (dst_ep, src_ep, (nbytes) / OPSIZ); \ + else \ + _wordcopy_bwd_dest_aligned (dst_ep, src_ep, (nbytes) / OPSIZ);\ + \ + src_ep -= (nbytes) & -OPSIZ; \ + dst_ep -= (nbytes) & -OPSIZ; \ + (nbytes_left) = (nbytes) % OPSIZ; \ +} while (0) +#endif + +/* Copy memory from the beginning to the end */ +#ifndef MEM_COPY_FWD +static __always_inline void mem_copy_fwd(unsigned long dstp, + unsigned long srcp, + size_t count) +{ + /* If there not too few bytes to copy, use word copy. */ + if (count >= OP_T_THRESHOLD) { + /* Copy just a few bytes to make dstp aligned. */ + count -= (-dstp) % OPSIZ; + BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ); + + /* + * Copy from srcp to dstp taking advantage of the known + * alignment of dstp. Number if bytes remaining is put in + * the third argument. + */ + WORD_COPY_FWD(dstp, srcp, count, count); + + /* Fall out and copy the tail. */ + } + + /* There are just a few bytes to copy. Use byte memory operations. */ + BYTE_COPY_FWD(dstp, srcp, count); +} +#endif + +/* Copy memory from the end to the beginning. */ +#ifndef MEM_COPY_BWD +static __always_inline void mem_copy_bwd(unsigned long dstp, + unsigned long srcp, + size_t count) +{ + srcp += count; + dstp += count; + + /* If there not too few bytes to copy, use word copy. */ + if (count >= OP_T_THRESHOLD) { + /* Copy just a few bytes to make dstp aligned. */ + count -= dstp % OPSIZ; + BYTE_COPY_BWD(dstp, srcp, dstp % OPSIZ); + + /* + * Copy from srcp to dstp taking advantage of the known + * alignment of dstp. Number if bytes remaining is put in + * the third argument. + */ + WORD_COPY_BWD(dstp, srcp, count, count); + + /* Fall out and copy the tail. */ + } + + /* There are just a few bytes to copy. Use byte memory operations. 
*/ + BYTE_COPY_BWD (dstp, srcp, count); +} +#endif + +#endif diff --git a/lib/Makefile b/lib/Makefile index 3f5bc6d903e..982dbf01b21 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o + is_single_threaded.o plist.o decompress.o memcopy.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/memcopy.c b/lib/memcopy.c new file mode 100644 index 00000000000..70fb6b2da1c --- /dev/null +++ b/lib/memcopy.c @@ -0,0 +1,403 @@ +/* + * memcopy.c -- subroutines for memory copy functions. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * The code is derived from the GNU C Library. + * Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc. + */ + +/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */ + +#include + +/* + * _wordcopy_fwd_aligned -- Copy block beginning at SRCP to block beginning + * at DSTP with LEN `op_t' words (not LEN bytes!). + * Both SRCP and DSTP should be aligned for memory operations on `op_t's. + */ +void _wordcopy_fwd_aligned (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + switch (len % 8) { + case 2: + a0 = ((op_t *) srcp)[0]; + srcp -= 6 * OPSIZ; + dstp -= 7 * OPSIZ; + len += 6; + goto do1; + case 3: + a1 = ((op_t *) srcp)[0]; + srcp -= 5 * OPSIZ; + dstp -= 6 * OPSIZ; + len += 5; + goto do2; + case 4: + a0 = ((op_t *) srcp)[0]; + srcp -= 4 * OPSIZ; + dstp -= 5 * OPSIZ; + len += 4; + goto do3; + case 5: + a1 = ((op_t *) srcp)[0]; + srcp -= 3 * OPSIZ; + dstp -= 4 * OPSIZ; + len += 3; + goto do4; + case 6: + a0 = ((op_t *) srcp)[0]; + srcp -= 2 * OPSIZ; + dstp -= 3 * OPSIZ; + len += 2; + goto do5; + case 7: + a1 = ((op_t *) srcp)[0]; + srcp -= 1 * OPSIZ; + dstp -= 2 * OPSIZ; + len += 1; + goto do6; + case 0: + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + return; + a0 = ((op_t *) srcp)[0]; + srcp -= 0 * OPSIZ; + dstp -= 1 * OPSIZ; + goto do7; + case 1: + a1 = ((op_t *) srcp)[0]; + srcp -=-1 * OPSIZ; + dstp -= 0 * OPSIZ; + len -= 1; + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + goto do0; + goto do8; /* No-op. 
*/ + } + + do { +do8: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a1; +do7: + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[1] = a0; +do6: + a0 = ((op_t *) srcp)[2]; + ((op_t *) dstp)[2] = a1; +do5: + a1 = ((op_t *) srcp)[3]; + ((op_t *) dstp)[3] = a0; +do4: + a0 = ((op_t *) srcp)[4]; + ((op_t *) dstp)[4] = a1; +do3: + a1 = ((op_t *) srcp)[5]; + ((op_t *) dstp)[5] = a0; +do2: + a0 = ((op_t *) srcp)[6]; + ((op_t *) dstp)[6] = a1; +do1: + a1 = ((op_t *) srcp)[7]; + ((op_t *) dstp)[7] = a0; + + srcp += 8 * OPSIZ; + dstp += 8 * OPSIZ; + len -= 8; + } while (len != 0); + + /* + * This is the right position for do0. Please don't move it into + * the loop. + */ +do0: + ((op_t *) dstp)[0] = a1; +} + +/* + * _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to block + * beginning at DSTP with LEN `op_t' words (not LEN bytes!). DSTP should + * be aligned for memory operations on `op_t's, but SRCP must *not* be aligned. + */ + +void _wordcopy_fwd_dest_aligned (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2, a3; + int sh_1, sh_2; + + /* + * Calculate how to shift a word read at the memory operation aligned + * srcp to make it aligned for copy. + */ + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* + * Make SRCP aligned by rounding it down to the beginning of the `op_t' + * it points in the middle of. + */ + srcp &= -OPSIZ; + + switch (len % 4) { + case 2: + a1 = ((op_t *) srcp)[0]; + a2 = ((op_t *) srcp)[1]; + srcp -= 1 * OPSIZ; + dstp -= 3 * OPSIZ; + len += 2; + goto do1; + case 3: + a0 = ((op_t *) srcp)[0]; + a1 = ((op_t *) srcp)[1]; + srcp -= 0 * OPSIZ; + dstp -= 2 * OPSIZ; + len += 1; + goto do2; + case 0: + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + return; + a3 = ((op_t *) srcp)[0]; + a0 = ((op_t *) srcp)[1]; + srcp -=-1 * OPSIZ; + dstp -= 1 * OPSIZ; + len += 0; + goto do3; + case 1: + a2 = ((op_t *) srcp)[0]; + a3 = ((op_t *) srcp)[1]; + srcp -=-2 * OPSIZ; + dstp -= 0 * OPSIZ; + len -= 1; + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + goto do0; + goto do4; /* No-op. */ + } + + do { +do4: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); +do3: + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2); +do2: + a2 = ((op_t *) srcp)[2]; + ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2); +do1: + a3 = ((op_t *) srcp)[3]; + ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2); + + srcp += 4 * OPSIZ; + dstp += 4 * OPSIZ; + len -= 4; + } while (len != 0); + + /* + * This is the right position for do0. Please don't move it into + * the loop. + */ +do0: + ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); +} + +/* + * _wordcopy_bwd_aligned -- Copy block finishing right before + * SRCP to block finishing right before DSTP with LEN `op_t' words (not LEN + * bytes!). Both SRCP and DSTP should be aligned for memory operations + * on `op_t's. 
+ */ +void _wordcopy_bwd_aligned (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + switch (len % 8) { + case 2: + srcp -= 2 * OPSIZ; + dstp -= 1 * OPSIZ; + a0 = ((op_t *) srcp)[1]; + len += 6; + goto do1; + case 3: + srcp -= 3 * OPSIZ; + dstp -= 2 * OPSIZ; + a1 = ((op_t *) srcp)[2]; + len += 5; + goto do2; + case 4: + srcp -= 4 * OPSIZ; + dstp -= 3 * OPSIZ; + a0 = ((op_t *) srcp)[3]; + len += 4; + goto do3; + case 5: + srcp -= 5 * OPSIZ; + dstp -= 4 * OPSIZ; + a1 = ((op_t *) srcp)[4]; + len += 3; + goto do4; + case 6: + srcp -= 6 * OPSIZ; + dstp -= 5 * OPSIZ; + a0 = ((op_t *) srcp)[5]; + len += 2; + goto do5; + case 7: + srcp -= 7 * OPSIZ; + dstp -= 6 * OPSIZ; + a1 = ((op_t *) srcp)[6]; + len += 1; + goto do6; + case 0: + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + return; + srcp -= 8 * OPSIZ; + dstp -= 7 * OPSIZ; + a0 = ((op_t *) srcp)[7]; + goto do7; + case 1: + srcp -= 9 * OPSIZ; + dstp -= 8 * OPSIZ; + a1 = ((op_t *) srcp)[8]; + len -= 1; + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + goto do0; + goto do8; /* No-op. */ + } + + do { +do8: + a0 = ((op_t *) srcp)[7]; + ((op_t *) dstp)[7] = a1; +do7: + a1 = ((op_t *) srcp)[6]; + ((op_t *) dstp)[6] = a0; +do6: + a0 = ((op_t *) srcp)[5]; + ((op_t *) dstp)[5] = a1; +do5: + a1 = ((op_t *) srcp)[4]; + ((op_t *) dstp)[4] = a0; +do4: + a0 = ((op_t *) srcp)[3]; + ((op_t *) dstp)[3] = a1; +do3: + a1 = ((op_t *) srcp)[2]; + ((op_t *) dstp)[2] = a0; +do2: + a0 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[1] = a1; +do1: + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = a0; + + srcp -= 8 * OPSIZ; + dstp -= 8 * OPSIZ; + len -= 8; + } while (len != 0); + + /* + * This is the right position for do0. Please don't move it into + * the loop. + */ +do0: + ((op_t *) dstp)[7] = a1; +} + +/* + * _wordcopy_bwd_dest_aligned -- Copy block finishing right before SRCP to + * block finishing right before DSTP with LEN `op_t' words (not LEN bytes!). + * DSTP should be aligned for memory operations on `op_t', but SRCP must *not* + * be aligned. + */ +void _wordcopy_bwd_dest_aligned (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2, a3; + int sh_1, sh_2; + + /* + * Calculate how to shift a word read at the memory operation aligned + * srcp to make it aligned for copy. + */ + + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* + * Make srcp aligned by rounding it down to the beginning of the op_t + * it points in the middle of. + */ + srcp &= -OPSIZ; + srcp += OPSIZ; + + switch (len % 4) { + case 2: + srcp -= 3 * OPSIZ; + dstp -= 1 * OPSIZ; + a2 = ((op_t *) srcp)[2]; + a1 = ((op_t *) srcp)[1]; + len += 2; + goto do1; + case 3: + srcp -= 4 * OPSIZ; + dstp -= 2 * OPSIZ; + a3 = ((op_t *) srcp)[3]; + a2 = ((op_t *) srcp)[2]; + len += 1; + goto do2; + case 0: + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + return; + srcp -= 5 * OPSIZ; + dstp -= 3 * OPSIZ; + a0 = ((op_t *) srcp)[4]; + a3 = ((op_t *) srcp)[3]; + goto do3; + case 1: + srcp -= 6 * OPSIZ; + dstp -= 4 * OPSIZ; + a1 = ((op_t *) srcp)[5]; + a0 = ((op_t *) srcp)[4]; + len -= 1; + if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0) + goto do0; + goto do4; /* No-op. 
*/ + } + + do { +do4: + a3 = ((op_t *) srcp)[3]; + ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); +do3: + a2 = ((op_t *) srcp)[2]; + ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2); +do2: + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2); +do1: + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); + + srcp -= 4 * OPSIZ; + dstp -= 4 * OPSIZ; + len -= 4; + } while (len != 0); + + /* + * This is the right position for do0. Please don't move it into + * the loop. + */ +do0: + ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); +} + diff --git a/lib/string.c b/lib/string.c index 01fad9b203e..67caaef1475 100644 --- a/lib/string.c +++ b/lib/string.c @@ -23,6 +23,7 @@ #include #include #include +#include #ifndef __HAVE_ARCH_STRNICMP /** @@ -596,11 +597,12 @@ EXPORT_SYMBOL(memset); */ void *memcpy(void *dest, const void *src, size_t count) { - char *tmp = dest; - const char *s = src; + unsigned long dstp = (unsigned long)dest; + unsigned long srcp = (unsigned long)src; + + /* Copy from the beginning to the end */ + mem_copy_fwd(dstp, srcp, count); - while (count--) - *tmp++ = *s++; return dest; } EXPORT_SYMBOL(memcpy); @@ -617,21 +619,15 @@ EXPORT_SYMBOL(memcpy); */ void *memmove(void *dest, const void *src, size_t count) { - char *tmp; - const char *s; - - if (dest <= src) { - tmp = dest; - s = src; - while (count--) - *tmp++ = *s++; + unsigned long dstp = (unsigned long)dest; + unsigned long srcp = (unsigned long)src; + + if (dest - src >= count) { + /* Copy from the beginning to the end */ + mem_copy_fwd(dstp, srcp, count); } else { - tmp = dest; - tmp += count; - s = src; - s += count; - while (count--) - *--tmp = *--s; + /* Copy from the end to the beginning */ + mem_copy_bwd(dstp, srcp, count); } return dest; } From 64dfe65e4a3de99bbf3696e6143df253180b4cfc Mon Sep 17 00:00:00 2001 From: Ezekeel Date: Wed, 8 Aug 2012 10:09:35 -0400 Subject: [PATCH 054/678] Added kernel config option 'BCMDHD_WIFI_PM' Conflicts: drivers/net/wireless/bcmdhd/dhd_linux.c --- drivers/net/wireless/bcmdhd/Kconfig | 8 ++++++++ drivers/net/wireless/bcmdhd/dhd_linux.c | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/net/wireless/bcmdhd/Kconfig b/drivers/net/wireless/bcmdhd/Kconfig index 8b6b92f6243..7448a941279 100644 --- a/drivers/net/wireless/bcmdhd/Kconfig +++ b/drivers/net/wireless/bcmdhd/Kconfig @@ -52,3 +52,11 @@ config DHD_ENABLE_P2P default n ---help--- Use Enable Wifi Direct + +config BCMDHD_WIFI_PM + bool "Enable support for changing the WiFi power mode" + depends on BCMDHD + default n + help + Enable support for changing the WiFi power mode for + screen-off. 
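As the dhd_linux.c hunk below shows, the feature is driven by a wifi_pm module parameter (default 0); when wifi_pm is 1, dhd_set_suspend() forces power_mode to PM_FAST. A minimal usage sketch, assuming the parameter is exported through the usual module_param sysfs path for a driver named bcmdhd (the exact directory should be verified on the device):

    # keep WiFi in PM_FAST across screen-off (1 = forced fast mode, 0 = stock behaviour)
    echo 1 > /sys/module/bcmdhd/parameters/wifi_pm
    cat /sys/module/bcmdhd/parameters/wifi_pm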
diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index c2f4d33e470..6cc2e1ed89b 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -537,6 +537,12 @@ static void dhd_set_packet_filter(int value, dhd_pub_t *dhd) #endif } +#ifdef CONFIG_BCMDHD_WIFI_PM +static int wifi_pm = 0; + +module_param(wifi_pm, int, 0755); +#endif + #ifdef DYNAMIC_DTIM_SKIP static int dhd_dtim_thread(void *data) @@ -643,6 +649,11 @@ static int dhd_set_suspend(int value, dhd_pub_t *dhd) __FUNCTION__, value, dhd->in_suspend)); dhd_suspend_lock(dhd); + +#ifdef CONFIG_BCMDHD_WIFI_PM + if (wifi_pm == 1) + power_mode = PM_FAST; +#endif if (dhd && dhd->up) { if (value && dhd->in_suspend) { From 25fef14bd2c93b54c07264f315538d6bf37c7231 Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 8 Aug 2012 21:16:15 -0400 Subject: [PATCH 055/678] Tweaking of compiler optimizations --- Makefile | 24 ++++++++++++------------ arch/arm/Makefile | 2 +- arch/arm/vfp/Makefile | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 7e38fdac3cd..80978c684f4 100644 --- a/Makefile +++ b/Makefile @@ -350,8 +350,8 @@ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ CFLAGS_MODULE = AFLAGS_MODULE = LDFLAGS_MODULE = -CFLAGS_KERNEL = -O2 -mtune=cortex-a9 -ftree-vectorize -ffast-math -fsingle-precision-constant -AFLAGS_KERNEL = -O2 -mtune=cortex-a9 -ftree-vectorize -ffast-math -fsingle-precision-constant +CFLAGS_KERNEL = +AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage @@ -365,17 +365,17 @@ LINUXINCLUDE := -I$(srctree)/arch/$(hdr-arch)/include \ KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -fno-common \ - -Werror-implicit-function-declaration \ - -Wno-format-security \ - -fno-delete-null-pointer-checks \ - -mtune=cortex-a9 -mfpu=neon \ - -fmodulo-sched -fmodulo-sched-allow-regmoves \ - -funswitch-loops -fpredictive-commoning -fgcse-after-reload \ - -ftree-vectorize -floop-interchange -floop-strip-mine -floop-block + -fno-strict-aliasing -fno-common \ + -Werror-implicit-function-declaration \ + -Wno-format-security \ + -fno-delete-null-pointer-checks -mno-unaligned-access \ + -mtune=cortex-a9 -march=armv7-a -mfpu=neon \ + -fpredictive-commoning -fgcse-after-reload -ftree-vectorize \ + -fipa-cp-clone -fsingle-precision-constant -pipe \ + -funswitch-loops -floop-interchange \ + -floop-strip-mine -floop-block - - KBUILD_AFLAGS_KERNEL := +KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := KBUILD_AFLAGS := -D__ASSEMBLY__ KBUILD_AFLAGS_MODULE := -DMODULE diff --git a/arch/arm/Makefile b/arch/arm/Makefile index f6fbf49bea2..61bf37448ef 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -57,7 +57,7 @@ comma = , # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. 
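# Background note (a rough description only): the $(call cc-option,<flags>,<fallback>)
# helper used in the arch-$(CONFIG_CPU_32v7) definition below test-compiles <flags>
# against the configured cross compiler and emits them only if they are accepted,
# substituting <fallback> otherwise, so the Cortex-A9/NEON tuning added here falls
# back cleanly on toolchains that lack it.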
-arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option, -O3 -march=armv7-a -mcpu=cortex-a9 -mtune=cortex-a9 -mfloat-abi=softfp -mfpu=neon -funsafe-math-optimizations -Wa$(comma)-march=armv7-a) +arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv5te -Wa$(comma)-march=armv7-a) arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) # Only override the compiler option if ARMv6. The ARMv6K extensions are # always available in ARMv7 diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile index 28d48ff3db7..6de73aab019 100644 --- a/arch/arm/vfp/Makefile +++ b/arch/arm/vfp/Makefile @@ -7,7 +7,7 @@ # ccflags-y := -DDEBUG # asflags-y := -DDEBUG -KBUILD_AFLAGS :=$(KBUILD_AFLAGS: -O3 -msoft-float=-Wa,-mfpu=neon -funsafe-math-optimizations) +KBUILD_AFLAGS :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=softvfp+vfp) LDFLAGS +=--no-warn-mismatch obj-y += vfp.o From 129fbf3bd267f7f14b1aa20ae3d5c322cd90839f Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 8 Aug 2012 21:18:46 -0400 Subject: [PATCH 056/678] mm: not really seeing any benefits from this experiment, so backing off VM_MAX_READAHEAD back to 256 --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e7a00ac950b..f6ce763ac32 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1434,7 +1434,7 @@ int write_one_page(struct page *page, int wait); void task_dirty_inc(struct task_struct *tsk); /* readahead.c */ -#define VM_MAX_READAHEAD 1024 /* kbytes */ +#define VM_MAX_READAHEAD 256 /* kbytes */ #define VM_MIN_READAHEAD 32 /* kbytes (includes current page) */ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, From a421671dcbd7d92fecc5cc4eaa6540039e8f857f Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 8 Aug 2012 22:07:51 -0400 Subject: [PATCH 057/678] grouper panel: increased panel clocks a tad bit for each GPU overclock frequency --- arch/arm/mach-tegra/board-grouper-panel.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 2c3c43e3788..6e85cd350f5 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -390,16 +390,13 @@ static struct tegra_dc_mode grouper_panel_modes[] = { { #ifdef CONFIG_GPU_OVERCLOCK #ifdef CONFIG_GPU_OC_446 - /* 1280x800@62Hz */ - .pclk = 74180000, + .pclk = 75000000, #endif #ifdef CONFIG_GPU_OC_484 - /* 1280x800@63Hz */ - .pclk = 77395348, + .pclk = 80000000, #endif #ifdef CONFIG_GPU_OC_520 - /* 1280x800@65Hz */ - .pclk = 81170731, + .pclk = 85000000, #endif #else /* 1280x800@60Hz */ From 9bb61d24f10cccdf0203655085acd6f17be4fd75 Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 8 Aug 2012 23:02:15 -0400 Subject: [PATCH 058/678] v1.1.0 alpha changes for mach-tegra -Experimental: now forging speedo id 4 and process id 2 so that EDP limits are slightly raised and it narrows everyone down to a common DVFS record for everyone. Raised Dynanic EDP governor to 67C (from 60C) to give a little more room before edp is allowed to enable. -Minor cpu voltage tweaks at the top end aiming at slightly better battery for those that don't undervolt. -Lowered the dvfs cold offsets from 50 to 25 for the top 4 slots. 
This will give folks a little more breathing room when undervolting and may help cold performance a bit if your voltages are lowered close to their lower limit. -cpu transtition latency lowered - fairly certain that it only affects OnDemand governor and not the default Interactive governor. -Reverted most of the adjustments to the tegra 3 algorirthim for bringing cpus online and offline. I think it livened it up, but at the expense of battery. --- arch/arm/mach-tegra/cpu-tegra.c | 4 ++-- arch/arm/mach-tegra/cpu-tegra3.c | 19 ++++--------------- arch/arm/mach-tegra/cpuidle.c | 2 +- arch/arm/mach-tegra/tegra3_dvfs.c | 10 ++++++---- arch/arm/mach-tegra/tegra3_speedo.c | 9 +++++---- 5 files changed, 18 insertions(+), 26 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index dda18c699d1..014869eb788 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -59,7 +59,7 @@ static int suspend_index; static bool force_policy_max = 1; #define TEGRA3_OVERCLOCK -#define TEGRA3_DYNAMIC_EDP_THRES_TEMP (60) +#define TEGRA3_DYNAMIC_EDP_THRES_TEMP (67) static bool edp_enable = 1; static int force_policy_max_set(const char *arg, const struct kernel_param *kp) @@ -727,7 +727,7 @@ static int tegra_cpu_init(struct cpufreq_policy *policy) target_cpu_speed[policy->cpu] = policy->cur; /* FIXME: what's the actual transition time? */ - policy->cpuinfo.transition_latency = 300 * 1000; + policy->cpuinfo.transition_latency = 40 * 1000; policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; cpumask_copy(policy->related_cpus, cpu_possible_mask); diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index b221ff9353c..ce0b658eab9 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -41,7 +41,7 @@ #define INITIAL_STATE TEGRA_HP_DISABLED #define UP2G0_DELAY_MS 70 #define UP2Gn_DELAY_MS 100 -#define DOWN_DELAY_MS 750 +#define DOWN_DELAY_MS 1000 static struct mutex *tegra3_cpu_lock; @@ -66,7 +66,7 @@ module_param(idle_bottom_freq, uint, 0644); static int mp_overhead = 10; module_param(mp_overhead, int, 0644); -static int balance_level = 60; +static int balance_level = 70; module_param(balance_level, int, 0644); static struct clk *cpu_clk; @@ -189,22 +189,11 @@ enum { static noinline int tegra_cpu_speed_balance(void) { unsigned long highest_speed = tegra_cpu_highest_speed(); + unsigned long balanced_speed = highest_speed * balance_level / 100; + unsigned long skewed_speed = balanced_speed / 2; unsigned int nr_cpus = num_online_cpus(); unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? 
: 4; unsigned int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); - unsigned long balanced_speed; - - switch (highest_speed) { - case 1700000: - case 1600000: - case 1500000: - balanced_speed = 860000; - break; - default: - highest_speed * balance_level / 100; - } - - unsigned long skewed_speed = balanced_speed / 2; /* balanced: freq targets for all CPUs are above 50% of highest speed biased: freq target for at least one CPU is below 50% threshold diff --git a/arch/arm/mach-tegra/cpuidle.c b/arch/arm/mach-tegra/cpuidle.c index 47d5996e596..0e0ec7f2dbf 100644 --- a/arch/arm/mach-tegra/cpuidle.c +++ b/arch/arm/mach-tegra/cpuidle.c @@ -80,7 +80,7 @@ static int tegra_idle_enter_lp3(struct cpuidle_device *dev, return (int)us; } -static bool lp2_in_idle __read_mostly = false; +static bool lp2_in_idle __read_mostly = true; #ifdef CONFIG_PM_SLEEP static bool lp2_in_idle_modifiable __read_mostly = true; diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 3696dfef908..687f25b1b62 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -30,7 +30,7 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1190, 1212, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1165, 1175, 1190, 1237}; #endif static bool tegra_dvfs_cpu_disabled; @@ -38,10 +38,10 @@ static bool tegra_dvfs_core_disabled; static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1165, 1175, 1190, 1237}; static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { - 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25}; + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25, 25, 25, 25}; static const int core_millivolts[MAX_DVFS_FREQS] = { 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350}; @@ -167,7 +167,9 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 4, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1240, 1280, 1320, 1360, 1360, 1500), CPU_DVFS("cpu_g", 4, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1250, 1300, 1330, 1360, 1400, 1500), - CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500), +//Nexus 7 - faking speedo id = 4, process id =2 + CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1150, 1200, 1240, 1300, 1340, 1380, 1400, 1500, 1540, 1600, 1624), +// CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500), CPU_DVFS("cpu_g", 4, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500), CPU_DVFS("cpu_g", 5, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), diff --git a/arch/arm/mach-tegra/tegra3_speedo.c b/arch/arm/mach-tegra/tegra3_speedo.c index 8d450dd6734..7f49e164624 100644 --- a/arch/arm/mach-tegra/tegra3_speedo.c +++ b/arch/arm/mach-tegra/tegra3_speedo.c @@ -233,9 +233,9 @@ static void rev_sku_to_speedo_ids(int rev, int sku) case 0x83: /* T30L or T30S */ switch (package_id) { case 1: /* MID => T30L */ - cpu_speedo_id = 7; + cpu_speedo_id = 4; soc_speedo_id = 1; - threshold_index = 10; + threshold_index = 7; break; case 2: /* 
DSC => T30S */ cpu_speedo_id = 3; @@ -428,7 +428,8 @@ void tegra_init_speedo_data(void) break; } } - cpu_process_id = iv -1; + cpu_process_id = 2; //iv -1; + if (cpu_process_id == -1) { pr_err("****************************************************"); @@ -448,7 +449,7 @@ void tegra_init_speedo_data(void) break; } } - core_process_id = iv -1; + core_process_id = 1; //iv -1; if (core_process_id == -1) { pr_err("****************************************************"); From 50f06980264efd5b6500942ca7f95c57308e934a Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 8 Aug 2012 23:03:56 -0400 Subject: [PATCH 059/678] OnDemand tweaks. Interactive is still the default governor, but this should make OnDemand more usable. --- drivers/cpufreq/cpufreq_ondemand.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index a9442a349bb..111b1e07dcb 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -28,12 +28,12 @@ * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) -#define DEF_FREQUENCY_UP_THRESHOLD (80) -#define DEF_SAMPLING_DOWN_FACTOR (1) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (15) +#define DEF_FREQUENCY_UP_THRESHOLD (75) +#define DEF_SAMPLING_DOWN_FACTOR (2) #define MAX_SAMPLING_DOWN_FACTOR (100000) -#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) -#define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (5) +#define MICRO_FREQUENCY_UP_THRESHOLD (90) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) From 4582eb937af72271d9cc9626ddf87aacd0331356 Mon Sep 17 00:00:00 2001 From: morfic Date: Wed, 8 Aug 2012 23:05:28 -0400 Subject: [PATCH 060/678] Deadline i/o scheduler - added morfic's 1:1 secret sauce that I remember back from the Iconia A500 days. --- block/deadline-iosched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index c644137d9cd..586f4e72929 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -19,8 +19,8 @@ */ static const int read_expire = HZ / 2; /* max time before a read is submitted. */ static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ -static const int writes_starved = 2; /* max times reads can starve a write */ -static const int fifo_batch = 16; /* # of sequential requests treated as one +static const int writes_starved = 1; /* max times reads can starve a write */ +static const int fifo_batch = 1; /* # of sequential requests treated as one by the above parameters. For throughput. */ struct deadline_data { From 26ae5f0304bc719a21ae6773189dce424dc34a22 Mon Sep 17 00:00:00 2001 From: motley Date: Wed, 8 Aug 2012 23:06:31 -0400 Subject: [PATCH 061/678] Updated defconfig -Removed some extra debug stuff to lighten the load. 
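The hunks below mostly trade debug options for lighter settings (CONFIG_CGROUP_DEBUG, CONFIG_PM_DEBUG, CONFIG_SCHED_DEBUG and CONFIG_DEBUG_BUGVERBOSE go off, CONFIG_KSM comes on) and enable the new CONFIG_FSYNC_CONTROL and CONFIG_BCMDHD_WIFI_PM options. Because the defconfig keeps CONFIG_IKCONFIG_PROC=y, the result can be sanity-checked on a running device; an illustrative check, assuming a shell that provides zcat and grep:

    zcat /proc/config.gz | grep -E 'CONFIG_(KSM|FSYNC_CONTROL|BCMDHD_WIFI_PM|SCHED_DEBUG)='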
--- arch/arm/configs/motley_grouper_defconfig | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/arm/configs/motley_grouper_defconfig b/arch/arm/configs/motley_grouper_defconfig index c94cdc707a4..c2c2c12db79 100644 --- a/arch/arm/configs/motley_grouper_defconfig +++ b/arch/arm/configs/motley_grouper_defconfig @@ -74,7 +74,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=17 CONFIG_CGROUPS=y -CONFIG_CGROUP_DEBUG=y +# CONFIG_CGROUP_DEBUG is not set CONFIG_CGROUP_FREEZER=y # CONFIG_CGROUP_DEVICE is not set # CONFIG_CPUSETS is not set @@ -313,7 +313,6 @@ CONFIG_VOLTAGE_CONTROL=y CONFIG_GPU_OVERCLOCK=y # CONFIG_GPU_OC_446 is not set CONFIG_GPU_OC_484=y -# CONFIG_GPU_OC_500 is not set # CONFIG_GPU_OC_520 is not set CONFIG_TEGRA_CPU_DVFS=y CONFIG_TEGRA_CORE_DVFS=y @@ -465,7 +464,7 @@ CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_ZONE_DMA_FLAG=0 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y -# CONFIG_KSM is not set +CONFIG_KSM=y CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 # CONFIG_CLEANCACHE is not set CONFIG_FORCE_MAX_ZONEORDER=11 @@ -560,10 +559,7 @@ CONFIG_PM_SLEEP=y CONFIG_PM_SLEEP_SMP=y CONFIG_PM_RUNTIME=y CONFIG_PM=y -CONFIG_PM_DEBUG=y -# CONFIG_PM_ADVANCED_DEBUG is not set -# CONFIG_PM_TEST_SUSPEND is not set -CONFIG_CAN_PM_TRACE=y +# CONFIG_PM_DEBUG is not set # CONFIG_APM_EMULATION is not set CONFIG_PM_CLK=y CONFIG_SUSPEND_TIME=y @@ -1069,6 +1065,7 @@ CONFIG_EEPROM_AT24=y CONFIG_TEGRA_BB_SUPPORT=y CONFIG_TEGRA_BB_POWER=y CONFIG_TEGRA_BB_M7400=y +CONFIG_FSYNC_CONTROL=y CONFIG_HAVE_IDE=y # CONFIG_IDE is not set @@ -1236,6 +1233,7 @@ CONFIG_BCMDHD_NVRAM_PATH="/system/etc/nvram.txt" # CONFIG_DHD_USE_STATIC_BUF is not set # CONFIG_DHD_USE_SCHED_SCAN is not set CONFIG_DHD_ENABLE_P2P=y +CONFIG_BCMDHD_WIFI_PM=y # CONFIG_HOSTAP is not set # CONFIG_IPW2100 is not set # CONFIG_IPW2200 is not set @@ -3113,7 +3111,7 @@ CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 # CONFIG_DETECT_HUNG_TASK is not set -CONFIG_SCHED_DEBUG=y +# CONFIG_SCHED_DEBUG is not set # CONFIG_SCHEDSTATS is not set CONFIG_TIMER_STATS=y # CONFIG_DEBUG_OBJECTS is not set @@ -3134,7 +3132,7 @@ CONFIG_STACKTRACE=y # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_HIGHMEM is not set -CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_WRITECOUNT is not set From 3c67ab8f9cf36f29207dd852b4768d09203705fe Mon Sep 17 00:00:00 2001 From: motley Date: Fri, 10 Aug 2012 08:13:14 -0400 Subject: [PATCH 062/678] v1.1.1 changes -DVFS tweaks and drop top cpu frequency to 1600, not much is lost and it should now be stable for everyone I hope. We pushed the envelope to 1.7 and 1.624 and now we are back to real world sensible decisions. My Nenamark2 520 GPU scores actually went up. -Dynamic EDP temp adjusted from 67 to 68 to catch temp notifier quicker when cooling back down. 
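Once booted, the lowered ceiling can be double-checked through the standard cpufreq sysfs files (paths assume the usual cpufreq layout and that the driver exports its frequency table; values are in kHz, so 1.6 GHz reads as 1600000):

    cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq
    cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies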
--- arch/arm/mach-tegra/cpu-tegra.c | 2 +- arch/arm/mach-tegra/tegra3_clocks.c | 4 ++-- arch/arm/mach-tegra/tegra3_dvfs.c | 21 +++++++++------------ 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index 014869eb788..18b670cbfc6 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -59,7 +59,7 @@ static int suspend_index; static bool force_policy_max = 1; #define TEGRA3_OVERCLOCK -#define TEGRA3_DYNAMIC_EDP_THRES_TEMP (67) +#define TEGRA3_DYNAMIC_EDP_THRES_TEMP (68) static bool edp_enable = 1; static int force_policy_max_set(const char *arg, const struct kernel_param *kp) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 63a16c72db7..35968b47850 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4653,7 +4653,7 @@ static struct cpufreq_frequency_table freq_table_1p7GHz[] = { {10, 1400000 }, {11, 1500000 }, {12, 1600000 }, - {13, 1624000 }, + {13, 1700000 }, {14, CPUFREQ_TABLE_END }, }; @@ -4663,7 +4663,7 @@ static struct tegra_cpufreq_table_data cpufreq_tables[] = { { freq_table_1p3GHz, 2, 10 }, { freq_table_1p4GHz, 2, 11 }, { freq_table_1p5GHz, 2, 12 }, - { freq_table_1p6GHz, 2, 12 }, + { freq_table_1p6GHz, 2, 13 }, { freq_table_1p7GHz, 2, 12 }, }; diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 687f25b1b62..18234e1b06d 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -30,7 +30,7 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1165, 1175, 1190, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; #endif static bool tegra_dvfs_cpu_disabled; @@ -38,7 +38,7 @@ static bool tegra_dvfs_core_disabled; static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1165, 1175, 1190, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25, 25, 25, 25}; @@ -84,12 +84,9 @@ static int tegra3_get_core_floor_mv(int cpu_mv) return 1000; if (cpu_mv < 1000) return 1100; - if ((tegra_cpu_speedo_id() < 2) || - (tegra_cpu_speedo_id() == 4) || - (tegra_cpu_speedo_id() == 7) || - (tegra_cpu_speedo_id() == 8)) - return 1200; - if (cpu_mv < 1100) + if (cpu_mv < 1050) + return 1150; + if ((cpu_mv < 1100) || (tegra_cpu_speedo_id() == 4)) return 1200; if (cpu_mv <= 1250) return 1300; @@ -146,7 +143,7 @@ static struct dvfs_relationship tegra3_dvfs_relationships[] = { } static struct dvfs cpu_dvfs_table[] = { - /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1190, 1200, 1237 */ + /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ CPU_DVFS("cpu_g", 0, 0, MHZ, 1, 1, 684, 684, 817, 817, 1026, 1102, 1149, 1187, 1225, 1282, 1300), CPU_DVFS("cpu_g", 0, 1, MHZ, 1, 1, 807, 807, 948, 948, 1117, 1171, 1206, 1300), CPU_DVFS("cpu_g", 0, 2, MHZ, 1, 1, 883, 883, 1039, 1039, 1178, 1206, 1300), @@ -167,9 +164,9 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 4, 0, MHZ, 460, 460, 550, 550, 
680, 680, 820, 970, 1040, 1080, 1150, 1200, 1240, 1280, 1320, 1360, 1360, 1500), CPU_DVFS("cpu_g", 4, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1250, 1300, 1330, 1360, 1400, 1500), -//Nexus 7 - faking speedo id = 4, process id =2 - CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1150, 1200, 1240, 1300, 1340, 1380, 1400, 1500, 1540, 1600, 1624), -// CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500), + /* Nexus 7 - faking speedo id = 4, process id =2 + Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ + CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1150, 1200, 1240, 1300, 1340, 1380, 1400, 1470, 1500, 1540, 1600), CPU_DVFS("cpu_g", 4, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500), CPU_DVFS("cpu_g", 5, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), From a8c4856089be35e9673d5a24f4f55720f4af6a7b Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 1 Nov 2012 21:37:18 -0400 Subject: [PATCH 063/678] board-grouper-panel.c: Remove pixel clock increase and lower min brightness --- arch/arm/mach-tegra/board-grouper-panel.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 6e85cd350f5..96e479f92af 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -66,8 +66,8 @@ static struct regulator *grouper_lvds_reg; static struct regulator *grouper_lvds_vdd_panel; static tegra_dc_bl_output grouper_bl_output_measured = { - 0, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 14, 15, + 0, 5, 5, 5, 5, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, @@ -388,20 +388,7 @@ static struct resource grouper_disp2_resources[] = { static struct tegra_dc_mode grouper_panel_modes[] = { { -#ifdef CONFIG_GPU_OVERCLOCK -#ifdef CONFIG_GPU_OC_446 - .pclk = 75000000, -#endif -#ifdef CONFIG_GPU_OC_484 - .pclk = 80000000, -#endif -#ifdef CONFIG_GPU_OC_520 - .pclk = 85000000, -#endif -#else - /* 1280x800@60Hz */ - .pclk = 74180000, -#endif + .pclk = 68000000, .h_ref_to_sync = 1, .v_ref_to_sync = 1, .h_sync_width = 24, From 61f4a228972cc7fcddc76f16b6673283a63e4810 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 1 Nov 2012 21:37:48 -0400 Subject: [PATCH 064/678] tegra: lp core overclock to 620Mhz --- arch/arm/mach-tegra/tegra3_clocks.c | 12 ++++++------ arch/arm/mach-tegra/tegra3_dvfs.c | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 35968b47850..48e216ceef8 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4562,7 +4562,7 @@ static struct cpufreq_frequency_table freq_table_1p0GHz[] = { { 2, 204000 }, { 3, 340000 }, { 4, 475000 }, - { 5, 640000 }, + { 5, 620000 }, { 6, 760000 }, { 7, 860000 }, { 8, 912000 }, @@ -4576,7 +4576,7 @@ static struct cpufreq_frequency_table freq_table_1p3GHz[] = { { 2, 204000 }, { 3, 340000 }, { 4, 475000 }, - { 5, 640000 }, + { 5, 620000 }, { 6, 760000 }, { 7, 860000 }, { 8, 1000000 }, @@ -4592,7 +4592,7 @@ static struct cpufreq_frequency_table freq_table_1p4GHz[] = { { 2, 204000 }, { 3, 370000 }, { 4, 475000 }, - { 
5, 640000 }, + { 5, 620000 }, { 6, 760000 }, { 7, 860000 }, { 8, 1000000 }, @@ -4609,7 +4609,7 @@ static struct cpufreq_frequency_table freq_table_1p5GHz[] = { { 2, 204000 }, { 3, 340000 }, { 4, 475000 }, - { 5, 640000 }, + { 5, 620000 }, { 6, 760000 }, { 7, 860000 }, { 8, 1000000 }, @@ -4627,7 +4627,7 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { { 2, 204000 }, { 3, 340000 }, { 4, 475000 }, - { 5, 640000 }, + { 5, 620000 }, { 6, 860000 }, { 7, 1000000 }, { 8, 1100000 }, @@ -4645,7 +4645,7 @@ static struct cpufreq_frequency_table freq_table_1p7GHz[] = { { 2, 204000 }, { 3, 340000 }, { 4, 475000 }, - { 5, 640000 }, + { 5, 620000 }, { 6, 860000 }, { 7, 1000000 }, { 8, 1200000 }, diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 18234e1b06d..8a01592095c 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -220,7 +220,8 @@ static struct dvfs core_dvfs_table[] = { /* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ /* Clock limits for internal blocks, PLLs */ CORE_DVFS("cpu_lp", 0, 1, KHZ, 1, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), - CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), +// CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), + CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 475000, 620000, 620000, 620000, 620000, 620000), CORE_DVFS("cpu_lp", 2, 1, KHZ, 204000, 295000, 370000, 428000, 475000, 513000, 579000, 620000, 620000), CORE_DVFS("cpu_lp", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 450000, 450000, 450000), From e98240d80565921c48e37a7297ba1d444f118995 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 30 Oct 2012 22:42:17 -0400 Subject: [PATCH 065/678] Makefile and build changes Conflicts: arch/arm/Makefile --- scripts/mkcompile_h | 4 ++-- scripts/setlocalversion | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index f221ddf6908..c5242fd4f26 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -73,8 +73,8 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" echo \#define UTS_VERSION \"`echo $UTS_VERSION | $UTS_TRUNCATE`\" - echo \#define LINUX_COMPILE_BY \"`echo $LINUX_COMPILE_BY | $UTS_TRUNCATE`\" - echo \#define LINUX_COMPILE_HOST \"`echo $LINUX_COMPILE_HOST | $UTS_TRUNCATE`\" + echo \#define LINUX_COMPILE_BY \"`echo Metallice`\" + echo \#define LINUX_COMPILE_HOST \"`echo Nexus7`\" echo \#define LINUX_COMPILER \"`$CC -v 2>&1 | tail -n 1`\" ) > .tmpcompile diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 4d403844e13..ea4fc2537d2 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -170,7 +170,7 @@ else # LOCALVERSION= is not specified if test "${LOCALVERSION+set}" != "set"; then scm=$(scm_version --short) - res="$res${scm:++}" +# res="$res${scm:++}" fi fi From 7091a97bc58235133f13e7ea58b025c53e884899 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 1 Nov 2012 22:57:04 -0400 Subject: [PATCH 066/678] cpu-tegra3.c: modify down and up delays --- arch/arm/mach-tegra/cpu-tegra3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index ce0b658eab9..b0b0952572a 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -39,9 +39,9 @@ #include "clock.h" #define INITIAL_STATE TEGRA_HP_DISABLED -#define UP2G0_DELAY_MS 70 +#define 
UP2G0_DELAY_MS 500 #define UP2Gn_DELAY_MS 100 -#define DOWN_DELAY_MS 1000 +#define DOWN_DELAY_MS 500 static struct mutex *tegra3_cpu_lock; From 3cd4c4e21e334b8b3656eef62ecf2bc9dae2b870 Mon Sep 17 00:00:00 2001 From: imoseyon Date: Sun, 28 Oct 2012 15:52:00 -0700 Subject: [PATCH 067/678] fsl_udc: force high current charging for USB --- drivers/usb/gadget/fsl_udc_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c index 37647d11896..618d20b1ee4 100755 --- a/drivers/usb/gadget/fsl_udc_core.c +++ b/drivers/usb/gadget/fsl_udc_core.c @@ -310,6 +310,8 @@ static void cable_detection_work_handler(struct work_struct *w) if(!s_cable_info.ac_connected) { printk(KERN_INFO "The USB cable is connected\n"); s_cable_info.cable_status = 0x01; //0001 + pr_info("[imoseyon] force hc callback\n"); + smb347_hc_mode_callback(1,1); } else { printk(KERN_INFO "AC adapter connect\n"); s_cable_info.cable_status = 0x03; //0011 From ad81c77c6af368359a7d74a6d9cfd1d635347592 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 4 Nov 2012 19:31:46 -0500 Subject: [PATCH 068/678] cpu-tegra.c: add coldboot for setting max frequency on boot --- arch/arm/mach-tegra/cpu-tegra.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index 18b670cbfc6..b43e127d363 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -57,6 +57,7 @@ static bool is_suspended; static int suspend_index; static bool force_policy_max = 1; +static bool coldstart = 1; #define TEGRA3_OVERCLOCK #define TEGRA3_DYNAMIC_EDP_THRES_TEMP (68) @@ -733,11 +734,14 @@ static int tegra_cpu_init(struct cpufreq_policy *policy) cpumask_copy(policy->related_cpus, cpu_possible_mask); if (policy->cpu == 0) { - /* set to 1.3GHz stock freq on init */ - policy->max = 1300000; register_pm_notifier(&tegra_cpu_pm_notifier); } + if (coldstart == 1) { + policy->max = 1300000; + coldstart = 0; + } + return 0; } From 3e5f3ad08ee25c16743151aebc927dc9f458c759 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 4 Nov 2012 19:32:25 -0500 Subject: [PATCH 069/678] tegra: changes to voltages and voltage table --- arch/arm/mach-tegra/tegra3_dvfs.c | 20 +++++++++++++------- drivers/cpufreq/cpufreq.c | 4 ++-- 2 files changed, 15 insertions(+), 9 deletions(-) mode change 100755 => 100644 drivers/cpufreq/cpufreq.c diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 8a01592095c..66b282e8d8c 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -30,7 +30,8 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; +// 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; + 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237}; #endif static bool tegra_dvfs_cpu_disabled; @@ -38,10 +39,10 @@ static bool tegra_dvfs_core_disabled; static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; - +// 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; + 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237}; static const 
unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { - 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25, 25, 25, 25}; + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25, 25, 25, 25}; static const int core_millivolts[MAX_DVFS_FREQS] = { 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350}; @@ -59,7 +60,7 @@ static int cpu_below_core = VDD_CPU_BELOW_VDD_CORE; static struct dvfs_rail tegra3_dvfs_rail_vdd_cpu = { .reg_id = "vdd_cpu", .max_millivolts = 1250, - .min_millivolts = 800, + .min_millivolts = 750, .step = VDD_SAFE_STEP, .jmp_to_zero = true, }; @@ -164,9 +165,14 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 4, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1240, 1280, 1320, 1360, 1360, 1500), CPU_DVFS("cpu_g", 4, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1250, 1300, 1330, 1360, 1400, 1500), + /* Nexus 7 - faking speedo id = 4, process id =2 - Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ - CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 540, 700, 720, 860, 880, 1050, 1150, 1200, 1240, 1300, 1340, 1380, 1400, 1470, 1500, 1540, 1600), + Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ +// CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500, 1600), + + /*Cpu voltages (mV): 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237 */ + CPU_DVFS("cpu_g", 4, 2, MHZ, 475, 475, 620, 620, 860, 860, 1000, 1000, 1100, 1100, 1200, 1300, 1400, 1500, 1600), + CPU_DVFS("cpu_g", 4, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500), CPU_DVFS("cpu_g", 5, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c old mode 100755 new mode 100644 index 0e3074089b9..9cd242e282c --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -614,7 +614,7 @@ static ssize_t show_UV_mV_table(struct cpufreq_policy *policy, char *buf) struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); /* find how many actual entries there are */ - i = cpu_clk_g->dvfs->num_freqs; + i = cpu_clk_g->dvfs->num_freqs - 3; for(i--; i >=0; i--) { out += sprintf(out, "%lumhz: %i mV\n", @@ -635,7 +635,7 @@ static ssize_t store_UV_mV_table(struct cpufreq_policy *policy, char *buf, size_ struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); /* find how many actual entries there are */ - i = cpu_clk_g->dvfs->num_freqs; + i = cpu_clk_g->dvfs->num_freqs - 3; for(i--; i >= 0; i--) { From a5439ddd77ac24480993567539bb67c111c2568f Mon Sep 17 00:00:00 2001 From: Nandita Dukkipati Date: Sun, 21 Aug 2011 20:21:57 +0000 Subject: [PATCH 070/678] Proportional Rate Reduction for TCP. This patch implements Proportional Rate Reduction (PRR) for TCP. PRR is an algorithm that determines TCP's sending rate in fast recovery. PRR avoids excessive window reductions and aims for the actual congestion window size at the end of recovery to be as close as possible to the window determined by the congestion control algorithm. PRR also improves accuracy of the amount of data sent during loss recovery. The patch implements the recommended flavor of PRR called PRR-SSRB (Proportional rate reduction with slow start reduction bound) and replaces the existing rate halving algorithm. 
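As a worked example of the PRR-SSRB arithmetic added below in tcp_update_cwnd_in_recovery() (the numbers are purely illustrative): suppose recovery starts with prior_cwnd = 20 and the congestion control picks ssthresh = 10. While the packets in flight still exceed ssthresh, each ACK computes

    sndcnt = (ssthresh * prr_delivered + prior_cwnd - 1) / prior_cwnd - prr_out

so with prr_delivered = 4 and prr_out = 1 the integer division gives (10*4 + 19)/20 = 2, hence sndcnt = 1, and cwnd is set to packets_in_flight + sndcnt. The sender thus transmits roughly ssthresh/prior_cwnd of what is newly delivered, which walks cwnd down toward ssthresh over about one round trip rather than collapsing it at once.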
PRR improves upon the existing Linux fast recovery under a number of conditions including: 1) burst losses where the losses implicitly reduce the amount of outstanding data (pipe) below the ssthresh value selected by the congestion control algorithm and, 2) losses near the end of short flows where application runs out of data to send. As an example, with the existing rate halving implementation a single loss event can cause a connection carrying short Web transactions to go into the slow start mode after the recovery. This is because during recovery Linux pulls the congestion window down to packets_in_flight+1 on every ACK. A short Web response often runs out of new data to send and its pipe reduces to zero by the end of recovery when all its packets are drained from the network. Subsequent HTTP responses using the same connection will have to slow start to raise cwnd to ssthresh. PRR on the other hand aims for the cwnd to be as close as possible to ssthresh by the end of recovery. A description of PRR and a discussion of its performance can be found at the following links: - IETF Draft: http://tools.ietf.org/html/draft-mathis-tcpm-proportional-rate-reduction-01 - IETF Slides: http://www.ietf.org/proceedings/80/slides/tcpm-6.pdf http://tools.ietf.org/agenda/81/slides/tcpm-2.pdf - Paper to appear in Internet Measurements Conference (IMC) 2011: Improving TCP Loss Recovery Nandita Dukkipati, Matt Mathis, Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 +++ net/ipv4/tcp_input.c | 58 ++++++++++++++++++++++++++++++++++++++----- net/ipv4/tcp_output.c | 7 +++++- 3 files changed, 62 insertions(+), 7 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 531ede8006d..6b63b310af3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -379,6 +379,10 @@ struct tcp_sock { u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; + u32 prior_cwnd; /* Congestion window at start of Recovery. */ + u32 prr_delivered; /* Number of newly delivered packets to + * receiver in Recovery. */ + u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d73aab3fbfc..e12dfb3f62e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2828,9 +2828,13 @@ static int tcp_try_undo_loss(struct sock *sk) static inline void tcp_complete_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - /* Do not moderate cwnd if it's already undone in cwr or recovery */ - if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { - tp->snd_cwnd = tp->snd_ssthresh; + + /* Do not moderate cwnd if it's already undone in cwr or recovery. */ + if (tp->undo_marker) { + if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); + else /* PRR */ + tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); @@ -2948,6 +2952,38 @@ void tcp_simple_retransmit(struct sock *sk) } EXPORT_SYMBOL(tcp_simple_retransmit); +/* This function implements the PRR algorithm, specifcally the PRR-SSRB + * (proportional rate reduction with slow start reduction bound) as described in + * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. 
+ * It computes the number of packets to send (sndcnt) based on packets newly + * delivered: + * 1) If the packets in flight is larger than ssthresh, PRR spreads the + * cwnd reductions across a full RTT. + * 2) If packets in flight is lower than ssthresh (such as due to excess + * losses and/or application stalls), do not perform any further cwnd + * reductions, but instead slow start up to ssthresh. + */ +static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, + int fast_rexmit, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + int sndcnt = 0; + int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + + if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { + u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + + tp->prior_cwnd - 1; + sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; + } else { + sndcnt = min_t(int, delta, + max_t(int, tp->prr_delivered - tp->prr_out, + newly_acked_sacked) + 1); + } + + sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); + tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2959,7 +2995,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit); * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) +static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, + int newly_acked_sacked, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -3109,13 +3146,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; + tp->prior_cwnd = tp->snd_cwnd; + tp->prr_delivered = 0; + tp->prr_out = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); fast_rexmit = 1; } if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_down(sk, flag); + tp->prr_delivered += newly_acked_sacked; + tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag); tcp_xmit_retransmit_queue(sk); } @@ -3630,6 +3671,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 prior_in_flight; u32 prior_fackets; int prior_packets; + int prior_sacked = tp->sacked_out; + int newly_acked_sacked = 0; int frto_cwnd = 0; /* If the ack is older than previous acks @@ -3701,6 +3744,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. 
*/ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); + newly_acked_sacked = (prior_packets - prior_sacked) - + (tp->packets_out - tp->sacked_out); + if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); /* Guarantee sacktag reordering detection against wrap-arounds */ @@ -3713,7 +3759,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, - flag); + newly_acked_sacked, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index faf257b9415..b1f32b459e5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1794,11 +1794,13 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, tcp_event_new_data_sent(sk, skb); tcp_minshall_update(tp, mss_now, skb); - sent_pkts++; + sent_pkts += tcp_skb_pcount(skb); if (push_one) break; } + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + tp->prr_out += sent_pkts; if (likely(sent_pkts)) { tcp_cwnd_validate(sk); @@ -2292,6 +2294,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) return; NET_INC_STATS_BH(sock_net(sk), mib_idx); + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + tp->prr_out += tcp_skb_pcount(skb); + if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, From cee090a06aaea44a25b6ad0d54033b5106e73ea2 Mon Sep 17 00:00:00 2001 From: Pavan Kunapuli Date: Mon, 7 May 2012 17:43:25 +0530 Subject: [PATCH 071/678] mmc: tegra: Set eMMC DDR clock based on emc clock Set the eMMC ddr mode clock dynamically based on emc clock rate. If ddr clock limit is specified and the emc clock is less than max emc freq, then limit emmc ddr clk. If not, set the max eMMC ddr clock. Bug 967719 Change-Id: I9f70077c4ac4bb1f3e6d894fcb8420b1aba284dd Signed-off-by: Pavan Kunapuli Reviewed-on: http://git-master/r/100579 Reviewed-by: Simone Willett Tested-by: Simone Willett --- drivers/mmc/host/sdhci-tegra.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 8f450fafbf3..e95f9d852ae 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -111,9 +111,13 @@ struct tegra_sdhci_host { unsigned int vddio_max_uv; /* max clk supported by the platform */ unsigned int max_clk_limit; + /* max ddr clk supported by the platform */ + unsigned int ddr_clk_limit; struct tegra_io_dpd *dpd; bool card_present; bool is_rail_enabled; + struct clk *emc_clk; + unsigned int emc_max_clk; }; static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg) @@ -363,6 +367,7 @@ static void tegra_sdhci_set_clk_rate(struct sdhci_host *sdhci, struct sdhci_pltfm_host *pltfm_host = sdhci_priv(sdhci); struct tegra_sdhci_host *tegra_host = pltfm_host->priv; unsigned int clk_rate; + unsigned int emc_clk; if (sdhci->mmc->card && mmc_card_ddr_mode(sdhci->mmc->card)) { @@ -370,7 +375,16 @@ static void tegra_sdhci_set_clk_rate(struct sdhci_host *sdhci, * In ddr mode, tegra sdmmc controller clock frequency * should be double the card clock frequency. 
*/ - clk_rate = clock * 2; + if (tegra_host->ddr_clk_limit) { + clk_rate = tegra_host->ddr_clk_limit * 2; + if (tegra_host->emc_clk) { + emc_clk = clk_get_rate(tegra_host->emc_clk); + if (emc_clk == tegra_host->emc_max_clk) + clk_rate = clock * 2; + } + } else { + clk_rate = clock * 2; + } } else { if (clock <= tegra_sdhost_min_freq) clk_rate = tegra_sdhost_min_freq; @@ -1109,10 +1123,23 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev) rc = clk_enable(clk); if (rc != 0) goto err_clk_put; + + if (!strcmp(dev_name(mmc_dev(host->mmc)), "sdhci-tegra.3")) { + tegra_host->emc_clk = clk_get(mmc_dev(host->mmc), "emc"); + if (IS_ERR(tegra_host->emc_clk)) { + dev_err(mmc_dev(host->mmc), "clk err\n"); + rc = PTR_ERR(tegra_host->emc_clk); + goto err_clk_put; + } + tegra_host->emc_max_clk = + clk_round_rate(tegra_host->emc_clk, ULONG_MAX); + } + pltfm_host->clk = clk; pltfm_host->priv = tegra_host; tegra_host->clk_enabled = true; tegra_host->max_clk_limit = plat->max_clk_limit; + tegra_host->ddr_clk_limit = plat->ddr_clk_limit; tegra_host->instance = pdev->id; tegra_host->dpd = tegra_io_dpd_get(mmc_dev(host->mmc)); @@ -1152,6 +1179,7 @@ static int __devinit sdhci_tegra_probe(struct platform_device *pdev) return 0; err_add_host: + clk_put(tegra_host->emc_clk); clk_disable(pltfm_host->clk); err_clk_put: clk_put(pltfm_host->clk); From b48d2d9ff3511d993c3cc55c5fe28b9baba18f89 Mon Sep 17 00:00:00 2001 From: Pavan Kunapuli Date: Fri, 4 May 2012 19:02:04 +0530 Subject: [PATCH 072/678] arm: tegra: sdhci: Define ddr50 clock limit Added a new variable in sdhci platform data which will limit the ddr50 mode clock. Bug 967719 Change-Id: I3f55b55651362447845c2e1d5000939e3e028df6 Signed-off-by: Pavan Kunapuli Reviewed-on: http://git-master/r/100569 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Laxman Dewangan --- arch/arm/mach-tegra/include/mach/sdhci.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-tegra/include/mach/sdhci.h b/arch/arm/mach-tegra/include/mach/sdhci.h index b48a9288707..5dc8cd2ddf7 100644 --- a/arch/arm/mach-tegra/include/mach/sdhci.h +++ b/arch/arm/mach-tegra/include/mach/sdhci.h @@ -28,6 +28,7 @@ struct tegra_sdhci_platform_data { int pm_flags; int pm_caps; unsigned int max_clk_limit; + unsigned int ddr_clk_limit; unsigned int tap_delay; struct mmc_platform_data mmc_data; }; From b55c58ffb7564c0cc041e2c50778cc6463a32819 Mon Sep 17 00:00:00 2001 From: Ken Chang Date: Fri, 11 May 2012 15:40:09 +0800 Subject: [PATCH 073/678] i2c: tegra: Fix i2c unknown interrupt issue writes to modules on APB bus may complete out-of-order. need to guarantee that the write is completed by reading it back. read I2C_INT_STATUS back right after writing the current int status in the isr to make sure the clear operation of I2C_INT_STATUS is done before the interrupt is re-enabled. the same also done for DVC_STATUS. 
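The fix follows the usual pattern for flushing a posted register write; a minimal sketch (illustrative only, using the generic MMIO accessors from <linux/io.h> rather than this driver's i2c_writel/i2c_readl wrappers):

static inline void mmio_write_flushed(u32 val, void __iomem *reg)
{
	writel(val, reg);	/* may complete out of order on the APB bus */
	(void)readl(reg);	/* read-back stalls until the write has landed,
				 * so the status is really clear before the
				 * interrupt is re-enabled */
}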
bug 980763 Change-Id: I34f18804d530ccadf561fe1736552b6a4dd6e4ce Signed-off-by: Ken Chang Reviewed-on: http://git-master/r/101925 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Bharat Nihalani --- drivers/i2c/busses/i2c-tegra.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 1d9ce6501af..1ae4a398626 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -572,9 +572,12 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) } i2c_writel(i2c_dev, status, I2C_INT_STATUS); + i2c_readl(i2c_dev, I2C_INT_STATUS); - if (i2c_dev->is_dvc) + if (i2c_dev->is_dvc) { dvc_writel(i2c_dev, DVC_STATUS_I2C_DONE_INTR, DVC_STATUS); + dvc_readl(i2c_dev, DVC_STATUS); + } /* * ensure that the writes above post prior to leaving the interrupt @@ -618,13 +621,16 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) I2C_INT_RX_FIFO_DATA_REQ | I2C_INT_TX_FIFO_OVERFLOW); i2c_writel(i2c_dev, status, I2C_INT_STATUS); + i2c_readl(i2c_dev, I2C_INT_STATUS); /* An error occured, mask dvc interrupt */ if (i2c_dev->is_dvc) dvc_i2c_mask_irq(i2c_dev, DVC_CTRL_REG3_I2C_DONE_INTR_EN); - if (i2c_dev->is_dvc) + if (i2c_dev->is_dvc) { dvc_writel(i2c_dev, DVC_STATUS_I2C_DONE_INTR, DVC_STATUS); + dvc_readl(i2c_dev, DVC_STATUS); + } /* * ensure that the writes above post prior to leaving the interrupt From aabfb84f9baf492c8d21fded05a4c9417fd5db9b Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Mon, 14 May 2012 13:58:02 +0530 Subject: [PATCH 074/678] spi: tegra: register interrupt as ONESHOT The Tegra spi's engine is design as it generates interrupt when any error occurs and it keep transferring data. It does not stop the engine once error occurred and interrupt generated. This may cause reentry of ISR as on error case, isr get called where it clears interrupt and because it is still in progress, it again interrupts and schedule the thread. The second time scheduling of the isr/thread can cause the issue in queue management and sw state. So Making the interrupt as ONESHOT so that the interrupt will not get schedule until the engine is reset in error case. Change-Id: I96daaf50102aede93164c82b7f6da235d0a7fbfc Signed-off-by: Laxman Dewangan Reviewed-on: http://git-master/r/101547 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Jui Chang Kuo Tested-by: Jui Chang Kuo --- drivers/spi/spi-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-tegra.c b/drivers/spi/spi-tegra.c index 3f913389dd7..cfc610102c1 100644 --- a/drivers/spi/spi-tegra.c +++ b/drivers/spi/spi-tegra.c @@ -1265,7 +1265,7 @@ static int __init spi_tegra_probe(struct platform_device *pdev) sprintf(tspi->port_name, "tegra_spi_%d", pdev->id); ret = request_threaded_irq(tspi->irq, spi_tegra_isr, - spi_tegra_isr_thread, IRQF_DISABLED, + spi_tegra_isr_thread, IRQF_ONESHOT, tspi->port_name, tspi); if (ret < 0) { dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n", From 669e4b12c692a4e68bac00f5eb42c450d3d1adef Mon Sep 17 00:00:00 2001 From: Andrew Dodd Date: Wed, 3 Oct 2012 13:35:43 -0500 Subject: [PATCH 075/678] bq27541: Add current_now property The current is returned by the charger as a twos complement signed integer in milliamps. Applications such as CurrentWidget expect this to be microamps. 
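A minimal sketch of the conversion (illustrative only; the hunk below achieves the same result with an explicit two's-complement negation, and the parameter name is hypothetical):

static int bq27541_raw_to_ua(unsigned int raw)
{
	int ma = (s16)(raw & 0xffff);	/* sign-extend the 16-bit reading (mA) */
	return ma * 1000;		/* power_supply expects microamps */
}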
Signed-off-by: Andrew Dodd Change-Id: I053b3d1f24ec04391e982fdcb4b3486241c3aa34 --- drivers/power/bq27541_battery.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/power/bq27541_battery.c b/drivers/power/bq27541_battery.c index 00d35d44c09..cbb1bed6d32 100755 --- a/drivers/power/bq27541_battery.c +++ b/drivers/power/bq27541_battery.c @@ -142,6 +142,7 @@ static enum power_supply_property bq27541_properties[] = { POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_TECHNOLOGY, POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_TEMP, }; @@ -502,6 +503,18 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, } BAT_NOTICE("voltage_now= %u uV\n", val->intval); } + if (psp == POWER_SUPPLY_PROP_CURRENT_NOW) { + val->intval = rt_value; + /* Returns a signed 16-bit value in mA */ + if (val->intval & 0x8000) { + /* Negative */ + val->intval = ~val->intval & 0x7fff; + val->intval++; + val->intval *= -1; + } + val->intval *= 1000; + BAT_NOTICE("current_now= %d uA\n", val->intval); + } if (psp == POWER_SUPPLY_PROP_STATUS) { ret = bq27541_device->bat_status = rt_value; static char *status_text[] = {"Unknown", "Charging", "Discharging", "Not charging", "Full"}; From 10bf9f7301dc7820c738ca443bd41bb131968c5c Mon Sep 17 00:00:00 2001 From: Andrew Dodd Date: Wed, 8 Aug 2012 22:48:08 -0400 Subject: [PATCH 076/678] bq27541: Correct scaling of voltage_now This is supposed to return the voltage in uV, not mV. Change-Id: I13f084f8c5f41f89a7e6c7cfd86e8806229a159b Signed-off-by: Andrew Dodd Conflicts: drivers/power/bq27541_battery.c --- drivers/power/bq27541_battery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/bq27541_battery.c b/drivers/power/bq27541_battery.c index cbb1bed6d32..9b62f0ca0e9 100755 --- a/drivers/power/bq27541_battery.c +++ b/drivers/power/bq27541_battery.c @@ -501,6 +501,7 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, } else { val->intval = bq27541_device->bat_vol; } + val->intval *= 1000; BAT_NOTICE("voltage_now= %u uV\n", val->intval); } if (psp == POWER_SUPPLY_PROP_CURRENT_NOW) { From 71652c904cb60d3699a8c8cf826a87df744426a7 Mon Sep 17 00:00:00 2001 From: Vandana Salve Date: Tue, 10 Jul 2012 21:03:24 +0530 Subject: [PATCH 077/678] ARM: tegra: clock: Increase boost_up_threshold for AVP clock Increase the boost_up_threshold to 85 for ULP audio bug 1009849 Change-Id: I4b1b746f445f5c2804befa52ae95c69b6b467083 Signed-off-by: Vandana Salve Reviewed-on: http://git-master/r/114620 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Bharat Nihalani Reviewed-by: Aleksandr Frid --- arch/arm/mach-tegra/tegra3_actmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_actmon.c b/arch/arm/mach-tegra/tegra3_actmon.c index 5df6ed1fc47..a76d0a963d9 100644 --- a/arch/arm/mach-tegra/tegra3_actmon.c +++ b/arch/arm/mach-tegra/tegra3_actmon.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, NVIDIA Corporation. + * Copyright (c) 2012, NVIDIA CORPORATION. 
All rights reserved * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -532,7 +532,7 @@ static struct actmon_dev actmon_dev_avp = { .boost_freq_step = 8000, .boost_up_coef = 200, .boost_down_coef = 50, - .boost_up_threshold = 75, + .boost_up_threshold = 85, .boost_down_threshold = 50, .up_wmark_window = 1, From 32879d54dadfd68654f06ac43ea517dbf93c8fda Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 4 Nov 2012 17:44:08 -0500 Subject: [PATCH 078/678] cpu-tegra.c: remove stress test code to allow building without userspace gov --- arch/arm/mach-tegra/cpu-tegra.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index b43e127d363..934988725bf 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -493,7 +493,7 @@ unsigned int tegra_getspeed(unsigned int cpu) rate = clk_get_rate(cpu_clk) / 1000; return rate; } -extern bool stress_test_enable; +//extern bool stress_test_enable; int tegra_update_cpu_speed(unsigned long rate) { int ret = 0; @@ -531,10 +531,10 @@ int tegra_update_cpu_speed(unsigned long rate) for_each_online_cpu(freqs.cpu) cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - if(stress_test_enable) - printk(KERN_DEBUG "cpufreq-tegra: transition: %u --> %u\n", - freqs.old, freqs.new); - +// if(stress_test_enable) +// printk(KERN_DEBUG "cpufreq-tegra: transition: %u --> %u\n", +// freqs.old, freqs.new); +// ret = clk_set_rate(cpu_clk, freqs.new * 1000); if (ret) { pr_err("cpu-tegra: Failed to set cpu frequency to %d kHz\n", From 8f4c136dfd6a076561cb95dad5d9dd494ba6e7b5 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 4 Nov 2012 23:40:33 -0500 Subject: [PATCH 079/678] More makefile optimizations --- Makefile | 56 +++++++++++++++++++++++------------------------ arch/arm/Makefile | 23 +++++++++---------- kernel/Makefile | 7 +++--- 3 files changed, 44 insertions(+), 42 deletions(-) diff --git a/Makefile b/Makefile index 80978c684f4..053b84d4866 100644 --- a/Makefile +++ b/Makefile @@ -347,11 +347,11 @@ CHECK = sparse CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void $(CF) -CFLAGS_MODULE = +CFLAGS_MODULE = -O2 -mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize AFLAGS_MODULE = LDFLAGS_MODULE = -CFLAGS_KERNEL = -AFLAGS_KERNEL = +CFLAGS_KERNEL = -O2 -mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize +AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage @@ -585,37 +585,37 @@ endif # Use make W=1 to enable this warning (see scripts/Makefile.build) KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) -ifdef CONFIG_FRAME_POINTER -KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls -else +#ifdef CONFIG_FRAME_POINTER +#KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls +#else # Some targets (ARM with Thumb2, for example), can't be built with frame # pointers. For those, we don't have FUNCTION_TRACER automatically # select FRAME_POINTER. However, FUNCTION_TRACER adds -pg, and this is # incompatible with -fomit-frame-pointer with current GCC, so we don't use # -fomit-frame-pointer with FUNCTION_TRACER. 
-ifndef CONFIG_FUNCTION_TRACER +#ifndef CONFIG_FUNCTION_TRACER KBUILD_CFLAGS += -fomit-frame-pointer -endif -endif - -ifdef CONFIG_DEBUG_INFO -KBUILD_CFLAGS += -g -KBUILD_AFLAGS += -gdwarf-2 -endif - -ifdef CONFIG_DEBUG_INFO_REDUCED -KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) -endif - -ifdef CONFIG_FUNCTION_TRACER -KBUILD_CFLAGS += -pg -ifdef CONFIG_DYNAMIC_FTRACE - ifdef CONFIG_HAVE_C_RECORDMCOUNT - BUILD_C_RECORDMCOUNT := y - export BUILD_C_RECORDMCOUNT - endif -endif -endif +#endif +#endif + +#ifdef CONFIG_DEBUG_INFO +#KBUILD_CFLAGS += -g +#KBUILD_AFLAGS += -gdwarf-2 +#endif + +#ifdef CONFIG_DEBUG_INFO_REDUCED +#KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) +#endif + +#ifdef CONFIG_FUNCTION_TRACER +#KBUILD_CFLAGS += -pg +#ifdef CONFIG_DYNAMIC_FTRACE +# ifdef CONFIG_HAVE_C_RECORDMCOUNT +# BUILD_C_RECORDMCOUNT := y +# export BUILD_C_RECORDMCOUNT +# endif +#endif +#endif # We trigger additional mismatches with less inlining ifdef CONFIG_DEBUG_SECTION_MISMATCH diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 61bf37448ef..941c13400bd 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -17,7 +17,7 @@ endif OBJCOPYFLAGS :=-O binary -R .comment -S GZFLAGS :=-9 -#KBUILD_CFLAGS +=-pipe +KBUILD_CFLAGS +=-pipe # Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb: KBUILD_CFLAGS +=$(call cc-option,-marm,) @@ -33,13 +33,13 @@ ifeq ($(CONFIG_MMU),) MMUEXT := -nommu endif -ifeq ($(CONFIG_FRAME_POINTER),y) -KBUILD_CFLAGS +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog -endif +#ifeq ($(CONFIG_FRAME_POINTER),y) +#KBUILD_CFLAGS +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog +#endif -ifeq ($(CONFIG_CC_STACKPROTECTOR),y) -KBUILD_CFLAGS +=-fstack-protector -endif +#ifeq ($(CONFIG_CC_STACKPROTECTOR),y) +#KBUILD_CFLAGS +=-fstack-protector +#endif ifeq ($(CONFIG_CPU_BIG_ENDIAN),y) KBUILD_CPPFLAGS += -mbig-endian @@ -57,7 +57,8 @@ comma = , # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv5te -Wa$(comma)-march=armv7-a) +#arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv5te -Wa$(comma)-march=armv7-a) +arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv7-a -Wa$(comma)-march=armv7-a) arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) # Only override the compiler option if ARMv6. 
The ARMv6K extensions are # always available in ARMv7 @@ -97,9 +98,9 @@ else CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,) endif -ifeq ($(CONFIG_ARM_UNWIND),y) -CFLAGS_ABI +=-funwind-tables -endif +#ifeq ($(CONFIG_ARM_UNWIND),y) +#CFLAGS_ABI +=-funwind-tables +#endif ifeq ($(CONFIG_THUMB2_KERNEL),y) AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it) diff --git a/kernel/Makefile b/kernel/Makefile index eca595e2fd5..9b45c84662f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -109,14 +109,15 @@ obj-$(CONFIG_PADATA) += padata.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o -ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) +#ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is # needed for x86 only. Why this used to be enabled for all architectures is beyond # me. I suspect most platforms don't need this, but until we know that for sure # I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k # to get a correct value for the wait-channel (WCHAN in ps). --davidm -CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer -endif +#CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer +CFLAGS_sched.o := -O2 -fomit-frame-pointer -mtune=cortex-a9 -march=armv7-a -ftree-vectorize +#endif $(obj)/configs.o: $(obj)/config_data.h From 799801eebc31e6c0f7a0cccf1ad06a2120fcf913 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 4 Nov 2012 23:41:03 -0500 Subject: [PATCH 080/678] boot: compressed: makefile: fix for linaro toolchain --- arch/arm/boot/compressed/Makefile | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 0c74a6fab95..9e56c726bbd 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -5,6 +5,7 @@ # OBJS = +plus_sec := $(call as-instr,.arch_extension sec,+sec) # Ensure that MMCIF loader code appears early in the image # to minimise that number of bocks that have to be read in @@ -21,9 +22,16 @@ OBJS += sdhi-shmobile.o OBJS += sdhi-sh7372.o endif -AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET) -HEAD = head.o -OBJS += misc.o decompress.o +AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +AFLAGS_head.o += -Wa,-march=armv7-a$(plus_sec) +HEAD = head.o + +AFLAGS_misc.o +=-Wa,-march=armv7-a$(plus_sec) +MISC = misc.o + +AFLAGS_decompress.o += -Wa,-march=armv7-a$(plus_sec) +DECOMPRESS = decompress.o + FONTC = $(srctree)/drivers/video/console/font_acorn_8x8.c # @@ -89,9 +97,9 @@ suffix_$(CONFIG_KERNEL_GZIP) = gzip suffix_$(CONFIG_KERNEL_LZO) = lzo suffix_$(CONFIG_KERNEL_LZMA) = lzma -targets := vmlinux vmlinux.lds \ +targets := vmlinux vmlinux.lds \ piggy.$(suffix_y) piggy.$(suffix_y).o \ - font.o font.c head.o misc.o $(OBJS) + font.o font.c head.o misc.o decompress.o $(OBJS) # Make sure files are removed during clean extra-y += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S @@ -121,6 +129,7 @@ LDFLAGS_vmlinux += -X LDFLAGS_vmlinux += -T # For __aeabi_uidivmod +AFLAGS_lib1funcs.o +=-Wa,-march=armv7-a$(plus_sec) lib1funcs = $(obj)/lib1funcs.o $(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S FORCE @@ -139,7 +148,7 @@ bad_syms=$$($(CROSS_COMPILE)nm $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \ ( echo "following symbols must have non local/private scope:" >&2; \ echo "$$bad_syms" >&2; rm -f $@; false ) -$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) 
$(obj)/piggy.$(suffix_y).o \ +$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/$(MISC) $(obj)/$(DECOMPRESS) $(obj)/piggy.$(suffix_y).o \ $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) FORCE $(call if_changed,ld) @$(check_for_bad_syms) @@ -147,6 +156,7 @@ $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \ $(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE $(call if_changed,$(suffix_y)) +AFLAGS_piggy.$(suffix_y).o += -Wa,-march=armv7-a$(plus_sec) $(obj)/piggy.$(suffix_y).o: $(obj)/piggy.$(suffix_y) FORCE CFLAGS_font.o := -Dstatic= From 2d153777b4073a02d9d920ada2a883fd2e1562ab Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 4 Nov 2012 23:41:35 -0500 Subject: [PATCH 081/678] HACK temporary hack to allow building until I can take a better look --- arch/arm/mach-tegra/usb_phy.c | 24 ++++++++++++------------ drivers/usb/host/ehci-tegra.c | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm/mach-tegra/usb_phy.c b/arch/arm/mach-tegra/usb_phy.c index 6e84e3d8279..0757020c5f0 100755 --- a/arch/arm/mach-tegra/usb_phy.c +++ b/arch/arm/mach-tegra/usb_phy.c @@ -2327,12 +2327,12 @@ static int uhsic_phy_power_on(struct tegra_usb_phy *phy, bool is_dpd) uhsic_powerup_pmc_wake_detect(phy); #endif - if (uhsic_config->enable_gpio != -1) { - baseband_xmm_enable_hsic_power(1); - gpio_set_value_cansleep(uhsic_config->enable_gpio, 1); - /* keep hsic reset asserted for 1 ms */ - udelay(1000); - } +// if (uhsic_config->enable_gpio != -1) { +// baseband_xmm_enable_hsic_power(1); +// gpio_set_value_cansleep(uhsic_config->enable_gpio, 1); +// /* keep hsic reset asserted for 1 ms */ +// udelay(1000); +// } val = readl(base + UHSIC_PADS_CFG1); val &= ~(UHSIC_PD_BG | UHSIC_PD_TX | UHSIC_PD_TRK | UHSIC_PD_RX | @@ -2458,12 +2458,12 @@ static int uhsic_phy_power_off(struct tegra_usb_phy *phy, bool is_dpd) val &= ~UHSIC_PHY_ENABLE; writel(val, base + USB_SUSP_CTRL); - if (uhsic_config->enable_gpio != -1) { - gpio_set_value_cansleep(uhsic_config->enable_gpio, 0); - /* keep hsic reset de-asserted for 1 ms */ - udelay(1000); - baseband_xmm_enable_hsic_power(0); - } +// if (uhsic_config->enable_gpio != -1) { +// gpio_set_value_cansleep(uhsic_config->enable_gpio, 0); +// /* keep hsic reset de-asserted for 1 ms */ +// udelay(1000); +// baseband_xmm_enable_hsic_power(0); +// } if (uhsic_config->post_phy_off && uhsic_config->post_phy_off()) return -EAGAIN; diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 76f40688f82..ae1101a0c3a 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -1282,8 +1282,8 @@ static int tegra_ehci_resume(struct platform_device *pdev) int ret; u32 project_info = grouper_get_project_id(); - if (project_info == GROUPER_PROJECT_NAKASI_3G) - baseband_xmm_L3_resume_check(); +// if (project_info == GROUPER_PROJECT_NAKASI_3G) +// baseband_xmm_L3_resume_check(); mutex_lock(&tegra->tegra_ehci_hcd_mutex); if ((tegra->bus_suspended) && (tegra->power_down_on_bus_suspend)) { From f42e229d69e76c18e2d3366eb3530dc7234f247f Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 15 Oct 2012 04:11:05 -0400 Subject: [PATCH 082/678] cpufreq: interactive and ondemand changes --- drivers/cpufreq/cpufreq_interactive.c | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 24 ++++++++---------------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index f6de88549cc..24633ca8778 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ 
b/drivers/cpufreq/cpufreq_interactive.c @@ -752,7 +752,7 @@ static int cpufreq_interactive_input_connect(struct input_handler *handler, struct input_handle *handle; int error; - pr_info("%s: connect to %s\n", __func__, dev->name); + pr_debug("%s: connect to %s\n", __func__, dev->name); handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); if (!handle) return -ENOMEM; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 111b1e07dcb..244640ff398 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -28,13 +28,14 @@ * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (15) -#define DEF_FREQUENCY_UP_THRESHOLD (75) -#define DEF_SAMPLING_DOWN_FACTOR (2) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) -#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define MICRO_FREQUENCY_UP_THRESHOLD (90) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) +#define MICRO_FREQUENCY_UP_THRESHOLD (95) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) +#define MICRO_FREQUENCY_DEF_SAMPLE_RATE (15000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) @@ -671,11 +672,8 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (latency == 0) latency = 1; /* Bring kernel and HW constraints together */ - min_sampling_rate = max(min_sampling_rate, - MIN_LATENCY_MULTIPLIER * latency); - dbs_tuners_ins.sampling_rate = - max(min_sampling_rate, - latency * LATENCY_MULTIPLIER); + min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + dbs_tuners_ins.sampling_rate = MICRO_FREQUENCY_DEF_SAMPLE_RATE; dbs_tuners_ins.io_is_busy = should_io_be_busy(); } mutex_unlock(&dbs_mutex); @@ -718,7 +716,6 @@ static int __init cpufreq_gov_dbs_init(void) idle_time = get_cpu_idle_time_us(cpu, &wall); put_cpu(); - if (idle_time != -1ULL) { /* Idle micro accounting is supported. Use finer thresholds */ dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; dbs_tuners_ins.down_differential = @@ -729,11 +726,6 @@ static int __init cpufreq_gov_dbs_init(void) * timer might skip some samples if idle/sleeping as needed. */ min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; - } else { - /* For correct statistics, we need 10 ticks for each measure */ - min_sampling_rate = - MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); - } return cpufreq_register_governor(&cpufreq_gov_ondemand); } From 16d6b756bdd56634ae163376a397bb195089b8f0 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 5 Nov 2012 11:48:09 -0500 Subject: [PATCH 083/678] Revert "bq27541: Correct scaling of voltage_now" This reverts commit b1ed603a7dc08ab9515c10a6cd7bea9ddfd1b9e7. 
--- drivers/power/bq27541_battery.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/power/bq27541_battery.c b/drivers/power/bq27541_battery.c index 9b62f0ca0e9..cbb1bed6d32 100755 --- a/drivers/power/bq27541_battery.c +++ b/drivers/power/bq27541_battery.c @@ -501,7 +501,6 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, } else { val->intval = bq27541_device->bat_vol; } - val->intval *= 1000; BAT_NOTICE("voltage_now= %u uV\n", val->intval); } if (psp == POWER_SUPPLY_PROP_CURRENT_NOW) { From d506db6dd2eb020fb6fe4fc9ff192ca88d453f38 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 5 Nov 2012 11:48:36 -0500 Subject: [PATCH 084/678] Revert "HACK temporary hack to allow building until I can take a better look" This reverts commit d375d9c70a64c8b65279a402a6bb2da1e643c117. --- arch/arm/mach-tegra/usb_phy.c | 24 ++++++++++++------------ drivers/usb/host/ehci-tegra.c | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm/mach-tegra/usb_phy.c b/arch/arm/mach-tegra/usb_phy.c index 0757020c5f0..6e84e3d8279 100755 --- a/arch/arm/mach-tegra/usb_phy.c +++ b/arch/arm/mach-tegra/usb_phy.c @@ -2327,12 +2327,12 @@ static int uhsic_phy_power_on(struct tegra_usb_phy *phy, bool is_dpd) uhsic_powerup_pmc_wake_detect(phy); #endif -// if (uhsic_config->enable_gpio != -1) { -// baseband_xmm_enable_hsic_power(1); -// gpio_set_value_cansleep(uhsic_config->enable_gpio, 1); -// /* keep hsic reset asserted for 1 ms */ -// udelay(1000); -// } + if (uhsic_config->enable_gpio != -1) { + baseband_xmm_enable_hsic_power(1); + gpio_set_value_cansleep(uhsic_config->enable_gpio, 1); + /* keep hsic reset asserted for 1 ms */ + udelay(1000); + } val = readl(base + UHSIC_PADS_CFG1); val &= ~(UHSIC_PD_BG | UHSIC_PD_TX | UHSIC_PD_TRK | UHSIC_PD_RX | @@ -2458,12 +2458,12 @@ static int uhsic_phy_power_off(struct tegra_usb_phy *phy, bool is_dpd) val &= ~UHSIC_PHY_ENABLE; writel(val, base + USB_SUSP_CTRL); -// if (uhsic_config->enable_gpio != -1) { -// gpio_set_value_cansleep(uhsic_config->enable_gpio, 0); -// /* keep hsic reset de-asserted for 1 ms */ -// udelay(1000); -// baseband_xmm_enable_hsic_power(0); -// } + if (uhsic_config->enable_gpio != -1) { + gpio_set_value_cansleep(uhsic_config->enable_gpio, 0); + /* keep hsic reset de-asserted for 1 ms */ + udelay(1000); + baseband_xmm_enable_hsic_power(0); + } if (uhsic_config->post_phy_off && uhsic_config->post_phy_off()) return -EAGAIN; diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index ae1101a0c3a..76f40688f82 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -1282,8 +1282,8 @@ static int tegra_ehci_resume(struct platform_device *pdev) int ret; u32 project_info = grouper_get_project_id(); -// if (project_info == GROUPER_PROJECT_NAKASI_3G) -// baseband_xmm_L3_resume_check(); + if (project_info == GROUPER_PROJECT_NAKASI_3G) + baseband_xmm_L3_resume_check(); mutex_lock(&tegra->tegra_ehci_hcd_mutex); if ((tegra->bus_suspended) && (tegra->power_down_on_bus_suspend)) { From 0efe49936ed16384dc3d8186a382a6507fb9a516 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 5 Nov 2012 11:53:17 -0500 Subject: [PATCH 085/678] defconfig: add metallice_grouper_defconfig a9/mr0 --- arch/arm/configs/metallice_grouper_defconfig | 3303 ++++++++++++++++++ 1 file changed, 3303 insertions(+) create mode 100644 arch/arm/configs/metallice_grouper_defconfig diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig new file mode 100644 index 
00000000000..4ac53e3a5eb --- /dev/null +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -0,0 +1,3303 @@ +# +# Automatically generated file; DO NOT EDIT. +# Linux/arm 3.1.10 Kernel Configuration +# +CONFIG_ARM=y +CONFIG_HAVE_PWM=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_HAVE_SCHED_CLOCK=y +CONFIG_GENERIC_GPIO=y +# CONFIG_ARCH_USES_GETTIMEOFFSET is not set +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +CONFIG_KTIME_SCALAR=y +CONFIG_HAVE_PROC_CPU=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_LOCKBREAK=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_ARCH_HAS_CPUFREQ=y +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_NEED_DMA_MAP_STATE=y +CONFIG_FIQ=y +CONFIG_ARCH_PROVIDES_UDELAY=y +CONFIG_VECTORS_BASE=0xffff0000 +# CONFIG_ARM_PATCH_PHYS_VIRT is not set +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_HAVE_IRQ_WORK=y +CONFIG_IRQ_WORK=y + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_CROSS_COMPILE="" +CONFIG_LOCALVERSION="-MKernel-446-620" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_HAVE_KERNEL_GZIP=y +CONFIG_HAVE_KERNEL_LZMA=y +CONFIG_HAVE_KERNEL_LZO=y +CONFIG_KERNEL_GZIP=y +# CONFIG_KERNEL_LZMA is not set +# CONFIG_KERNEL_LZO is not set +CONFIG_DEFAULT_HOSTNAME="(none)" +CONFIG_SWAP=y +# CONFIG_SYSVIPC is not set +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_FHANDLE is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set +CONFIG_HAVE_GENERIC_HARDIRQS=y + +# +# IRQ subsystem +# +CONFIG_GENERIC_HARDIRQS=y +CONFIG_HAVE_SPARSE_IRQ=y +CONFIG_GENERIC_IRQ_SHOW=y +# CONFIG_SPARSE_IRQ is not set + +# +# RCU Subsystem +# +CONFIG_TREE_PREEMPT_RCU=y +CONFIG_PREEMPT_RCU=y +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_RCU_BOOST is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_FREEZER=y +# CONFIG_CGROUP_DEVICE is not set +# CONFIG_CPUSETS is not set +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +# CONFIG_CGROUP_MEM_RES_CTLR is not set +# CONFIG_CGROUP_PERF is not set +CONFIG_CGROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +# CONFIG_BLK_CGROUP is not set +# CONFIG_NAMESPACES is not set +# CONFIG_SCHED_AUTOGROUP is not set +# CONFIG_SYSFS_DEPRECATED is not set +# CONFIG_RELAY is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +CONFIG_PANIC_TIMEOUT=10 +CONFIG_EXPERT=y +CONFIG_UID16=y +# CONFIG_SYSCTL_SYSCALL is not set +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +# CONFIG_ELF_CORE is not set +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_ASHMEM=y +CONFIG_AIO=y +CONFIG_EMBEDDED=y +CONFIG_HAVE_PERF_EVENTS=y +CONFIG_PERF_USE_VMALLOC=y + +# +# Kernel Performance Events And Counters +# +CONFIG_PERF_EVENTS=y +# CONFIG_PERF_COUNTERS is not set +# CONFIG_DEBUG_PERF_USE_VMALLOC is not set +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y +# CONFIG_SLUB_DEBUG 
is not set +CONFIG_COMPAT_BRK=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_USE_GENERIC_SMP_HELPERS=y +CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y +CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +CONFIG_HAVE_HW_BREAKPOINT=y + +# +# GCOV-based kernel profiling +# +# CONFIG_GCOV_KERNEL is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +CONFIG_LBDAF=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_BSGLIB is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_IOSCHED_SIO=y +CONFIG_IOSCHED_VR=y +CONFIG_DEFAULT_DEADLINE=y +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +# CONFIG_DEFAULT_SIO is not set +# CONFIG_DEFAULT_VR is not set +CONFIG_DEFAULT_IOSCHED="deadline" +# CONFIG_INLINE_SPIN_TRYLOCK is not set +# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK is not set +# CONFIG_INLINE_SPIN_LOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK_IRQ is not set +# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set +# CONFIG_INLINE_SPIN_UNLOCK is not set +# CONFIG_INLINE_SPIN_UNLOCK_BH is not set +# CONFIG_INLINE_SPIN_UNLOCK_IRQ is not set +# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_READ_TRYLOCK is not set +# CONFIG_INLINE_READ_LOCK is not set +# CONFIG_INLINE_READ_LOCK_BH is not set +# CONFIG_INLINE_READ_LOCK_IRQ is not set +# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set +# CONFIG_INLINE_READ_UNLOCK is not set +# CONFIG_INLINE_READ_UNLOCK_BH is not set +# CONFIG_INLINE_READ_UNLOCK_IRQ is not set +# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_WRITE_TRYLOCK is not set +# CONFIG_INLINE_WRITE_LOCK is not set +# CONFIG_INLINE_WRITE_LOCK_BH is not set +# CONFIG_INLINE_WRITE_LOCK_IRQ is not set +# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set +# CONFIG_INLINE_WRITE_UNLOCK is not set +# CONFIG_INLINE_WRITE_UNLOCK_BH is not set +# CONFIG_INLINE_WRITE_UNLOCK_IRQ is not set +# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set +CONFIG_MUTEX_SPIN_ON_OWNER=y +CONFIG_FREEZER=y + +# +# System Type +# +CONFIG_MMU=y +# CONFIG_ARCH_INTEGRATOR is not set +# CONFIG_ARCH_REALVIEW is not set +# CONFIG_ARCH_VERSATILE is not set +# CONFIG_ARCH_VEXPRESS is not set +# CONFIG_ARCH_AT91 is not set +# CONFIG_ARCH_BCMRING is not set +# CONFIG_ARCH_CLPS711X is not set +# CONFIG_ARCH_CNS3XXX is not set +# CONFIG_ARCH_GEMINI is not set +# CONFIG_ARCH_PRIMA2 is not set +# CONFIG_ARCH_EBSA110 is not set +# CONFIG_ARCH_EP93XX is not set +# CONFIG_ARCH_FOOTBRIDGE is not set +# CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_MXS is not set +# CONFIG_ARCH_NETX is not set +# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_IOP13XX is not set +# CONFIG_ARCH_IOP32X is not set +# CONFIG_ARCH_IOP33X is not set +# CONFIG_ARCH_IXP23XX is not set +# CONFIG_ARCH_IXP2000 is not set +# CONFIG_ARCH_IXP4XX is not set +# CONFIG_ARCH_DOVE is not set +# CONFIG_ARCH_KIRKWOOD is not set +# CONFIG_ARCH_LPC32XX is not set +# CONFIG_ARCH_MV78XX0 is not set +# CONFIG_ARCH_ORION5X is not set +# CONFIG_ARCH_MMP is not set +# CONFIG_ARCH_KS8695 is not set +# CONFIG_ARCH_W90X900 is not set +# 
CONFIG_ARCH_NUC93X is not set +CONFIG_ARCH_TEGRA=y +# CONFIG_ARCH_PNX4008 is not set +# CONFIG_ARCH_PXA is not set +# CONFIG_ARCH_MSM is not set +# CONFIG_ARCH_SHMOBILE is not set +# CONFIG_ARCH_RPC is not set +# CONFIG_ARCH_SA1100 is not set +# CONFIG_ARCH_S3C2410 is not set +# CONFIG_ARCH_S3C64XX is not set +# CONFIG_ARCH_S5P64X0 is not set +# CONFIG_ARCH_S5PC100 is not set +# CONFIG_ARCH_S5PV210 is not set +# CONFIG_ARCH_EXYNOS4 is not set +# CONFIG_ARCH_SHARK is not set +# CONFIG_ARCH_TCC_926 is not set +# CONFIG_ARCH_U300 is not set +# CONFIG_ARCH_U8500 is not set +# CONFIG_ARCH_NOMADIK is not set +# CONFIG_ARCH_DAVINCI is not set +# CONFIG_ARCH_OMAP is not set +# CONFIG_PLAT_SPEAR is not set +# CONFIG_ARCH_VT8500 is not set +# CONFIG_ARCH_ZYNQ is not set +CONFIG_GPIO_PCA953X=y +# CONFIG_KEYBOARD_GPIO_POLLED is not set + +# +# System MMU +# + +# +# NVIDIA Tegra options +# +CONFIG_ARCH_TEGRA_3x_SOC=y +CONFIG_ARCH_TEGRA_HAS_DUAL_3D=y +CONFIG_ARCH_TEGRA_HAS_DUAL_CPU_CLUSTERS=y +CONFIG_ARCH_TEGRA_HAS_PCIE=y +CONFIG_ARCH_TEGRA_HAS_SATA=y +CONFIG_TEGRA_PCI=y + +# +# Tegra board type +# +# CONFIG_MACH_TEGRA_DT is not set +# CONFIG_MACH_ARUBA is not set +# CONFIG_MACH_CARDHU is not set +# CONFIG_MACH_P1852 is not set +# CONFIG_MACH_TEGRA_ENTERPRISE is not set +# CONFIG_MACH_KAI is not set +CONFIG_MACH_GROUPER=y +CONFIG_TEGRA_SILICON_PLATFORM=y +# CONFIG_TEGRA_SIMULATION_PLATFORM is not set +# CONFIG_TEGRA_FPGA_PLATFORM is not set +CONFIG_TEGRA_DEBUG_UART_NONE=y +CONFIG_TEGRA_SYSTEM_DMA=y +CONFIG_TEGRA_PWM=y +CONFIG_TEGRA_FIQ_DEBUGGER=y +CONFIG_TEGRA_EMC_SCALING_ENABLE=y +CONFIG_VOLTAGE_CONTROL=y +CONFIG_GPU_OVERCLOCK=y +CONFIG_GPU_OC_446=y +# CONFIG_GPU_OC_484 is not set +# CONFIG_GPU_OC_520 is not set +CONFIG_TEGRA_CPU_DVFS=y +CONFIG_TEGRA_CORE_DVFS=y +CONFIG_TEGRA_IOVMM_SMMU=y +# CONFIG_TEGRA_SMMU_BASE_AT_E0000000 is not set +# CONFIG_TEGRA_IOVMM_SMMU_SYSFS is not set +CONFIG_TEGRA_IOVMM=y +CONFIG_TEGRA_AVP_KERNEL_ON_SMMU=y +CONFIG_TEGRA_THERMAL_THROTTLE=y +CONFIG_WIFI_CONTROL_FUNC=y +CONFIG_TEGRA_CLOCK_DEBUG_WRITE=y +CONFIG_TEGRA_CLUSTER_CONTROL=y +CONFIG_TEGRA_AUTO_HOTPLUG=y +CONFIG_TEGRA_MC_EARLY_ACK=y +CONFIG_TEGRA_MC_PROFILE=y +CONFIG_TEGRA_EDP_LIMITS=y +CONFIG_TEGRA_EMC_TO_DDR_CLOCK=1 +# CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND is not set +CONFIG_USB_HOTPLUG=y +CONFIG_TEGRA_DYNAMIC_PWRDET=y +CONFIG_TEGRA_EDP_EXACT_FREQ=y +# CONFIG_TEGRA_USB_MODEM_POWER is not set +CONFIG_TEGRA_BB_XMM_POWER=y +# CONFIG_TEGRA_BB_XMM_POWER2 is not set +# CONFIG_TEGRA_THERMAL_SYSFS is not set +CONFIG_TEGRA_PLLM_RESTRICTED=y +# CONFIG_TEGRA_WDT_RECOVERY is not set +CONFIG_TEGRA_LP2_ARM_TWD=y +CONFIG_TEGRA_SLOW_CSITE=y +# CONFIG_TEGRA_PREINIT_CLOCKS is not set + +# +# Processor Type +# +CONFIG_CPU_V7=y +CONFIG_CPU_32v6K=y +CONFIG_CPU_32v7=y +CONFIG_CPU_ABRT_EV7=y +CONFIG_CPU_PABRT_V7=y +CONFIG_CPU_CACHE_V7=y +CONFIG_CPU_CACHE_VIPT=y +CONFIG_CPU_COPY_V6=y +CONFIG_CPU_TLB_V7=y +CONFIG_CPU_HAS_ASID=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y + +# +# Processor Features +# +CONFIG_ARM_THUMB=y +# CONFIG_ARM_THUMBEE is not set +CONFIG_SWP_EMULATE=y +# CONFIG_CPU_ICACHE_DISABLE is not set +# CONFIG_CPU_DCACHE_DISABLE is not set +# CONFIG_CPU_BPREDICT_DISABLE is not set +CONFIG_OUTER_CACHE=y +CONFIG_OUTER_CACHE_SYNC=y +CONFIG_CACHE_L2X0=y +CONFIG_CACHE_PL310=y +CONFIG_ARM_L1_CACHE_SHIFT=5 +CONFIG_ARM_DMA_MEM_BUFFERABLE=y +CONFIG_ARM_SAVE_DEBUG_CONTEXT=y +CONFIG_CPA=y +CONFIG_CPU_HAS_PMU=y +# CONFIG_ARM_ERRATA_430973 is not set +# CONFIG_ARM_ERRATA_458693 is not set +# CONFIG_ARM_ERRATA_460075 is not set 
+CONFIG_ARM_ERRATA_742230=y +# CONFIG_ARM_ERRATA_742231 is not set +# CONFIG_PL310_ERRATA_588369 is not set +# CONFIG_ARM_ERRATA_720789 is not set +# CONFIG_PL310_ERRATA_727915 is not set +CONFIG_ARM_ERRATA_743622=y +CONFIG_ARM_ERRATA_751472=y +# CONFIG_ARM_ERRATA_753970 is not set +CONFIG_ARM_ERRATA_754322=y +# CONFIG_ARM_ERRATA_754327 is not set +CONFIG_ARM_ERRATA_764369=y +# CONFIG_ARM_ERRATA_720791 is not set +CONFIG_ARM_ERRATA_752520=y +# CONFIG_PL310_ERRATA_769419 is not set +CONFIG_ARM_GIC=y +CONFIG_FIQ_GLUE=y +CONFIG_FIQ_DEBUGGER=y +# CONFIG_FIQ_DEBUGGER_NO_SLEEP is not set +# CONFIG_FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON is not set +CONFIG_FIQ_DEBUGGER_CONSOLE=y +# CONFIG_FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE is not set +CONFIG_GIC_SET_MULTIPLE_CPUS=y + +# +# Bus support +# +CONFIG_PCI=y +CONFIG_PCI_SYSCALL=y +CONFIG_ARCH_SUPPORTS_MSI=y +CONFIG_PCI_MSI=y +# CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_STUB is not set +# CONFIG_PCI_IOV is not set +# CONFIG_PCCARD is not set + +# +# Kernel Features +# +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_SMP=y +CONFIG_SMP_ON_UP=y +CONFIG_HAVE_ARM_SCU=y +CONFIG_HAVE_ARM_TWD=y +CONFIG_VMSPLIT_3G=y +# CONFIG_VMSPLIT_2G is not set +# CONFIG_VMSPLIT_1G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_TASK_SIZE_3G_LESS_16M=y +# CONFIG_TASK_SIZE_3G_LESS_24M is not set +CONFIG_TASK_SIZE=0xBF000000 +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_LOCAL_TIMERS=y +CONFIG_ARCH_NR_GPIO=512 +# CONFIG_PREEMPT_NONE is not set +# CONFIG_PREEMPT_VOLUNTARY is not set +CONFIG_PREEMPT=y +CONFIG_PREEMPT_COUNT=y +CONFIG_HZ=100 +# CONFIG_THUMB2_KERNEL is not set +CONFIG_AEABI=y +# CONFIG_OABI_COMPAT is not set +# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +CONFIG_HAVE_ARCH_PFN_VALID=y +CONFIG_HIGHMEM=y +# CONFIG_HIGHPTE is not set +CONFIG_HW_PERF_EVENTS=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_HAVE_MEMBLOCK=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_COMPACTION is not set +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y +# CONFIG_KSM is not set +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 +# CONFIG_CLEANCACHE is not set +CONFIG_FORCE_MAX_ZONEORDER=11 +CONFIG_ALIGNMENT_TRAP=y +# CONFIG_UACCESS_WITH_MEMCPY is not set +# CONFIG_SECCOMP is not set +# CONFIG_CC_STACKPROTECTOR is not set +# CONFIG_DEPRECATED_PARAM_STRUCT is not set +CONFIG_ARM_FLUSH_CONSOLE_ON_RESTART=y + +# +# Boot options +# +# CONFIG_USE_OF is not set +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_CMDLINE="tegra_wdt.heartbeat=30" +# CONFIG_CMDLINE_FROM_BOOTLOADER is not set +CONFIG_CMDLINE_EXTEND=y +# CONFIG_CMDLINE_FORCE is not set +# CONFIG_XIP_KERNEL is not set +# CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set +# CONFIG_AUTO_ZRELADDR is not set + +# +# CPU Power Management +# + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_STAT_DETAILS is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_GOV_USERSPACE is not set 
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_INTERACTIVE=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set +# CONFIG_CPU_FREQ_GOV_LULZACTIVE is not set +# CONFIG_CPU_FREQ_GOV_PEGASUSQ is not set + +# +# ARM CPU frequency scaling drivers +# +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y + +# +# Floating point emulation +# + +# +# At least one emulation must be selected +# +CONFIG_VFP=y +CONFIG_VFPv3=y +CONFIG_NEON=y + +# +# Userspace binary formats +# +CONFIG_BINFMT_ELF=y +CONFIG_HAVE_AOUT=y +# CONFIG_BINFMT_AOUT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Power management options +# +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +CONFIG_HAS_WAKELOCK=y +CONFIG_HAS_EARLYSUSPEND=y +CONFIG_WAKELOCK=y +CONFIG_WAKELOCK_STAT=y +CONFIG_USER_WAKELOCK=y +CONFIG_EARLYSUSPEND=y +# CONFIG_NO_USER_SPACE_SCREEN_ACCESS_CONTROL is not set +CONFIG_FB_EARLYSUSPEND=y +CONFIG_PM_SLEEP=y +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_RUNTIME=y +CONFIG_PM=y +# CONFIG_PM_DEBUG is not set +# CONFIG_APM_EMULATION is not set +CONFIG_PM_CLK=y +CONFIG_SUSPEND_TIME=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +CONFIG_XFRM_IPCOMP=y +CONFIG_NET_KEY=y +# CONFIG_NET_KEY_MIGRATE is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +# CONFIG_IP_FIB_TRIE_STATS is not set +CONFIG_IP_MULTIPLE_TABLES=y +# CONFIG_IP_ROUTE_MULTIPATH is not set +# CONFIG_IP_ROUTE_VERBOSE is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE_DEMUX is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +CONFIG_INET_ESP=y +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +CONFIG_INET_TUNNEL=y +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +CONFIG_IPV6=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +# CONFIG_IPV6_ROUTE_INFO is not set +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_INET6_XFRM_TUNNEL=y +CONFIG_INET6_TUNNEL=y +CONFIG_INET6_XFRM_MODE_TRANSPORT=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_INET6_XFRM_MODE_BEET=y +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=y +# CONFIG_IPV6_SIT_6RD is not set +CONFIG_IPV6_NDISC_NODETYPE=y +CONFIG_IPV6_TUNNEL=y +CONFIG_IPV6_MULTIPLE_TABLES=y +# CONFIG_IPV6_SUBTREES is not set +# CONFIG_IPV6_MROUTE is not set +CONFIG_ANDROID_PARANOID_NETWORK=y +CONFIG_NET_ACTIVITY_STATS=y +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETWORK_PHY_TIMESTAMPING is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_ADVANCED=y + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_QUEUE=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +# CONFIG_NF_CONNTRACK_TIMESTAMP is not set +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_GRE=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y 
+CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_BROADCAST=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +# CONFIG_NF_CONNTRACK_SNMP is not set +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +# CONFIG_NF_CONNTRACK_SIP is not set +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_TPROXY=y +CONFIG_NETFILTER_XTABLES=y + +# +# Xtables combined modules +# +CONFIG_NETFILTER_XT_MARK=y +CONFIG_NETFILTER_XT_CONNMARK=y + +# +# Xtables targets +# +# CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +# CONFIG_NETFILTER_XT_TARGET_CT is not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +# CONFIG_NETFILTER_XT_TARGET_HL is not set +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +# CONFIG_NETFILTER_XT_TARGET_RATEEST is not set +# CONFIG_NETFILTER_XT_TARGET_TEE is not set +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set +# CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set + +# +# Xtables matches +# +# CONFIG_NETFILTER_XT_MATCH_ADDRTYPE is not set +# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +# CONFIG_NETFILTER_XT_MATCH_CPU is not set +# CONFIG_NETFILTER_XT_MATCH_DCCP is not set +# CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set +# CONFIG_NETFILTER_XT_MATCH_DSCP is not set +# CONFIG_NETFILTER_XT_MATCH_ESP is not set +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_HL=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set +# CONFIG_NETFILTER_XT_MATCH_OSF is not set +# CONFIG_NETFILTER_XT_MATCH_OWNER is not set +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y +# CONFIG_NETFILTER_XT_MATCH_RATEEST is not set +# CONFIG_NETFILTER_XT_MATCH_REALM is not set +# CONFIG_NETFILTER_XT_MATCH_RECENT is not set +# CONFIG_NETFILTER_XT_MATCH_SCTP is not set +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +# CONFIG_IP_SET is not set +# CONFIG_IP_VS is not set + +# +# IP: Netfilter Configuration +# +CONFIG_NF_DEFRAG_IPV4=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_REJECT_SKERR=y +CONFIG_IP_NF_TARGET_LOG=y +# CONFIG_IP_NF_TARGET_ULOG is not set +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_NF_NAT_PROTO_DCCP=y 
+CONFIG_NF_NAT_PROTO_GRE=y +CONFIG_NF_NAT_PROTO_UDPLITE=y +CONFIG_NF_NAT_PROTO_SCTP=y +CONFIG_NF_NAT_FTP=y +CONFIG_NF_NAT_IRC=y +CONFIG_NF_NAT_TFTP=y +CONFIG_NF_NAT_AMANDA=y +CONFIG_NF_NAT_PPTP=y +CONFIG_NF_NAT_H323=y +# CONFIG_NF_NAT_SIP is not set +CONFIG_IP_NF_MANGLE=y +# CONFIG_IP_NF_TARGET_CLUSTERIP is not set +# CONFIG_IP_NF_TARGET_ECN is not set +# CONFIG_IP_NF_TARGET_TTL is not set +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_DEFRAG_IPV6=y +CONFIG_NF_CONNTRACK_IPV6=y +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=y +# CONFIG_IP6_NF_MATCH_AH is not set +# CONFIG_IP6_NF_MATCH_EUI64 is not set +# CONFIG_IP6_NF_MATCH_FRAG is not set +# CONFIG_IP6_NF_MATCH_OPTS is not set +# CONFIG_IP6_NF_MATCH_HL is not set +# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set +# CONFIG_IP6_NF_MATCH_MH is not set +# CONFIG_IP6_NF_MATCH_RT is not set +# CONFIG_IP6_NF_TARGET_HL is not set +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_TARGET_REJECT_SKERR=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_L2TP is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_IEEE802154 is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +CONFIG_NET_SCH_HTB=y +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_MULTIQ is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFB is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_DRR is not set +# CONFIG_NET_SCH_MQPRIO is not set +# CONFIG_NET_SCH_CHOKE is not set +# CONFIG_NET_SCH_QFQ is not set +CONFIG_NET_SCH_INGRESS=y + +# +# Classification +# +CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set +CONFIG_NET_CLS_U32=y +# CONFIG_CLS_U32_PERF is not set +# CONFIG_CLS_U32_MARK is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +# CONFIG_NET_CLS_CGROUP is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +# CONFIG_NET_EMATCH_CMP is not set +# CONFIG_NET_EMATCH_NBYTE is not set +CONFIG_NET_EMATCH_U32=y +# CONFIG_NET_EMATCH_META is not set +# CONFIG_NET_EMATCH_TEXT is not set +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=y +CONFIG_NET_ACT_GACT=y +# CONFIG_GACT_PROB is not set +CONFIG_NET_ACT_MIRRED=y +# CONFIG_NET_ACT_IPT is not set +# CONFIG_NET_ACT_NAT is not set +# CONFIG_NET_ACT_PEDIT is not set +# CONFIG_NET_ACT_SIMP is not set +# CONFIG_NET_ACT_SKBEDIT is not set +# CONFIG_NET_ACT_CSUM is not set +# CONFIG_NET_CLS_IND is not set +CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set +# CONFIG_BATMAN_ADV is not set +CONFIG_RPS=y +CONFIG_RFS_ACCEL=y +CONFIG_XPS=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# 
CONFIG_CAN is not set +# CONFIG_IRDA is not set +CONFIG_BT=y +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y +CONFIG_BT_RFCOMM=y +CONFIG_BT_RFCOMM_TTY=y +CONFIG_BT_BNEP=y +# CONFIG_BT_BNEP_MC_FILTER is not set +# CONFIG_BT_BNEP_PROTO_FILTER is not set +CONFIG_BT_HIDP=y + +# +# Bluetooth device drivers +# +# CONFIG_BT_HCIBTUSB is not set +# CONFIG_BT_HCIBTSDIO is not set +CONFIG_BT_HCIUART=y +CONFIG_BT_HCIUART_H4=y +# CONFIG_BT_HCIUART_BCSP is not set +# CONFIG_BT_HCIUART_ATH3K is not set +CONFIG_BT_HCIUART_LL=y +# CONFIG_BT_HCIBCM203X is not set +CONFIG_BT_BLUESLEEP=y +# CONFIG_BT_TIBLUESLEEP is not set +# CONFIG_BT_HCIBPA10X is not set +# CONFIG_BT_HCIBFUSB is not set +# CONFIG_BT_HCIVHCI is not set +# CONFIG_BT_MRVL is not set +# CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y +CONFIG_WIRELESS=y +CONFIG_WEXT_CORE=y +CONFIG_WEXT_PROC=y +CONFIG_CFG80211=y +CONFIG_NL80211_TESTMODE=y +# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +# CONFIG_CFG80211_REG_DEBUG is not set +CONFIG_CFG80211_DEFAULT_PS=y +# CONFIG_CFG80211_DEBUGFS is not set +# CONFIG_CFG80211_INTERNAL_REGDB is not set +CONFIG_CFG80211_WEXT=y +CONFIG_WIRELESS_EXT_SYSFS=y +# CONFIG_LIB80211 is not set +# CONFIG_CFG80211_ALLOW_RECONNECT is not set +# CONFIG_MAC80211 is not set +# CONFIG_WIMAX is not set +CONFIG_RFKILL=y +CONFIG_RFKILL_PM=y +# CONFIG_RFKILL_INPUT is not set +# CONFIG_RFKILL_REGULATOR is not set +# CONFIG_RFKILL_GPIO is not set +# CONFIG_NET_9P is not set +CONFIG_CAIF=y +# CONFIG_CAIF_DEBUG is not set +CONFIG_CAIF_NETDEV=y +# CONFIG_CEPH_LIB is not set +CONFIG_NFC=y + +# +# Near Field Communication (NFC) devices +# +CONFIG_PN544_NFC=y +# CONFIG_NFC_PN533 is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="" +# CONFIG_DEVTMPFS is not set +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +CONFIG_REGMAP=y +CONFIG_REGMAP_I2C=y +# CONFIG_DMA_SHARED_BUFFER is not set +# CONFIG_CONNECTOR is not set +# CONFIG_MTD is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 +# CONFIG_BLK_DEV_CRYPTOLOOP is not set + +# +# DRBD disabled because PROC_FS, INET or CONNECTOR not selected +# +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_MG_DISK is not set +# CONFIG_BLK_DEV_RBD is not set +# CONFIG_SENSORS_LIS3LV02D is not set +CONFIG_MISC_DEVICES=y +CONFIG_AD525X_DPOT=y +CONFIG_AD525X_DPOT_I2C=y +# CONFIG_AD525X_DPOT_SPI is not set +# CONFIG_PHANTOM is not set +# CONFIG_INTEL_MID_PTI is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +# CONFIG_ICS932S401 is not set +# CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_HP_ILO is not set +CONFIG_APDS9802ALS=y +# CONFIG_ISL29003 is not set +# CONFIG_ISL29020 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_SENSORS_BH1780 is not set +# CONFIG_SENSORS_BH1770 is not set +# CONFIG_SENSORS_APDS990X is not set +# CONFIG_HMC6352 is not set +# CONFIG_SENSORS_AK8975 is not set +CONFIG_SENSORS_NCT1008=y +# CONFIG_DS1682 is not set +# CONFIG_TI_DAC7512 is 
not set +CONFIG_UID_STAT=y +# CONFIG_BMP085 is not set +# CONFIG_PCH_PHUB is not set +# CONFIG_USB_SWITCH_FSA9480 is not set +# CONFIG_WL127X_RFKILL is not set +# CONFIG_APANIC is not set +# CONFIG_BCM4329_RFKILL is not set +CONFIG_BCM4330_RFKILL=y +CONFIG_TEGRA_CRYPTO_DEV=y +CONFIG_MAX1749_VIBRATOR=y +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +CONFIG_EEPROM_AT24=y +# CONFIG_EEPROM_AT25 is not set +# CONFIG_EEPROM_LEGACY is not set +# CONFIG_EEPROM_MAX6875 is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_EEPROM_93XX46 is not set +# CONFIG_CB710_CORE is not set +# CONFIG_IWMC3200TOP is not set + +# +# Texas Instruments shared transport line discipline +# +# CONFIG_TI_ST is not set +# CONFIG_ST_GPS is not set +# CONFIG_SENSORS_LIS3_SPI is not set +# CONFIG_SENSORS_LIS3_I2C is not set +CONFIG_TEGRA_BB_SUPPORT=y +CONFIG_TEGRA_BB_POWER=y +CONFIG_TEGRA_BB_M7400=y +CONFIG_FSYNC_CONTROL=y +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +CONFIG_SCSI_MOD=y +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set +CONFIG_SCSI_MULTI_LUN=y +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +# CONFIG_SCSI_WAIT_SCAN is not set + +# +# SCSI Transports +# +# CONFIG_SCSI_SPI_ATTRS is not set +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +CONFIG_SCSI_LOWLEVEL=y +# CONFIG_ISCSI_TCP is not set +# CONFIG_ISCSI_BOOT_SYSFS is not set +# CONFIG_SCSI_CXGB3_ISCSI is not set +# CONFIG_SCSI_CXGB4_ISCSI is not set +# CONFIG_SCSI_BNX2_ISCSI is not set +# CONFIG_SCSI_BNX2X_FCOE is not set +# CONFIG_BE2ISCSI is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_HPSA is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_3W_SAS is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_MVSAS is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_MPT2SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set +# CONFIG_FCOE is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_PMCRAID is not set +# CONFIG_SCSI_PM8001 is not set +# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_BFA_FC is not set +# CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set +# CONFIG_ATA is not set 
+CONFIG_MD=y +# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_DM=y +# CONFIG_DM_DEBUG is not set +CONFIG_DM_CRYPT=y +# CONFIG_DM_SNAPSHOT is not set +# CONFIG_DM_MIRROR is not set +# CONFIG_DM_RAID is not set +# CONFIG_DM_ZERO is not set +# CONFIG_DM_MULTIPATH is not set +# CONFIG_DM_DELAY is not set +CONFIG_DM_UEVENT=y +# CONFIG_DM_FLAKEY is not set +# CONFIG_TARGET_CORE is not set +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +# CONFIG_FIREWIRE_NOSY is not set +# CONFIG_I2O is not set +CONFIG_NETDEVICES=y +# CONFIG_IFB is not set +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=y +# CONFIG_VETH is not set +# CONFIG_ARCNET is not set +CONFIG_MII=y +# CONFIG_PHYLIB is not set +# CONFIG_NET_ETHERNET is not set +CONFIG_NETDEV_1000=y +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_E1000E is not set +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set +# CONFIG_IGBVF is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set +# CONFIG_CNIC is not set +# CONFIG_QLA3XXX is not set +# CONFIG_ATL1 is not set +# CONFIG_ATL1E is not set +# CONFIG_ATL1C is not set +# CONFIG_JME is not set +# CONFIG_STMMAC_ETH is not set +# CONFIG_PCH_GBE is not set +# CONFIG_FTGMAC100 is not set +# CONFIG_NETDEV_10000 is not set +# CONFIG_TR is not set +CONFIG_WLAN=y +# CONFIG_ATMEL is not set +# CONFIG_PRISM54 is not set +# CONFIG_USB_ZD1201 is not set +# CONFIG_USB_NET_RNDIS_WLAN is not set +# CONFIG_ATH_COMMON is not set +# CONFIG_BCM4329 is not set +CONFIG_BCMDHD=y +CONFIG_BCMDHD_FW_PATH="/system/vendor/firmware/fw_bcmdhd.bin" +CONFIG_BCMDHD_NVRAM_PATH="/system/etc/nvram.txt" +# CONFIG_DHD_USE_STATIC_BUF is not set +# CONFIG_DHD_USE_SCHED_SCAN is not set +CONFIG_DHD_ENABLE_P2P=y +CONFIG_BCMDHD_WIFI_PM=y +# CONFIG_HOSTAP is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_IWM is not set +# CONFIG_LIBERTAS is not set +# CONFIG_HERMES is not set +# CONFIG_MWIFIEX is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +CONFIG_USB_USBNET=y +CONFIG_USB_NET_AX8817X=y +CONFIG_USB_NET_CDCETHER=y +# CONFIG_USB_NET_CDC_EEM is not set +CONFIG_USB_NET_CDC_NCM=y +# CONFIG_USB_NET_DM9601 is not set +# CONFIG_USB_NET_SMSC75XX is not set +CONFIG_USB_NET_SMSC95XX=y +# CONFIG_USB_NET_GL620A is not set +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_NET_PLUSB is not set +# CONFIG_USB_NET_MCS7830 is not set +# CONFIG_USB_NET_RNDIS_HOST is not set +CONFIG_USB_NET_CDC_SUBSET=y +# CONFIG_USB_ALI_M5632 is not set +# CONFIG_USB_AN2720 is not set +# CONFIG_USB_BELKIN is not set +# CONFIG_USB_ARMLINUX is not set +# CONFIG_USB_EPSON2888 is not set +# CONFIG_USB_KC2190 is not set +# CONFIG_USB_NET_ZAURUS is not set +# CONFIG_USB_NET_CX82310_ETH is not set +# CONFIG_USB_NET_KALMIA is not set +# CONFIG_USB_HSO is not set +# CONFIG_USB_NET_INT51X1 is not set +# CONFIG_USB_IPHETH is not set +# CONFIG_USB_SIERRA_NET is not set +# CONFIG_USB_VL600 is not set +# CONFIG_USB_NET_RAW_IP is not set +# CONFIG_WAN is not set + +# +# CAIF transport 
drivers +# +# CONFIG_CAIF_TTY is not set +# CONFIG_CAIF_SPI_SLAVE is not set +# CONFIG_CAIF_HSI is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=y +# CONFIG_PPP_MULTILINK is not set +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=y +CONFIG_PPP_SYNC_TTY=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_MPPE=y +# CONFIG_PPPOE is not set +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +# CONFIG_SLIP is not set +CONFIG_SLHC=y +# CONFIG_NET_FC is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_VMXNET3 is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +CONFIG_INPUT_FF_MEMLESS=y +# CONFIG_INPUT_POLLDEV is not set +# CONFIG_INPUT_SPARSEKMAP is not set + +# +# Userland interfaces +# +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set +CONFIG_INPUT_KEYRESET=y +CONFIG_INPUT_LID=y + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ADP5588 is not set +# CONFIG_KEYBOARD_ADP5589 is not set +# CONFIG_KEYBOARD_ATKBD is not set +# CONFIG_KEYBOARD_QT1070 is not set +# CONFIG_KEYBOARD_QT2160 is not set +# CONFIG_KEYBOARD_LKKBD is not set +CONFIG_KEYBOARD_GPIO=y +# CONFIG_KEYBOARD_TCA6416 is not set +# CONFIG_KEYBOARD_MATRIX is not set +# CONFIG_KEYBOARD_LM8323 is not set +# CONFIG_KEYBOARD_MAX7359 is not set +# CONFIG_KEYBOARD_MCS is not set +# CONFIG_KEYBOARD_MPR121 is not set +# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_KEYBOARD_TEGRA=y +# CONFIG_KEYBOARD_OPENCORES is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +# CONFIG_JOYSTICK_ANALOG is not set +# CONFIG_JOYSTICK_A3D is not set +# CONFIG_JOYSTICK_ADI is not set +# CONFIG_JOYSTICK_COBRA is not set +# CONFIG_JOYSTICK_GF2K is not set +# CONFIG_JOYSTICK_GRIP is not set +# CONFIG_JOYSTICK_GRIP_MP is not set +# CONFIG_JOYSTICK_GUILLEMOT is not set +# CONFIG_JOYSTICK_INTERACT is not set +# CONFIG_JOYSTICK_SIDEWINDER is not set +# CONFIG_JOYSTICK_TMDC is not set +# CONFIG_JOYSTICK_IFORCE is not set +# CONFIG_JOYSTICK_WARRIOR is not set +# CONFIG_JOYSTICK_MAGELLAN is not set +# CONFIG_JOYSTICK_SPACEORB is not set +# CONFIG_JOYSTICK_SPACEBALL is not set +# CONFIG_JOYSTICK_STINGER is not set +# CONFIG_JOYSTICK_TWIDJOY is not set +# CONFIG_JOYSTICK_ZHENHUA is not set +# CONFIG_JOYSTICK_AS5011 is not set +# CONFIG_JOYSTICK_JOYDUMP is not set +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_TABLET_USB_WACOM=y +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_ADS7846 is not set +# CONFIG_TOUCHSCREEN_AD7877 is not set +# CONFIG_TOUCHSCREEN_AD7879 is not set +# CONFIG_TOUCHSCREEN_ATMEL_MXT is not set +# CONFIG_TOUCHSCREEN_BU21013 is not set +# CONFIG_TOUCHSCREEN_CY8CTMG110 is not set +# CONFIG_TOUCHSCREEN_DYNAPRO is not set +# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set +# CONFIG_TOUCHSCREEN_EETI is not set +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set +# CONFIG_TOUCHSCREEN_MAX11801 is not set +# CONFIG_TOUCHSCREEN_MCS5000 is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# 
CONFIG_TOUCHSCREEN_INEXIO is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_PANJIT_I2C is not set +# CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +# CONFIG_TOUCHSCREEN_TSC2005 is not set +# CONFIG_TOUCHSCREEN_TSC2007 is not set +# CONFIG_TOUCHSCREEN_W90X900 is not set +# CONFIG_TOUCHSCREEN_ST1232 is not set +# CONFIG_TOUCHSCREEN_TPS6507X is not set +CONFIG_TOUCHSCREEN_ELAN_TF_3K=y +CONFIG_TOUCHSCREEN_RM31080A=y +CONFIG_TOUCHSCREEN_SYN_RMI4_SPI=y +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_AD714X is not set +# CONFIG_INPUT_MMA8450 is not set +# CONFIG_INPUT_MPU3050 is not set +# CONFIG_INPUT_ATI_REMOTE is not set +# CONFIG_INPUT_ATI_REMOTE2 is not set +CONFIG_INPUT_KEYCHORD=y +# CONFIG_INPUT_KEYSPAN_REMOTE is not set +# CONFIG_INPUT_KXTJ9 is not set +# CONFIG_INPUT_POWERMATE is not set +# CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_CM109 is not set +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_INPUT_PCF8574 is not set +# CONFIG_INPUT_PWM_BEEPER is not set +# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set +# CONFIG_INPUT_ADXL34X is not set +# CONFIG_INPUT_CMA3000 is not set +# CONFIG_INPUT_ALPS_GPIO_SCROLLWHEEL is not set +# CONFIG_INPUT_CAPELLA_CM3217 is not set + +# +# Proximity sensors +# +# CONFIG_SENSORS_CAP1106 is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_SERIO_ALTERA_PS2 is not set +# CONFIG_SERIO_PS2MULT is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_NOZOMI is not set +# CONFIG_N_GSM is not set +# CONFIG_TRACE_SINK is not set +CONFIG_DEVMEM=y +# CONFIG_DEVKMEM is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_TEGRA=y +# CONFIG_SERIAL_MAX3100 is not set +# CONFIG_SERIAL_MAX3107 is not set +# CONFIG_SERIAL_MFD_HSU is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +# CONFIG_SERIAL_TIMBERDALE is not set +# CONFIG_SERIAL_ALTERA_JTAGUART is not set +# CONFIG_SERIAL_ALTERA_UART is not set +# CONFIG_SERIAL_IFX6X60 is not set +# CONFIG_SERIAL_PCH_UART is not set +# CONFIG_SERIAL_XILINX_PS_UART is not set +# CONFIG_TTY_PRINTK is not set +# CONFIG_HVC_DCC is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y +# CONFIG_DCC_TTY is not set +# CONFIG_RAMOOPS is not set +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_COMPAT is not set +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_MUX=y + +# +# Multiplexer I2C Chip support +# +# CONFIG_I2C_MUX_GPIO is not set +# CONFIG_I2C_MUX_PCA9541 is not set +CONFIG_I2C_MUX_PCA954x=y +# CONFIG_I2C_SLAVE is not set +# CONFIG_I2C_HELPER_AUTO is not set +# CONFIG_I2C_SMBUS is not set + +# +# I2C Algorithms +# +# CONFIG_I2C_ALGOBIT is not set +# CONFIG_I2C_ALGOPCF is not set +# 
CONFIG_I2C_ALGOPCA is not set + +# +# I2C Hardware Bus support +# + +# +# PC SMBus host controller drivers +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_ISCH is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_DESIGNWARE is not set +# CONFIG_I2C_GPIO is not set +# CONFIG_I2C_INTEL_MID is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_PXA_PCI is not set +# CONFIG_I2C_SIMTEC is not set +CONFIG_I2C_TEGRA=y +# CONFIG_I2C_XILINX is not set +# CONFIG_I2C_EG20T is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_DIOLAN_U2C is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_TINY_USB is not set + +# +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +CONFIG_SPI=y +# CONFIG_SPI_DEBUG is not set +CONFIG_SPI_MASTER=y + +# +# SPI Master Controller Drivers +# +# CONFIG_SPI_ALTERA is not set +# CONFIG_SPI_BITBANG is not set +# CONFIG_SPI_GPIO is not set +# CONFIG_SPI_OC_TINY is not set +# CONFIG_SPI_PXA2XX_PCI is not set +CONFIG_SPI_TEGRA=y +CONFIG_SPI_SLAVE_TEGRA=y +# CONFIG_SPI_TOPCLIFF_PCH is not set +# CONFIG_SPI_XILINX is not set +# CONFIG_SPI_DESIGNWARE is not set + +# +# SPI Protocol Masters +# +# CONFIG_SPI_SPIDEV is not set +# CONFIG_SPI_TLE62X0 is not set + +# +# PPS support +# +# CONFIG_PPS is not set + +# +# PPS generators support +# + +# +# PTP clock support +# + +# +# Enable Device Drivers -> PPS to see the PTP clock options. 
+# +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +CONFIG_DEBUG_GPIO=y +CONFIG_GPIO_SYSFS=y + +# +# Memory mapped GPIO drivers: +# +# CONFIG_GPIO_GENERIC_PLATFORM is not set +# CONFIG_GPIO_IT8761E is not set +# CONFIG_GPIO_VX855 is not set + +# +# I2C GPIO expanders: +# +# CONFIG_GPIO_MAX7300 is not set +# CONFIG_GPIO_MAX732X is not set +# CONFIG_GPIO_PCA953X_IRQ is not set +# CONFIG_GPIO_PCF857X is not set +# CONFIG_GPIO_SX150X is not set +# CONFIG_GPIO_ADP5588 is not set + +# +# PCI GPIO expanders: +# +# CONFIG_GPIO_BT8XX is not set +# CONFIG_GPIO_ML_IOH is not set +# CONFIG_GPIO_RDC321X is not set + +# +# SPI GPIO expanders: +# +# CONFIG_GPIO_MAX7301 is not set +# CONFIG_GPIO_MCP23S08 is not set +# CONFIG_GPIO_MC33880 is not set +# CONFIG_GPIO_74X164 is not set + +# +# AC97 GPIO expanders: +# + +# +# MODULbus GPIO expanders: +# +CONFIG_GPIO_TPS65910=y +# CONFIG_W1 is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_TEST_POWER is not set +# CONFIG_BATTERY_DS2780 is not set +# CONFIG_BATTERY_DS2782 is not set +# CONFIG_BATTERY_BQ20Z75 is not set +# CONFIG_BATTERY_BQ27x00 is not set +# CONFIG_CHARGER_TPS8003X is not set +# CONFIG_BATTERY_GAUGE_TPS8003X is not set +CONFIG_CHARGER_SMB347=y +# CONFIG_BATTERY_MAX17040 is not set +# CONFIG_BATTERY_MAX17042 is not set +# CONFIG_BATTERY_MAX17048 is not set +# CONFIG_CHARGER_ISP1704 is not set +# CONFIG_CHARGER_MAX8903 is not set +# CONFIG_CHARGER_GPIO is not set +CONFIG_BATTERY_BQ27541=y +# CONFIG_TEGRA_BPC_MGMT is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_HWMON_DEBUG_CHIP is not set + +# +# Native drivers +# +# CONFIG_SENSORS_AD7414 is not set +# CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADCXX is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7411 is not set +# CONFIG_SENSORS_ADT7461 is not set +# CONFIG_SENSORS_ADT7462 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7475 is not set +# CONFIG_SENSORS_ASC7621 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS620 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_I5K_AMB is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_G760A is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_GPIO_FAN is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_JC42 is not set +# CONFIG_SENSORS_LINEAGE is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM70 is not set +# CONFIG_SENSORS_LM73 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4151 is not set +# CONFIG_SENSORS_LTC4215 is not set +# CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_LTC4261 is not set +# CONFIG_SENSORS_LM95241 is not set +# CONFIG_SENSORS_LM95245 is not set +# CONFIG_SENSORS_MAX1111 is not set +# CONFIG_SENSORS_MAX16065 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_MAX1668 is not set +# 
CONFIG_SENSORS_MAX6639 is not set +# CONFIG_SENSORS_MAX6642 is not set +# CONFIG_SENSORS_MAX6650 is not set +# CONFIG_SENSORS_NTC_THERMISTOR is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_PMBUS is not set +# CONFIG_SENSORS_SHT15 is not set +# CONFIG_SENSORS_SHT21 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_SMM665 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_EMC1403 is not set +# CONFIG_SENSORS_EMC2103 is not set +# CONFIG_SENSORS_EMC6W201 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_SCH56XX_COMMON is not set +# CONFIG_SENSORS_SCH5627 is not set +# CONFIG_SENSORS_SCH5636 is not set +# CONFIG_SENSORS_ADS1015 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_ADS7871 is not set +# CONFIG_SENSORS_AMC6821 is not set +CONFIG_SENSORS_TEGRA_TSENSOR=y +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_TMP102 is not set +# CONFIG_SENSORS_TMP401 is not set +# CONFIG_SENSORS_TMP421 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83795 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +CONFIG_SENSORS_INA219=y +# CONFIG_SENSORS_INA230 is not set +CONFIG_SENSORS_AL3010=y +CONFIG_THERMAL=y +CONFIG_THERMAL_HWMON=y +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_CORE is not set +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +# CONFIG_DW_WATCHDOG is not set +# CONFIG_MPCORE_WATCHDOG is not set +CONFIG_TEGRA_WATCHDOG=y +CONFIG_TEGRA_WATCHDOG_ENABLE_ON_PROBE=y +# CONFIG_MAX63XX_WATCHDOG is not set +# CONFIG_ALIM7101_WDT is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set +CONFIG_BCMA_POSSIBLE=y + +# +# Broadcom specific AMBA +# +# CONFIG_BCMA is not set +CONFIG_MFD_SUPPORT=y +CONFIG_MFD_CORE=y +# CONFIG_MFD_88PM860X is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_ASIC3 is not set +# CONFIG_HTC_EGPIO is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_HTC_I2CPLD is not set +# CONFIG_TPS6105X is not set +# CONFIG_TPS65010 is not set +# CONFIG_TPS6507X is not set +CONFIG_MFD_TPS6586X=y +CONFIG_MFD_TPS65910=y +# CONFIG_MFD_TPS65912_I2C is not set +# CONFIG_MFD_TPS65912_SPI is not set +# CONFIG_TWL4030_CORE is not set +# CONFIG_MFD_STMPE is not set +# CONFIG_MFD_TC3589X is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_T7L66XB is not set +# CONFIG_MFD_TC6387XB is not set +# CONFIG_MFD_TC6393XB is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_PMIC_ADP5520 is not set +# CONFIG_MFD_MAX8925 is not set +# CONFIG_MFD_MAX8997 is not set +# CONFIG_MFD_MAX8998 is not set +# CONFIG_MFD_MAX8907C is not set +CONFIG_MFD_MAX77663=y +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM831X_I2C is not set +# CONFIG_MFD_WM831X_SPI is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_WM8994 is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_MFD_MC13XXX is 
not set +# CONFIG_ABX500_CORE is not set +# CONFIG_EZX_PCAP is not set +# CONFIG_MFD_TIMBERDALE is not set +# CONFIG_LPC_SCH is not set +# CONFIG_MFD_RDC321X is not set +# CONFIG_MFD_JANZ_CMODIO is not set +# CONFIG_MFD_VX855 is not set +# CONFIG_MFD_WL1273_CORE is not set +# CONFIG_MFD_AAT2870_CORE is not set +CONFIG_MFD_TPS6591X=y +# CONFIG_MFD_TPS65090 is not set +# CONFIG_MFD_RC5T583 is not set +CONFIG_MFD_TPS80031=y +CONFIG_GPADC_TPS80031=y +CONFIG_MFD_RICOH583=y +CONFIG_REGULATOR=y +# CONFIG_REGULATOR_DEBUG is not set +# CONFIG_REGULATOR_DUMMY is not set +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_VIRTUAL_CONSUMER=y +# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set +# CONFIG_REGULATOR_GPIO is not set +# CONFIG_REGULATOR_BQ24022 is not set +# CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX8649 is not set +# CONFIG_REGULATOR_MAX8660 is not set +# CONFIG_REGULATOR_MAX8952 is not set +CONFIG_REGULATOR_MAX77663=y +# CONFIG_REGULATOR_LP3971 is not set +# CONFIG_REGULATOR_LP3972 is not set +# CONFIG_REGULATOR_TPS65023 is not set +# CONFIG_REGULATOR_TPS6507X is not set +# CONFIG_REGULATOR_ISL6271A is not set +# CONFIG_REGULATOR_AD5398 is not set +CONFIG_REGULATOR_TPS6586X=y +# CONFIG_REGULATOR_TPS6524X is not set +CONFIG_REGULATOR_TPS65910=y +CONFIG_REGULATOR_TPS62360=y +CONFIG_REGULATOR_TPS6591X=y +CONFIG_REGULATOR_TPS80031=y +CONFIG_REGULATOR_RICOH583=y +# CONFIG_REGULATOR_FAN53555 is not set +CONFIG_MEDIA_SUPPORT=y + +# +# Multimedia core support +# +# CONFIG_MEDIA_CONTROLLER is not set +CONFIG_VIDEO_DEV=y +CONFIG_VIDEO_V4L2_COMMON=y +# CONFIG_DVB_CORE is not set +CONFIG_VIDEO_MEDIA=y + +# +# Multimedia drivers +# +# CONFIG_RC_CORE is not set +# CONFIG_MEDIA_ATTACH is not set +CONFIG_MEDIA_TUNER=y +# CONFIG_MEDIA_TUNER_CUSTOMISE is not set +CONFIG_MEDIA_TUNER_SIMPLE=y +CONFIG_MEDIA_TUNER_TDA8290=y +CONFIG_MEDIA_TUNER_TDA827X=y +CONFIG_MEDIA_TUNER_TDA18271=y +CONFIG_MEDIA_TUNER_TDA9887=y +CONFIG_MEDIA_TUNER_TEA5761=y +CONFIG_MEDIA_TUNER_TEA5767=y +CONFIG_MEDIA_TUNER_MT20XX=y +CONFIG_MEDIA_TUNER_XC2028=y +CONFIG_MEDIA_TUNER_XC5000=y +CONFIG_MEDIA_TUNER_XC4000=y +CONFIG_MEDIA_TUNER_MC44S803=y +CONFIG_VIDEO_V4L2=y +CONFIG_VIDEO_CAPTURE_DRIVERS=y +# CONFIG_VIDEO_ADV_DEBUG is not set +# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set +CONFIG_VIDEO_HELPER_CHIPS_AUTO=y + +# +# Audio decoders, processors and mixers +# + +# +# RDS decoders +# + +# +# Video decoders +# + +# +# Video and audio decoders +# + +# +# MPEG video encoders +# + +# +# Video encoders +# + +# +# Camera sensor devices +# + +# +# Flash devices +# + +# +# Video improvement chips +# + +# +# Miscelaneous helper chips +# +CONFIG_TEGRA_RPC=y +# CONFIG_TEGRA_AVP is not set +# CONFIG_TEGRA_MEDIASERVER is not set +CONFIG_TEGRA_NVAVP=y +CONFIG_TEGRA_CAMERA=y +CONFIG_VIDEO_MI1040=y +CONFIG_TEGRA_DTV=y +# CONFIG_VIDEO_OV5650 is not set +# CONFIG_VIDEO_OV14810 is not set +# CONFIG_VIDEO_OV9726 is not set +# CONFIG_VIDEO_OV2710 is not set +# CONFIG_VIDEO_AR0832 is not set +# CONFIG_VIDEO_SOC380 is not set +# CONFIG_TORCH_SSL3250A is not set +# CONFIG_TORCH_TPS61050 is not set +# CONFIG_VIDEO_SH532U is not set +# CONFIG_VIDEO_AD5820 is not set +# CONFIG_VIDEO_CPIA2 is not set +# CONFIG_VIDEO_SAA7134 is not set +# CONFIG_VIDEO_MXB is not set +# CONFIG_VIDEO_HEXIUM_ORION is not set +# CONFIG_VIDEO_HEXIUM_GEMINI is not set +# CONFIG_VIDEO_CAFE_CCIC is not set +# CONFIG_VIDEO_SR030PC30 is not set +# CONFIG_VIDEO_NOON010PC30 is not set +# CONFIG_SOC_CAMERA is not set +CONFIG_V4L_USB_DRIVERS=y +CONFIG_USB_VIDEO_CLASS=y 
+CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y +# CONFIG_USB_GSPCA is not set +# CONFIG_VIDEO_PVRUSB2 is not set +# CONFIG_VIDEO_HDPVR is not set +# CONFIG_VIDEO_EM28XX is not set +# CONFIG_VIDEO_USBVISION is not set +# CONFIG_USB_ET61X251 is not set +# CONFIG_USB_SN9C102 is not set +# CONFIG_USB_PWC is not set +# CONFIG_USB_ZR364XX is not set +# CONFIG_USB_STKWEBCAM is not set +# CONFIG_USB_S2255 is not set +# CONFIG_V4L_MEM2MEM_DRIVERS is not set +# CONFIG_RADIO_ADAPTERS is not set + +# +# Graphics support +# +CONFIG_VGA_ARB=y +CONFIG_VGA_ARB_MAX_GPUS=16 +# CONFIG_DRM is not set +# CONFIG_STUB_POULSBO is not set +# CONFIG_ION is not set +# CONFIG_VGASTATE is not set +CONFIG_VIDEO_OUTPUT_CONTROL=y +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_WMT_GE_ROPS is not set +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +# CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_CARMINE is not set +# CONFIG_FB_TMIO is not set +# CONFIG_FB_UDL is not set +# CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set + +# +# NVIDIA Tegra Display Driver options +# +CONFIG_TEGRA_GRHOST=y +CONFIG_TEGRA_DC=y +CONFIG_FB_TEGRA=y +CONFIG_TEGRA_DC_EXTENSIONS=y +CONFIG_TEGRA_NVMAP=y +CONFIG_NVMAP_RECLAIM_UNPINNED_VM=y +CONFIG_NVMAP_ALLOW_SYSMEM=y +# CONFIG_NVMAP_HIGHMEM_ONLY is not set +# CONFIG_NVMAP_CARVEOUT_KILLER is not set +CONFIG_NVMAP_CARVEOUT_COMPACTOR=y +# CONFIG_NVMAP_VPR is not set +CONFIG_TEGRA_DSI=y +CONFIG_NVMAP_CONVERT_CARVEOUT_TO_IOVMM=y +CONFIG_TEGRA_NVHDCP=y +# CONFIG_TEGRA_HDMI_74MHZ_LIMIT is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LCD_CLASS_DEVICE=y +# CONFIG_LCD_L4F00242T03 is not set +# CONFIG_LCD_LMS283GF05 is not set +# CONFIG_LCD_LTV350QV is not set +# CONFIG_LCD_TDO24M is not set +# CONFIG_LCD_VGG2432A4 is not set +# CONFIG_LCD_PLATFORM is not set +# CONFIG_LCD_S6E63M0 is not set +# CONFIG_LCD_LD9040 is not set +# CONFIG_LCD_AMS369FG06 is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_GENERIC is not set +CONFIG_BACKLIGHT_PWM=y +CONFIG_BACKLIGHT_TEGRA_PWM=y +# CONFIG_BACKLIGHT_ADP8860 is not set +# CONFIG_BACKLIGHT_ADP8870 is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_LOGO is not set +CONFIG_SOUND=y +# CONFIG_SOUND_OSS_CORE is not set +CONFIG_SND=y +CONFIG_SND_TIMER=y 
+CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_RAWMIDI=y +CONFIG_SND_JACK=y +# CONFIG_SND_SEQUENCER is not set +# CONFIG_SND_MIXER_OSS is not set +# CONFIG_SND_PCM_OSS is not set +# CONFIG_SND_HRTIMER is not set +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SUPPORT_OLD_API=y +CONFIG_SND_VERBOSE_PROCFS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y +# CONFIG_SND_RAWMIDI_SEQ is not set +# CONFIG_SND_OPL3_LIB_SEQ is not set +# CONFIG_SND_OPL4_LIB_SEQ is not set +# CONFIG_SND_SBAWE_SEQ is not set +# CONFIG_SND_EMU10K1_SEQ is not set +CONFIG_SND_DRIVERS=y +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_ALOOP is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set +CONFIG_SND_PCI=y +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS300 is not set +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS5535AUDIO is not set +# CONFIG_SND_CTXFI is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_INDIGOIOX is not set +# CONFIG_SND_INDIGODJX is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_FM801 is not set +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_PREALLOC_SIZE=64 +# CONFIG_SND_HDA_HWDEP is not set +# CONFIG_SND_HDA_INPUT_BEEP is not set +# CONFIG_SND_HDA_INPUT_JACK is not set +# CONFIG_SND_HDA_PATCH_LOADER is not set +CONFIG_SND_HDA_PLATFORM_DRIVER=y +CONFIG_SND_HDA_PLATFORM_NVIDIA_TEGRA=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_ENABLE_REALTEK_QUIRKS=y +CONFIG_SND_HDA_CODEC_ANALOG=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_VIA=y +CONFIG_SND_HDA_CODEC_HDMI=y +CONFIG_SND_HDA_CODEC_CIRRUS=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_HDA_CODEC_CA0110=y +CONFIG_SND_HDA_CODEC_CA0132=y +CONFIG_SND_HDA_CODEC_CMEDIA=y +CONFIG_SND_HDA_CODEC_SI3054=y +CONFIG_SND_HDA_GENERIC=y +CONFIG_SND_HDA_POWER_SAVE=y +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=10 +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_LOLA is not set +# CONFIG_SND_LX6464ES is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set +# 
CONFIG_SND_VX222 is not set +# CONFIG_SND_YMFPCI is not set +CONFIG_SND_ARM=y +CONFIG_SND_SPI=y +CONFIG_SND_USB=y +CONFIG_SND_USB_AUDIO=y +# CONFIG_SND_USB_UA101 is not set +# CONFIG_SND_USB_CAIAQ is not set +# CONFIG_SND_USB_6FIRE is not set +CONFIG_SND_SOC=y +# CONFIG_SND_SOC_CACHE_LZO is not set +CONFIG_SND_SOC_TEGRA=y +CONFIG_SND_SOC_TEGRA30_AHUB=y +CONFIG_SND_SOC_TEGRA30_DAM=y +CONFIG_SND_SOC_TEGRA30_I2S=y +CONFIG_SND_SOC_TEGRA30_SPDIF=y +CONFIG_MACH_HAS_SND_SOC_TEGRA_RT5639=y +# CONFIG_SND_SOC_TEGRA_RT5639 is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_RT5640=y +CONFIG_SND_SOC_TEGRA_RT5640=y +CONFIG_HEADSET_FUNCTION=y +CONFIG_SND_SOC_I2C_AND_SPI=y +# CONFIG_SND_SOC_ALL_CODECS is not set +CONFIG_SND_SOC_RT5640=y +CONFIG_SND_SOC_RT5642=y +CONFIG_SND_SOC_SPDIF=y +# CONFIG_SND_SOC_TLV320AIC326X is not set +# CONFIG_SOUND_PRIME is not set +CONFIG_HID_SUPPORT=y +CONFIG_HID=y +# CONFIG_HIDRAW is not set +# CONFIG_UHID is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +# CONFIG_HID_PID is not set +# CONFIG_USB_HIDDEV is not set + +# +# Special HID drivers +# +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +# CONFIG_HID_PRODIKEYS is not set +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HOLTEK_FF=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_LOGIWII_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +# CONFIG_HID_PICOLCD is not set +# CONFIG_HID_QUANTA is not set +# CONFIG_HID_ROCCAT is not set +# CONFIG_HID_SAMSUNG is not set +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_HID_WACOM=y +# CONFIG_HID_WACOM_POWER_SUPPLY is not set +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_ZEROPLUS_FF=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +CONFIG_USB_DEVICE_CLASS=y +# CONFIG_USB_DYNAMIC_MINORS is not set +CONFIG_USB_SUSPEND=y +CONFIG_USB_OTG=y +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set +# CONFIG_USB_MON is not set +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set + +# +# USB Host Controller Drivers +# +# CONFIG_USB_C67X00_HCD is not set +# CONFIG_USB_XHCI_HCD is not set +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_USB_EHCI_TT_NEWSCHED=y +CONFIG_USB_EHCI_TEGRA=y +# CONFIG_USB_OXU210HP_HCD is not set +# CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set +# CONFIG_USB_ISP1362_HCD is not set +# CONFIG_USB_OHCI_HCD is not set +# CONFIG_USB_UHCI_HCD is not set +# CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_WHCI_HCD is not set +# CONFIG_USB_HWA_HCD 
is not set +# CONFIG_USB_EHCI_ONOFF_FEATURE is not set +# CONFIG_USB_MUSB_HDRC is not set + +# +# USB Device Class drivers +# +CONFIG_USB_ACM=y +# CONFIG_USB_PRINTER is not set +CONFIG_USB_WDM=y +# CONFIG_USB_TMC is not set + +# +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may +# + +# +# also be needed; see USB_STORAGE Help for more info +# +CONFIG_USB_STORAGE=y +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_REALTEK is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set +# CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +# CONFIG_USB_STORAGE_ENE_UB6250 is not set +# CONFIG_USB_UAS is not set +CONFIG_USB_LIBUSUAL=y + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB port drivers +# +CONFIG_USB_SERIAL=y +# CONFIG_USB_SERIAL_CONSOLE is not set +# CONFIG_USB_EZUSB is not set +# CONFIG_USB_SERIAL_GENERIC is not set +# CONFIG_USB_SERIAL_AIRCABLE is not set +# CONFIG_USB_SERIAL_ARK3116 is not set +# CONFIG_USB_SERIAL_BELKIN is not set +# CONFIG_USB_SERIAL_CH341 is not set +# CONFIG_USB_SERIAL_WHITEHEAT is not set +# CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set +# CONFIG_USB_SERIAL_CP210X is not set +# CONFIG_USB_SERIAL_CYPRESS_M8 is not set +# CONFIG_USB_SERIAL_EMPEG is not set +# CONFIG_USB_SERIAL_FTDI_SIO is not set +# CONFIG_USB_SERIAL_FUNSOFT is not set +# CONFIG_USB_SERIAL_VISOR is not set +# CONFIG_USB_SERIAL_IPAQ is not set +# CONFIG_USB_SERIAL_IR is not set +# CONFIG_USB_SERIAL_EDGEPORT is not set +# CONFIG_USB_SERIAL_EDGEPORT_TI is not set +# CONFIG_USB_SERIAL_GARMIN is not set +# CONFIG_USB_SERIAL_IPW is not set +# CONFIG_USB_SERIAL_IUU is not set +# CONFIG_USB_SERIAL_KEYSPAN_PDA is not set +# CONFIG_USB_SERIAL_KEYSPAN is not set +# CONFIG_USB_SERIAL_KLSI is not set +# CONFIG_USB_SERIAL_KOBIL_SCT is not set +# CONFIG_USB_SERIAL_MCT_U232 is not set +# CONFIG_USB_SERIAL_MOS7720 is not set +# CONFIG_USB_SERIAL_MOS7840 is not set +# CONFIG_USB_SERIAL_MOTOROLA is not set +# CONFIG_USB_SERIAL_NAVMAN is not set +CONFIG_USB_SERIAL_PL2303=y +# CONFIG_USB_SERIAL_OTI6858 is not set +# CONFIG_USB_SERIAL_QCAUX is not set +# CONFIG_USB_SERIAL_QUALCOMM is not set +# CONFIG_USB_SERIAL_SPCP8X5 is not set +# CONFIG_USB_SERIAL_HP4X is not set +# CONFIG_USB_SERIAL_SAFE is not set +# CONFIG_USB_SERIAL_SIEMENS_MPI is not set +# CONFIG_USB_SERIAL_SIERRAWIRELESS is not set +# CONFIG_USB_SERIAL_SYMBOL is not set +# CONFIG_USB_SERIAL_TI is not set +# CONFIG_USB_SERIAL_CYBERJACK is not set +# CONFIG_USB_SERIAL_XIRCOM is not set +CONFIG_USB_SERIAL_WWAN=y +CONFIG_USB_SERIAL_OPTION=y +# CONFIG_USB_SERIAL_OMNINET is not set +# CONFIG_USB_SERIAL_OPTICON is not set +# CONFIG_USB_SERIAL_VIVOPAY_SERIAL is not set +# CONFIG_USB_SERIAL_ZIO is not set +# CONFIG_USB_SERIAL_SSU100 is not set +# CONFIG_USB_SERIAL_DEBUG is not set +# CONFIG_USB_SERIAL_BASEBAND is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_ADUTUX is not set +# CONFIG_USB_SEVSEG is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set +# 
CONFIG_USB_CYTHERM is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set +# CONFIG_USB_IOWARRIOR is not set +# CONFIG_USB_TEST is not set +# CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_YUREX is not set +CONFIG_USB_GADGET=y +# CONFIG_USB_GADGET_DEBUG is not set +# CONFIG_USB_GADGET_DEBUG_FILES is not set +# CONFIG_USB_GADGET_DEBUG_FS is not set +CONFIG_USB_GADGET_VBUS_DRAW=500 +CONFIG_USB_FSL_USB2=y +# CONFIG_USB_FUSB300 is not set +# CONFIG_USB_R8A66597 is not set +# CONFIG_USB_M66592 is not set +# CONFIG_USB_AMD5536UDC is not set +# CONFIG_USB_CI13XXX_PCI is not set +# CONFIG_USB_NET2272 is not set +# CONFIG_USB_NET2280 is not set +# CONFIG_USB_GOKU is not set +# CONFIG_USB_LANGWELL is not set +# CONFIG_USB_EG20T is not set +# CONFIG_USB_DUMMY_HCD is not set +CONFIG_USB_GADGET_DUALSPEED=y +# CONFIG_USB_ZERO is not set +# CONFIG_USB_AUDIO is not set +# CONFIG_USB_ETH is not set +# CONFIG_USB_G_NCM is not set +# CONFIG_USB_GADGETFS is not set +# CONFIG_USB_FUNCTIONFS is not set +# CONFIG_USB_FILE_STORAGE is not set +# CONFIG_USB_MASS_STORAGE is not set +# CONFIG_USB_G_SERIAL is not set +# CONFIG_USB_MIDI_GADGET is not set +# CONFIG_USB_G_PRINTER is not set +CONFIG_USB_G_ANDROID=y +# CONFIG_USB_CDC_COMPOSITE is not set +# CONFIG_USB_G_MULTI is not set +# CONFIG_USB_G_HID is not set +# CONFIG_USB_G_DBGP is not set +# CONFIG_USB_G_WEBCAM is not set + +# +# OTG and related infrastructure +# +CONFIG_USB_OTG_UTILS=y +# CONFIG_USB_OTG_WAKELOCK is not set +# CONFIG_USB_GPIO_VBUS is not set +CONFIG_USB_ULPI=y +CONFIG_USB_ULPI_VIEWPORT=y +# CONFIG_NOP_USB_XCEIV is not set +CONFIG_USB_TEGRA_OTG=y +# CONFIG_UWB is not set +CONFIG_MMC=y +# CONFIG_MMC_DEBUG is not set +CONFIG_MMC_UNSAFE_RESUME=y +# CONFIG_MMC_CLKGATE is not set +CONFIG_MMC_EMBEDDED_SDIO=y +# CONFIG_MMC_PARANOID_SD_INIT is not set + +# +# MMC/SD/SDIO Card Drivers +# +CONFIG_MMC_BLOCK=y +CONFIG_MMC_BLOCK_MINORS=16 +CONFIG_MMC_BLOCK_BOUNCE=y +CONFIG_MMC_BLOCK_DEFERRED_RESUME=y +# CONFIG_SDIO_UART is not set +CONFIG_MMC_TEST=y + +# +# MMC/SD/SDIO Host Controller Drivers +# +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_IO_ACCESSORS=y +# CONFIG_MMC_SDHCI_PCI is not set +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_SDHCI_TEGRA=y +# CONFIG_MMC_SDHCI_PXAV3 is not set +# CONFIG_MMC_SDHCI_PXAV2 is not set +# CONFIG_MMC_TIFM_SD is not set +# CONFIG_MMC_CB710 is not set +# CONFIG_MMC_VIA_SDMMC is not set +# CONFIG_MMC_DW is not set +# CONFIG_MMC_VUB300 is not set +# CONFIG_MMC_USHC is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y + +# +# LED drivers +# +# CONFIG_LEDS_LM3530 is not set +# CONFIG_LEDS_PCA9532 is not set +CONFIG_LEDS_GPIO=y +# CONFIG_LEDS_LP3944 is not set +# CONFIG_LEDS_LP5521 is not set +# CONFIG_LEDS_LP5523 is not set +# CONFIG_LEDS_PCA955X is not set +# CONFIG_LEDS_DAC124S085 is not set +# CONFIG_LEDS_PWM is not set +# CONFIG_LEDS_REGULATOR is not set +# CONFIG_LEDS_BD2802 is not set +# CONFIG_LEDS_LT3593 is not set +# CONFIG_LEDS_TRIGGERS is not set + +# +# LED Triggers +# +CONFIG_SWITCH=y +# CONFIG_SWITCH_GPIO is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_INFINIBAND is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_HCTOSYS_DEVICE="rtc0" +# CONFIG_RTC_DEBUG is not set + +# +# RTC interfaces +# +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set 
+CONFIG_RTC_INTF_ALARM=y +CONFIG_RTC_INTF_ALARM_DEV=y +# CONFIG_RTC_DRV_TEST is not set + +# +# I2C RTC drivers +# +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_DS3232 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +CONFIG_RTC_DRV_MAX77663=y +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_ISL12022 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_BQ32K is not set +CONFIG_RTC_DRV_TPS6586X=y +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set +# CONFIG_RTC_DRV_RX8025 is not set +# CONFIG_RTC_DRV_EM3027 is not set +# CONFIG_RTC_DRV_RV3029C2 is not set + +# +# SPI RTC drivers +# +# CONFIG_RTC_DRV_M41T93 is not set +# CONFIG_RTC_DRV_M41T94 is not set +# CONFIG_RTC_DRV_DS1305 is not set +# CONFIG_RTC_DRV_DS1390 is not set +# CONFIG_RTC_DRV_MAX6902 is not set +# CONFIG_RTC_DRV_R9701 is not set +# CONFIG_RTC_DRV_RS5C348 is not set +# CONFIG_RTC_DRV_DS3234 is not set +# CONFIG_RTC_DRV_PCF2123 is not set + +# +# Platform RTC drivers +# +# CONFIG_RTC_DRV_CMOS is not set +# CONFIG_RTC_DRV_DS1286 is not set +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_MSM6242 is not set +# CONFIG_RTC_DRV_BQ4802 is not set +# CONFIG_RTC_DRV_RP5C01 is not set +# CONFIG_RTC_DRV_V3020 is not set + +# +# on-CPU RTC drivers +# +# CONFIG_RTC_DRV_TEGRA is not set +CONFIG_RTC_DRV_TPS6591x=y +CONFIG_RTC_DRV_TPS80031=y +CONFIG_RTC_DRV_RC5T583=y +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +# CONFIG_UIO is not set + +# +# Virtio drivers +# +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set +CONFIG_STAGING=y +# CONFIG_ET131X is not set +# CONFIG_USBIP_CORE is not set +# CONFIG_PRISM2_USB is not set +# CONFIG_ECHO is not set +# CONFIG_BRCMUTIL is not set +# CONFIG_ASUS_OLED is not set +# CONFIG_R8187SE is not set +# CONFIG_RTL8192U is not set +# CONFIG_RTL8192E is not set +# CONFIG_R8712U is not set +# CONFIG_RTS_PSTOR is not set +# CONFIG_TRANZPORT is not set + +# +# Android +# +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_LOGGER=y +CONFIG_ANDROID_RAM_CONSOLE=y +CONFIG_ANDROID_RAM_CONSOLE_ENABLE_VERBOSE=y +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION=y +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_DATA_SIZE=128 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_ECC_SIZE=16 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE=8 +CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_POLYNOMIAL=0x11d +# CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT is not set +CONFIG_ANDROID_TIMED_OUTPUT=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +# CONFIG_POHMELFS is not set +# CONFIG_LINE6_USB is not set +# CONFIG_USB_SERIAL_QUATECH2 is not set +# CONFIG_USB_SERIAL_QUATECH_USB2 is not set +# CONFIG_VT6655 is not set +# CONFIG_VT6656 is not set +# CONFIG_VME_BUS is not set +# CONFIG_DX_SEP is not set +CONFIG_IIO=y +# CONFIG_IIO_ST_HWMON is not set +CONFIG_IIO_BUFFER=y +# CONFIG_IIO_SW_RING is not set +CONFIG_IIO_KFIFO_BUF=y +CONFIG_IIO_TRIGGER=y +CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 + +# +# Accelerometers +# +# CONFIG_ADIS16201 is not set +# CONFIG_ADIS16203 is not set +# 
CONFIG_ADIS16204 is not set +# CONFIG_ADIS16209 is not set +# CONFIG_ADIS16220 is not set +# CONFIG_ADIS16240 is not set +# CONFIG_KXSD9 is not set +# CONFIG_LIS3L02DQ is not set + +# +# Analog to digital convertors +# +# CONFIG_AD7150 is not set +# CONFIG_AD7152 is not set +# CONFIG_AD7291 is not set +# CONFIG_AD7298 is not set +# CONFIG_AD7314 is not set +# CONFIG_AD7606 is not set +# CONFIG_AD799X is not set +# CONFIG_AD7476 is not set +# CONFIG_AD7887 is not set +# CONFIG_AD7780 is not set +# CONFIG_AD7793 is not set +# CONFIG_AD7745 is not set +# CONFIG_AD7816 is not set +# CONFIG_ADT75 is not set +# CONFIG_ADT7310 is not set +# CONFIG_ADT7410 is not set +# CONFIG_MAX1363 is not set + +# +# Analog digital bi-direction convertors +# +# CONFIG_ADT7316 is not set + +# +# Digital to analog convertors +# +# CONFIG_AD5624R_SPI is not set +# CONFIG_AD5446 is not set +# CONFIG_AD5504 is not set +# CONFIG_AD5791 is not set +# CONFIG_AD5686 is not set +# CONFIG_MAX517 is not set + +# +# Direct Digital Synthesis +# +# CONFIG_AD5930 is not set +# CONFIG_AD9832 is not set +# CONFIG_AD9834 is not set +# CONFIG_AD9850 is not set +# CONFIG_AD9852 is not set +# CONFIG_AD9910 is not set +# CONFIG_AD9951 is not set + +# +# Digital gyroscope sensors +# +# CONFIG_ADIS16060 is not set +# CONFIG_ADIS16080 is not set +# CONFIG_ADIS16130 is not set +# CONFIG_ADIS16260 is not set +# CONFIG_ADXRS450 is not set + +# +# Inertial measurement units +# +# CONFIG_ADIS16400 is not set +CONFIG_INV_MPU_IIO=y + +# +# Light sensors +# +# CONFIG_SENSORS_ISL29018 is not set +CONFIG_SENSORS_ISL29028=y +# CONFIG_SENSORS_TSL2563 is not set +# CONFIG_TSL2583 is not set +CONFIG_SENSORS_LTR558=y + +# +# Magnetometer sensors +# +# CONFIG_SENSORS_HMC5843 is not set +CONFIG_AMI306=y + +# +# Active energy metering IC +# +# CONFIG_ADE7753 is not set +# CONFIG_ADE7754 is not set +# CONFIG_ADE7758 is not set +# CONFIG_ADE7759 is not set +# CONFIG_ADE7854 is not set + +# +# Resolver to digital converters +# +# CONFIG_AD2S90 is not set +# CONFIG_AD2S120X is not set +# CONFIG_AD2S1210 is not set + +# +# Triggers - standalone +# +# CONFIG_IIO_PERIODIC_RTC_TRIGGER is not set +# CONFIG_IIO_GPIO_TRIGGER is not set +# CONFIG_IIO_SYSFS_TRIGGER is not set +# CONFIG_IIO_SIMPLE_DUMMY is not set +CONFIG_XVMALLOC=y +CONFIG_ZRAM=y +# CONFIG_ZRAM_DEBUG is not set +# CONFIG_FB_SM7XX is not set +# CONFIG_VIDEO_DT3155 is not set +# CONFIG_CRYSTALHD is not set +# CONFIG_FB_XGI is not set +# CONFIG_EASYCAP is not set +# CONFIG_SOLO6X10 is not set +# CONFIG_ATH6K_LEGACY is not set +# CONFIG_USB_ENESTORAGE is not set +# CONFIG_BCM_WIMAX is not set +# CONFIG_FT1000 is not set + +# +# Speakup console speech +# +# CONFIG_TOUCHSCREEN_CLEARPAD_TM1217 is not set +# CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4 is not set +# CONFIG_ALTERA_STAPL is not set +# CONFIG_MFD_NVEC is not set +CONFIG_CLKDEV_LOOKUP=y +CONFIG_CLKSRC_MMIO=y +CONFIG_IOMMU_SUPPORT=y +# CONFIG_TEGRA_IOMMU_SMMU is not set +# CONFIG_VIRT_DRIVERS is not set +# CONFIG_RIL is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_USE_FOR_EXT23=y +CONFIG_EXT4_FS_XATTR=y +CONFIG_EXT4_FS_POSIX_ACL=y +# CONFIG_EXT4_FS_SECURITY is not set +# CONFIG_EXT4_DEBUG is not set +CONFIG_JBD2=y +# CONFIG_JBD2_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set +CONFIG_FS_POSIX_ACL=y 
+CONFIG_FILE_LOCKING=y +CONFIG_FSNOTIFY=y +# CONFIG_DNOTIFY is not set +CONFIG_INOTIFY_USER=y +# CONFIG_FANOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_QUOTACTL is not set +# CONFIG_AUTOFS4_FS is not set +CONFIG_FUSE_FS=y +# CONFIG_CUSE is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +CONFIG_NTFS_FS=y +# CONFIG_NTFS_DEBUG is not set +CONFIG_NTFS_RW=y + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_REPORT_PRESENT_CPUS=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_TMPFS_XATTR is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +CONFIG_HFS_FS=y +CONFIG_HFSPLUS_FS=y +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_LOGFS is not set +# CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_PSTORE is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=y +# CONFIG_NFS_V3 is not set +# CONFIG_NFS_V4 is not set +CONFIG_ROOT_NFS=y +# CONFIG_NFSD is not set +CONFIG_LOCKD=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_CEPH_FS is not set +CONFIG_CIFS=y +# CONFIG_CIFS_STATS is not set +# CONFIG_CIFS_WEAK_PW_HASH is not set +# CONFIG_CIFS_XATTR is not set +# CONFIG_CIFS_DEBUG2 is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set 
+CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +CONFIG_NLS_UTF8=y + +# +# Kernel hacking +# +CONFIG_PRINTK_TIME=y +CONFIG_DEFAULT_MESSAGE_LOGLEVEL=4 +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +CONFIG_MAGIC_SYSRQ=y +# CONFIG_STRIP_ASM_SYMS is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +# CONFIG_DEBUG_SECTION_MISMATCH is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_LOCKUP_DETECTOR=y +# CONFIG_HARDLOCKUP_DETECTOR is not set +# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +# CONFIG_DETECT_HUNG_TASK is not set +CONFIG_SCHED_DEBUG=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_SLUB_STATS is not set +# CONFIG_DEBUG_KMEMLEAK is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_SPARSE_RCU_POINTER is not set +# CONFIG_LOCK_STAT is not set +# CONFIG_DEBUG_ATOMIC_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_STACKTRACE is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_TEST_LIST_SORT is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_DEBUG_CREDENTIALS is not set +CONFIG_FRAME_POINTER=y +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +# CONFIG_RCU_CPU_STALL_VERBOSE is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set +# CONFIG_DEBUG_PER_CPU_MAPS is not set +# CONFIG_LKDTM is not set +# CONFIG_CPU_NOTIFIER_ERROR_INJECT is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set +# CONFIG_DEBUG_PAGEALLOC is not set +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_HAVE_C_RECORDMCOUNT=y +CONFIG_TRACING_SUPPORT=y +# CONFIG_FTRACE is not set +CONFIG_DYNAMIC_DEBUG=y +# CONFIG_DMA_API_DEBUG is not set +# CONFIG_ATOMIC64_SELFTEST is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +# CONFIG_TEST_KSTRTOX is not set +# CONFIG_STRICT_DEVMEM is not set +# CONFIG_ARM_UNWIND is not set +# CONFIG_DEBUG_USER is not set +# CONFIG_DEBUG_LL is not set +# CONFIG_OC_ETM is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY_DMESG_RESTRICT is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set 
+CONFIG_TRUSTED_FOUNDATIONS=y +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEFAULT_SECURITY="" +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP2=y +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y +CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_PCRYPT is not set +CONFIG_CRYPTO_WORKQUEUE=y +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_VMAC is not set + +# +# Digest +# +CONFIG_CRYPTO_CRC32C=y +# CONFIG_CRYPTO_GHASH is not set +CONFIG_CRYPTO_MD4=y +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA256=y +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=y +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_ARC4=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +CONFIG_CRYPTO_TWOFISH=y +CONFIG_CRYPTO_TWOFISH_COMMON=y + +# +# Compression +# +CONFIG_CRYPTO_DEFLATE=y +# CONFIG_CRYPTO_ZLIB is not set +# CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +# CONFIG_CRYPTO_USER_API_HASH is not set +# CONFIG_CRYPTO_USER_API_SKCIPHER is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_CRYPTO_DEV_TEGRA_AES is not set +CONFIG_CRYPTO_DEV_TEGRA_SE=y +# CONFIG_BINARY_PRINTF is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_CRC_CCITT=y +CONFIG_CRC16=y +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=y +# CONFIG_CRC8 is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_LZO_COMPRESS=y +CONFIG_LZO_DECOMPRESS=y +# CONFIG_XZ_DEC is not set +# CONFIG_XZ_DEC_BCJ is not set +CONFIG_DECOMPRESS_GZIP=y +CONFIG_REED_SOLOMON=y +CONFIG_REED_SOLOMON_ENC8=y +CONFIG_REED_SOLOMON_DEC8=y +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=y +CONFIG_TEXTSEARCH_BM=y +CONFIG_TEXTSEARCH_FSM=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_CPU_RMAP=y +CONFIG_NLATTR=y +# CONFIG_AVERAGE is not set +# CONFIG_CORDIC is not set From 3444864dcd30c0ddbc4a0d658cc07719d391310c Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 5 Nov 2012 12:26:52 -0500 Subject: [PATCH 086/678] tegra: add LP overclock config interface --- 
arch/arm/configs/metallice_grouper_defconfig | 5 +++- arch/arm/mach-tegra/Kconfig | 26 ++++++++++++++++++++ arch/arm/mach-tegra/tegra3_clocks.c | 9 +++++++ arch/arm/mach-tegra/tegra3_dvfs.c | 11 +++++++-- 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 4ac53e3a5eb..2be17e30686 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-446-620" +CONFIG_LOCALVERSION="-MKernel-446-550" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -315,6 +315,9 @@ CONFIG_GPU_OVERCLOCK=y CONFIG_GPU_OC_446=y # CONFIG_GPU_OC_484 is not set # CONFIG_GPU_OC_520 is not set +CONFIG_LP_OVERCLOCK=y +CONFIG_LP_OC_550=y +# CONFIG_LP_OC_620 is not set CONFIG_TEGRA_CPU_DVFS=y CONFIG_TEGRA_CORE_DVFS=y CONFIG_TEGRA_IOVMM_SMMU=y diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 9f32b0a7412..73a9a4b0fc2 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -305,6 +305,32 @@ choice bool "520 MHz" endchoice + +config LP_OVERCLOCK + bool "Enable LP overclock for Tegra3" + depends on TEGRA_SILICON_PLATFORM + default n + help + Choose y to overclock the LP core. + If Off, maximum clock speed is 475MHz. + If On, LP clock speed can be selected. + +choice + + depends on LP_OVERCLOCK + prompt "Maximum LP Rate" + default GPU_OC_620 + ---help--- + Select the desired LP overclock rate. + + If you are not sure what you are doing, leave this + option alone! + config LP_OC_550 + bool "550 MHz" + config LP_OC_620 + bool "620 MHz" + +endchoice config TEGRA_CPU_DVFS bool "Enable voltage scaling on Tegra CPU" diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 48e216ceef8..625c2d6d71e 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4627,7 +4627,16 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { { 2, 204000 }, { 3, 340000 }, { 4, 475000 }, +#ifdef CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_620 { 5, 620000 }, +#endif +#ifdef CONFIG_LP_OC_550 + { 5, 550000 }, +#endif +#else + { 5, 620000 }, +#endif { 6, 860000 }, { 7, 1000000 }, { 8, 1100000 }, diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 66b282e8d8c..2cc70c89b6c 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -167,7 +167,6 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 4, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1250, 1300, 1330, 1360, 1400, 1500), /* Nexus 7 - faking speedo id = 4, process id =2 - Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ // CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500, 1600), /*Cpu voltages (mV): 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237 */ @@ -226,8 +225,16 @@ static struct dvfs core_dvfs_table[] = { /* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ /* Clock limits for internal blocks, PLLs */ CORE_DVFS("cpu_lp", 0, 1, KHZ, 1, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), -// CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 
427000, 475000, 500000, 500000, 500000, 500000), +#ifdef CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_620 CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 475000, 620000, 620000, 620000, 620000, 620000), +#endif +#ifdef CONFIG_LP_OC_550 + CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 475000, 550000, 550000, 550000, 550000, 550000), +#endif +#else + CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), +#endif CORE_DVFS("cpu_lp", 2, 1, KHZ, 204000, 295000, 370000, 428000, 475000, 513000, 579000, 620000, 620000), CORE_DVFS("cpu_lp", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 450000, 450000, 450000), From 4902e521a20ac50659e7e3c9c16809a5b646c662 Mon Sep 17 00:00:00 2001 From: Timur Mehrvarz Date: Tue, 18 Sep 2012 05:09:12 +0200 Subject: [PATCH 087/678] OTG HOST moe: disable charging of slave + enable vbus in --- drivers/usb/otg/tegra-otg.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index c1fe7f899f1..ef1792286e7 100644 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -230,8 +230,12 @@ static void irq_work(struct work_struct *work) dev_info(tegra->otg.dev, "%s --> %s\n", tegra_state_name(from), tegra_state_name(to)); - if (tegra->charger_cb) - tegra->charger_cb(to, from, tegra->charger_cb_data); + if (tegra->charger_cb) { + // tmtmtm: disable charging of OTG slave + //tegra->charger_cb(to, from, tegra->charger_cb_data); + // tmtmtm: enable vbus in + tegra->detect_vbus = true; + } if (to == OTG_STATE_A_SUSPEND) { if (from == OTG_STATE_A_HOST) From fca2d1fe779665edc30b62219541dec29a213cdd Mon Sep 17 00:00:00 2001 From: faux123 Date: Fri, 5 Oct 2012 22:08:34 -0700 Subject: [PATCH 088/678] tegra-otg: HACK to allow OTG peripheral and charging at the same time Signed-off-by: Paul Reioux --- drivers/usb/otg/Kconfig | 7 +++++++ drivers/usb/otg/tegra-otg.c | 17 ++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig index bcb3e868033..726f3a2b7e8 100644 --- a/drivers/usb/otg/Kconfig +++ b/drivers/usb/otg/Kconfig @@ -129,6 +129,13 @@ config USB_TEGRA_OTG Enable this driver on boards which use the internal VBUS and ID sensing of the Tegra USB PHY. 
+config USB_OTG_ON_CHARGING + boolean "Tegra OTG On Charging Hack" + depends on USB && USB_TEGRA_OTG + default y + help + Enable this to allow OTG peripheral and charging at the same time + config AB8500_USB tristate "AB8500 USB Transceiver Driver" depends on AB8500_CORE diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index ef1792286e7..bffd0b654b7 100644 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -64,6 +64,11 @@ struct tegra_otg_data { }; static struct tegra_otg_data *tegra_clone; +#ifdef CONFIG_USB_OTG_ON_CHARGING +static bool tegra_otg_on_charging = false; +module_param(tegra_otg_on_charging, bool, 0664); +#endif + static inline unsigned long otg_readl(struct tegra_otg_data *tegra, unsigned int offset) { @@ -231,11 +236,13 @@ static void irq_work(struct work_struct *work) tegra_state_name(to)); if (tegra->charger_cb) { - // tmtmtm: disable charging of OTG slave - //tegra->charger_cb(to, from, tegra->charger_cb_data); - // tmtmtm: enable vbus in - tegra->detect_vbus = true; - } + if (tegra_otg_on_charging) + /* enable v_bus detection for charging */ + tegra->detect_vbus = true; + else + /* enable OTG to supply internal power */ + tegra->charger_cb(to, from, tegra->charger_cb_data); + } if (to == OTG_STATE_A_SUSPEND) { if (from == OTG_STATE_A_HOST) From a7be201a3c3991edd81ce2122f8b133f6f45cc9d Mon Sep 17 00:00:00 2001 From: faux123 Date: Sun, 7 Oct 2012 07:37:48 -0700 Subject: [PATCH 089/678] tegra3_variant: add debugfs entry to display real tegra3 SOC variant info Signed-off-by: Paul Reioux Conflicts: arch/arm/mach-tegra/Kconfig arch/arm/mach-tegra/tegra3_speedo.c --- arch/arm/mach-tegra/Kconfig | 7 +++++ arch/arm/mach-tegra/edp.c | 41 +++++++++++++++++++++++++++++ arch/arm/mach-tegra/tegra3_speedo.c | 16 +++++++++++ 3 files changed, 64 insertions(+) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 73a9a4b0fc2..f82c2ac9f43 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -472,6 +472,13 @@ config TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND help Also will restore to original cpu frequency governor when device is resumed +config TEGRA_VARIANT_INFO + bool "Tegra3 variant info" + depends on ARCH_TEGRA_3x_SOC + default y + help + Tegra3 SOC variant info display via debugfs + config TEGRA_STAT_MON bool "Enable H/W statistics monitor" depends on ARCH_TEGRA_2x_SOC diff --git a/arch/arm/mach-tegra/edp.c b/arch/arm/mach-tegra/edp.c index a4be48fed4d..56465eafa8b 100644 --- a/arch/arm/mach-tegra/edp.c +++ b/arch/arm/mach-tegra/edp.c @@ -436,6 +436,28 @@ void tegra_get_system_edp_limits(const unsigned int **limits) #ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_TEGRA_VARIANT_INFO +extern int orig_cpu_process_id; +extern int orig_core_process_id; +extern int orig_cpu_speedo_id; +extern int orig_soc_speedo_id; + +static int t3_variant_debugfs_show(struct seq_file *s, void *data) +{ + int cpu_speedo_id = orig_cpu_speedo_id; + int soc_speedo_id = orig_soc_speedo_id; + int cpu_process_id = orig_cpu_process_id; + int core_process_id = orig_core_process_id; + + seq_printf(s, "cpu_speedo_id => %d\n", cpu_speedo_id); + seq_printf(s, "soc_speedo_id => %d\n", soc_speedo_id); + seq_printf(s, "cpu_process_id => %d\n", cpu_process_id); + seq_printf(s, "core_process_id => %d\n", core_process_id); + + return 0; +} +#endif + static int edp_limit_debugfs_show(struct seq_file *s, void *data) { seq_printf(s, "%u\n", tegra_get_edp_limit()); @@ -470,6 +492,12 @@ static int edp_debugfs_show(struct seq_file *s, void *data) return 
0; } +#ifdef CONFIG_TEGRA_VARIANT_INFO +static int t3_variant_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, t3_variant_debugfs_show, inode->i_private); +} +#endif static int edp_debugfs_open(struct inode *inode, struct file *file) { @@ -481,6 +509,14 @@ static int edp_limit_debugfs_open(struct inode *inode, struct file *file) return single_open(file, edp_limit_debugfs_show, inode->i_private); } +#ifdef CONFIG_TEGRA_VARIANT_INFO +static const struct file_operations t3_variant_debugfs_fops = { + .open = t3_variant_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif static const struct file_operations edp_debugfs_fops = { .open = edp_debugfs_open, @@ -500,6 +536,11 @@ static int __init tegra_edp_debugfs_init(void) { struct dentry *d; +#ifdef CONFIG_TEGRA_VARIANT_INFO + d = debugfs_create_file("t3_variant", S_IRUGO, NULL, NULL, + &t3_variant_debugfs_fops); +#endif + d = debugfs_create_file("edp", S_IRUGO, NULL, NULL, &edp_debugfs_fops); if (!d) diff --git a/arch/arm/mach-tegra/tegra3_speedo.c b/arch/arm/mach-tegra/tegra3_speedo.c index 7f49e164624..cc4f3b675f2 100644 --- a/arch/arm/mach-tegra/tegra3_speedo.c +++ b/arch/arm/mach-tegra/tegra3_speedo.c @@ -129,6 +129,13 @@ static int cpu_speedo_id; static int soc_speedo_id; static int package_id; +#ifdef CONFIG_TEGRA_VARIANT_INFO +int orig_cpu_process_id; +int orig_core_process_id; +int orig_cpu_speedo_id; +int orig_soc_speedo_id; +#endif + static void fuse_speedo_calib(u32 *speedo_g, u32 *speedo_lp) { u32 reg; @@ -233,6 +240,11 @@ static void rev_sku_to_speedo_ids(int rev, int sku) case 0x83: /* T30L or T30S */ switch (package_id) { case 1: /* MID => T30L */ +#ifdef CONFIG_TEGRA_VARIANT_INFO + /* save it for T3 Variant info */ + orig_cpu_speedo_id = 7; + orig_soc_speedo_id = 1; +#endif cpu_speedo_id = 4; soc_speedo_id = 1; threshold_index = 7; @@ -449,6 +461,10 @@ void tegra_init_speedo_data(void) break; } } +#ifdef CONFIG_TEGRA_VARIANT_INFO + core_process_id = iv -1; + orig_core_process_id = core_process_id; +#endif core_process_id = 1; //iv -1; if (core_process_id == -1) { From d658c5105435cb96a14c7ff5e35d778a2ed02cc9 Mon Sep 17 00:00:00 2001 From: franciscofranco Date: Thu, 22 Dec 2011 18:19:31 +0000 Subject: [PATCH 090/678] Optimize slub --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index f73234db904..46d61070d04 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2487,7 +2487,7 @@ EXPORT_SYMBOL(kmem_cache_free); * take the list_lock. 
*/ static int slub_min_order; -static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; +static int slub_max_order; static int slub_min_objects; /* From 8719f47b31477d64018baa7e431bda92956d484a Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 5 Nov 2012 14:26:09 -0500 Subject: [PATCH 091/678] smb347-charger.c: host mode charging patch --- drivers/power/smb347-charger.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index ff3d2338fad..499b1724e32 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -103,6 +103,7 @@ #define APSD_DCP 0x02 #define APSD_OTHER 0x03 #define APSD_SDP 0x04 +#define APSD_SDP2 0x06 // tmtmtm: USB host mode charging #define USB_30 0x20 #define DCIN_OV_UV_STS 0x50 #define DELAY_FOR_CURR_LIMIT_RECONF (60) @@ -941,9 +942,17 @@ static int cable_type_detect(void) #ifdef TOUCH_CALLBACK_ENABLED touch_callback(usb_cable); #endif + // tmtmtm start + } else if(retval == APSD_SDP2) { + printk("Cable: SDP2 host mode charging\n"); + success = battery_callback(usb_cable); +#ifdef TOUCH_CALLBACK_ENABLED + touch_callback(usb_cable); +#endif + // tmtmtm end } else { charger->cur_cable_type = unknow_cable; - printk(KERN_INFO "Unkown Plug In Cable type !\n"); + printk(KERN_INFO "Unkown Plug In Cable type !! retval=%d\n",retval); if (gpio_get_value(dock_in)) { charger->cur_cable_type = usb_cable; success = battery_callback(usb_cable); From 482b9b45e250e335f144e1ba0965e7f1181fdc5c Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 5 Nov 2012 15:18:05 -0500 Subject: [PATCH 092/678] minor changes to ondemand --- drivers/cpufreq/cpufreq_ondemand.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 244640ff398..c72b0170499 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -28,8 +28,8 @@ * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) -#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (3) +#define DEF_FREQUENCY_UP_THRESHOLD (95) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) From ac130e71e3f6b8d34f2ffe6a2d86b84f258f6d7e Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 5 Nov 2012 15:20:32 -0500 Subject: [PATCH 093/678] defconfig: update for otg charging and variant info display --- arch/arm/configs/metallice_grouper_defconfig | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 2be17e30686..181dd341215 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-446-550" +CONFIG_LOCALVERSION="-MKernel-446-620" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -316,8 +316,8 @@ CONFIG_GPU_OC_446=y # CONFIG_GPU_OC_484 is not set # CONFIG_GPU_OC_520 is not set CONFIG_LP_OVERCLOCK=y -CONFIG_LP_OC_550=y -# CONFIG_LP_OC_620 is not set +# CONFIG_LP_OC_550 is not set +CONFIG_LP_OC_620=y CONFIG_TEGRA_CPU_DVFS=y CONFIG_TEGRA_CORE_DVFS=y CONFIG_TEGRA_IOVMM_SMMU=y @@ -335,6 +335,7 @@ CONFIG_TEGRA_MC_PROFILE=y CONFIG_TEGRA_EDP_LIMITS=y 
CONFIG_TEGRA_EMC_TO_DDR_CLOCK=1 # CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND is not set +CONFIG_TEGRA_VARIANT_INFO=y CONFIG_USB_HOTPLUG=y CONFIG_TEGRA_DYNAMIC_PWRDET=y CONFIG_TEGRA_EDP_EXACT_FREQ=y @@ -2559,6 +2560,7 @@ CONFIG_USB_ULPI=y CONFIG_USB_ULPI_VIEWPORT=y # CONFIG_NOP_USB_XCEIV is not set CONFIG_USB_TEGRA_OTG=y +CONFIG_USB_OTG_ON_CHARGING=y # CONFIG_UWB is not set CONFIG_MMC=y # CONFIG_MMC_DEBUG is not set From b912ac39fad08da1b89a91cb78384b173d982102 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 5 Nov 2012 19:22:35 -0500 Subject: [PATCH 094/678] Revert " arm: tegra: usb: phy: fix hotplug function" This reverts commit 497c9ebd8b43cc7da9783d9081fbae7de7a71de2. --- arch/arm/mach-tegra/usb_phy.c | 6 +++--- drivers/usb/host/ehci-tegra.c | 8 +------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/arm/mach-tegra/usb_phy.c b/arch/arm/mach-tegra/usb_phy.c index 6e84e3d8279..e40e2415c5a 100755 --- a/arch/arm/mach-tegra/usb_phy.c +++ b/arch/arm/mach-tegra/usb_phy.c @@ -865,7 +865,7 @@ static void utmi_phy_clk_disable(struct tegra_usb_phy *phy) val |= HOSTPC1_DEVLC_PHCD; writel(val, base + HOSTPC1_DEVLC); #endif - if (phy->hotplug) { + if (phy->instance == 2) { val = readl(base + USB_SUSP_CTRL); val |= USB_PHY_CLK_VALID_INT_ENB; writel(val, base + USB_SUSP_CTRL); @@ -1482,7 +1482,7 @@ static int utmi_phy_power_off(struct tegra_usb_phy *phy, bool is_dpd) writel(val, base + UTMIP_BAT_CHRG_CFG0); } - if (!phy->hotplug) { + if (phy->instance != 2) { val = readl(base + UTMIP_XCVR_CFG0); val |= (UTMIP_FORCE_PD_POWERDOWN | UTMIP_FORCE_PD2_POWERDOWN | UTMIP_FORCE_PDZI_POWERDOWN); @@ -1512,7 +1512,7 @@ static int utmi_phy_power_off(struct tegra_usb_phy *phy, bool is_dpd) utmi_phy_clk_disable(phy); - utmip_pad_power_off(phy, is_dpd); + utmip_pad_power_off(phy, true); return 0; } diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 76f40688f82..cad3bbb2942 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -208,6 +208,7 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) val &= ~TEGRA_USB_PHY_CLK_VALID_INT_ENB | TEGRA_USB_PHY_CLK_VALID_INT_STS; writel(val , (hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET)); + val = readl(&hw->status); if (!(val & STS_PCD)) { spin_unlock(&ehci->lock); @@ -217,12 +218,6 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) val &= ~(TEGRA_USB_PORTSC1_WKCN | PORT_RWC_BITS); writel(val , (hcd->regs + TEGRA_USB_PORTSC1_OFFSET)); } - else if (tegra->bus_suspended && - tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) { - printk("%s: no device connected before suspend\n", __func__); - spin_unlock(&ehci->lock); - return 0; - } spin_unlock(&ehci->lock); } @@ -1371,7 +1366,6 @@ static int tegra_ehci_remove(struct platform_device *pdev) usb_remove_hcd(hcd); usb_put_hcd(hcd); tegra_usb_phy_power_off(tegra->phy, true); - tegra_ehci_disable_phy_interrupt(hcd); tegra_usb_phy_close(tegra->phy); iounmap(hcd->regs); From 675b26b95a471ce675f24daffa85e5e91fc8a09c Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 5 Nov 2012 19:56:46 -0500 Subject: [PATCH 095/678] fsl_udc: remove debug message --- drivers/usb/gadget/fsl_udc_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c index 618d20b1ee4..0a02d200104 100755 --- a/drivers/usb/gadget/fsl_udc_core.c +++ b/drivers/usb/gadget/fsl_udc_core.c @@ -310,7 +310,6 @@ static void cable_detection_work_handler(struct work_struct *w) if(!s_cable_info.ac_connected) { printk(KERN_INFO 
"The USB cable is connected\n"); s_cable_info.cable_status = 0x01; //0001 - pr_info("[imoseyon] force hc callback\n"); smb347_hc_mode_callback(1,1); } else { printk(KERN_INFO "AC adapter connect\n"); From 727892f2199963b34d23491eba71aeb0298aa353 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 5 Nov 2012 21:39:39 -0500 Subject: [PATCH 096/678] tegra3_dvfs.c: voltage fixes --- arch/arm/mach-tegra/tegra3_dvfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 2cc70c89b6c..05b63bc8778 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -31,7 +31,7 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { // 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; - 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237}; + 800, 800, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237}; #endif static bool tegra_dvfs_cpu_disabled; @@ -40,7 +40,7 @@ static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { // 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; - 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237}; + 800, 800, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237}; static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25, 25, 25, 25}; @@ -60,7 +60,7 @@ static int cpu_below_core = VDD_CPU_BELOW_VDD_CORE; static struct dvfs_rail tegra3_dvfs_rail_vdd_cpu = { .reg_id = "vdd_cpu", .max_millivolts = 1250, - .min_millivolts = 750, + .min_millivolts = 800, .step = VDD_SAFE_STEP, .jmp_to_zero = true, }; @@ -167,7 +167,7 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 4, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1250, 1300, 1330, 1360, 1400, 1500), /* Nexus 7 - faking speedo id = 4, process id =2 -// CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500, 1600), + CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500, 1600), */ /*Cpu voltages (mV): 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237 */ CPU_DVFS("cpu_g", 4, 2, MHZ, 475, 475, 620, 620, 860, 860, 1000, 1000, 1100, 1100, 1200, 1300, 1400, 1500, 1600), From f59e71519ffb7b5e1b2cfb2b890a873c6cf1fde3 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 6 Nov 2012 15:49:59 -0500 Subject: [PATCH 097/678] sched_fair.c: tweak cfs parameters --- arch/arm/configs/metallice_grouper_defconfig | 15 ++++++++++----- kernel/sched_fair.c | 8 ++++---- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 181dd341215..3c95343c904 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -91,7 +91,7 @@ CONFIG_FAIR_GROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y # CONFIG_BLK_CGROUP is not set # CONFIG_NAMESPACES is not set -# CONFIG_SCHED_AUTOGROUP is not set +CONFIG_SCHED_AUTOGROUP=y # CONFIG_SYSFS_DEPRECATED is not set # CONFIG_RELAY is not set CONFIG_BLK_DEV_INITRD=y @@ -596,18 +596,19 @@ CONFIG_IP_PNP_DHCP=y 
CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y # CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE_DEMUX is not set +CONFIG_NET_IPGRE_DEMUX=y +# CONFIG_NET_IPGRE is not set # CONFIG_IP_MROUTE is not set # CONFIG_ARPD is not set # CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set +CONFIG_INET_AH=y CONFIG_INET_ESP=y # CONFIG_INET_IPCOMP is not set # CONFIG_INET_XFRM_TUNNEL is not set CONFIG_INET_TUNNEL=y CONFIG_INET_XFRM_MODE_TRANSPORT=y CONFIG_INET_XFRM_MODE_TUNNEL=y -# CONFIG_INET_XFRM_MODE_BEET is not set +CONFIG_INET_XFRM_MODE_BEET=y # CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_TCP_CONG_ADVANCED is not set @@ -814,7 +815,9 @@ CONFIG_IP6_NF_RAW=y # CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set -# CONFIG_L2TP is not set +CONFIG_L2TP=y +# CONFIG_L2TP_DEBUGFS is not set +# CONFIG_L2TP_V3 is not set # CONFIG_BRIDGE is not set # CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set @@ -1301,6 +1304,8 @@ CONFIG_PPP_DEFLATE=y CONFIG_PPP_BSDCOMP=y CONFIG_PPP_MPPE=y # CONFIG_PPPOE is not set +# CONFIG_PPTP is not set +# CONFIG_PPPOL2TP is not set CONFIG_PPPOLAC=y CONFIG_PPPOPNS=y # CONFIG_SLIP is not set diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index bc8ee999381..240739c07b3 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -36,8 +36,8 @@ * (to see the precise effective timeslice length of your workload, * run vmstat and monitor the context-switches (cs) field) */ -unsigned int sysctl_sched_latency = 6000000ULL; -unsigned int normalized_sysctl_sched_latency = 6000000ULL; +unsigned int sysctl_sched_latency = 4000000ULL; +unsigned int normalized_sysctl_sched_latency = 4000000ULL; /* * The initial- and re-scaling of tunables is configurable @@ -77,8 +77,8 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; * and reduces their over-scheduling. Synchronous workloads will still * have immediate wakeup/sleep latencies. */ -unsigned int sysctl_sched_wakeup_granularity = 1000000UL; -unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; +unsigned int sysctl_sched_wakeup_granularity = 4000000UL; +unsigned int normalized_sysctl_sched_wakeup_granularity = 4000000UL; const_debug unsigned int sysctl_sched_migration_cost = 500000UL; From 3a9037d489accb6bd3a2f99012552499152a0c52 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 6 Nov 2012 16:00:25 -0500 Subject: [PATCH 098/678] ARM: tegra: clock: Adjust Tegra3 cpu to emc ratio On Tegra3 changed cpu rate threshold for maximum emc rate request from 750MHz to 925MHz. Adjusted cpu frequency table to provide entries close to the new threshold for all Tegra3 skus. 
--- arch/arm/mach-tegra/tegra3_clocks.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 625c2d6d71e..aee583ff210 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4766,10 +4766,10 @@ unsigned long tegra_emc_to_cpu_ratio(unsigned long cpu_rate) /* Vote on memory bus frequency based on cpu frequency; cpu rate is in kHz, emc rate is in Hz */ - if (cpu_rate >= 750000) - return emc_max_rate; /* cpu >= 750 MHz, emc max */ + if (cpu_rate >= 925000) + return emc_max_rate; /* cpu >= 925 MHz, emc max */ else if (cpu_rate >= 450000) - return emc_max_rate/2; /* cpu >= 500 MHz, emc max/2 */ + return emc_max_rate/2; /* cpu >= 450 MHz, emc max/2 */ else if (cpu_rate >= 250000) return 100000000; /* cpu >= 250 MHz, emc 100 MHz */ else From 3bd5efe608475a5ae320b30a3333e2e9c9f09610 Mon Sep 17 00:00:00 2001 From: Alex Frid Date: Mon, 19 Mar 2012 18:21:51 -0700 Subject: [PATCH 099/678] ARM: tegra: clock: Share Tegra3 camera EMC bandwidth Change Tegra3 camera EMC shared user mode from SHARED_FLOOR to SHARED_BW and combine requests from ISO clients (camera and display, which is already in SHARED_BW mode). Bug 652739 Signed-off-by: Alex Frid (cherry picked from commit f1107ea4fe229d9807c1fba79a003753d0a8be7f) Change-Id: If5b7f578060a646df1794dde8c9be2944d88e942 Reviewed-on: http://git-master/r/103498 Reviewed-by: Automatic_Commit_Validation_User Tested-by: Aleksandr Frid Reviewed-by: Jon Mayo GVS: Gerrit_Virtual_Submit Conflicts: arch/arm/mach-tegra/tegra3_clocks.c --- arch/arm/mach-tegra/tegra3_clocks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index aee583ff210..f14edcf2e31 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4335,7 +4335,8 @@ struct clk tegra_list_clks[] = { SHARED_CLK("3d.emc", "tegra_gr3d", "emc", &tegra_clk_emc, NULL, 0, 0), SHARED_CLK("2d.emc", "tegra_gr2d", "emc", &tegra_clk_emc, NULL, 0, 0), SHARED_CLK("mpe.emc", "tegra_mpe", "emc", &tegra_clk_emc, NULL, 0, 0), - SHARED_CLK("camera.emc", "tegra_camera", "emc", &tegra_clk_emc, NULL, 0, 0), + SHARED_CLK("camera.emc", "tegra_camera", "emc", &tegra_clk_emc, NULL, 0, SHARED_BW), + SHARED_CLK("sdmmc4.emc", "sdhci-tegra.3", "emc", &tegra_clk_emc, NULL, 0, 0), SHARED_CLK("floor.emc", "floor.emc", NULL, &tegra_clk_emc, NULL, 0, 0), SHARED_CLK("host1x.cbus", "tegra_host1x", "host1x", &tegra_clk_cbus, "host1x", 2, SHARED_AUTO), From 4b5baff4f62544164d31080b9c626f76edef22b3 Mon Sep 17 00:00:00 2001 From: Alex Frid Date: Sun, 18 Mar 2012 00:01:02 -0700 Subject: [PATCH 100/678] ARM: tegra: clock: Account for memory BW efficiency Account for memory efficiency when processing requests from Tegra3 EMC shared bandwidth users. Do not round requests from these users until they are aggregated. The respective debugfs node: /d/tegra_emc/efficiency (in %). 
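As a rough worked example of the efficiency scaling (illustrative only, not part of the patch, assuming the default value of 35% that the patch introduces): an aggregated SHARED_BW request of 100 MHz is inflated to roughly

    100 MHz * 100 / 35  ~=  285 MHz

before being rounded and capped at the bus maximum rate, so lowering the efficiency value through the debugfs node raises the EMC rate requested for a given client bandwidth.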
Bug 952739 Signed-off-by: Alex Frid (cherry picked from commit 86929087f68c4366d6179101eb9a6a6473a4f084) Change-Id: I4acdd89f44de1401ce5dad8fc4936932df014458 Reviewed-on: http://git-master/r/103499 Reviewed-by: Automatic_Commit_Validation_User Tested-by: Aleksandr Frid Reviewed-by: Jihoon Bang Reviewed-by: Jon Mayo GVS: Gerrit_Virtual_Submit --- arch/arm/mach-tegra/tegra3_clocks.c | 17 ++++++++++++++++- arch/arm/mach-tegra/tegra3_emc.c | 22 ++++++++++++++++++++++ arch/arm/mach-tegra/tegra3_emc.h | 2 ++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index f14edcf2e31..902c2088fdb 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -3012,7 +3012,8 @@ static int tegra3_clk_shared_bus_update(struct clk *bus) if (c->u.shared_bus_user.enabled) { switch (c->u.shared_bus_user.mode) { case SHARED_BW: - bw += c->u.shared_bus_user.rate; + if (bw < bus->max_rate) + bw += c->u.shared_bus_user.rate; break; case SHARED_CEILING: ceiling = min(c->u.shared_bus_user.rate, @@ -3025,6 +3026,16 @@ static int tegra3_clk_shared_bus_update(struct clk *bus) } } } + + if (bw) { + if (bus->flags & PERIPH_EMC_ENB) { + bw = tegra_emc_bw_efficiency ? + (bw / tegra_emc_bw_efficiency) : bus->max_rate; + bw = (bw < bus->max_rate / 100) ? + (bw * 100) : bus->max_rate; + } + bw = clk_round_rate_locked(bus, bw); + } rate = min(max(rate, bw), ceiling); old_rate = clk_get_rate_locked(bus); @@ -3073,6 +3084,10 @@ static long tegra_clk_shared_bus_round_rate(struct clk *c, unsigned long rate) if (c->u.shared_bus_user.mode == SHARED_AUTO) rate = 0; + /* BW users should not be rounded until aggregated */ + if (c->u.shared_bus_user.mode == SHARED_BW) + return rate; + return clk_round_rate(c->parent, rate); } diff --git a/arch/arm/mach-tegra/tegra3_emc.c b/arch/arm/mach-tegra/tegra3_emc.c index 3c81495feda..3601c1449dd 100755 --- a/arch/arm/mach-tegra/tegra3_emc.c +++ b/arch/arm/mach-tegra/tegra3_emc.c @@ -45,6 +45,8 @@ static bool emc_enable; #endif module_param(emc_enable, bool, 0644); +u8 tegra_emc_bw_efficiency = 35; + #define EMC_MIN_RATE_DDR3 25500000 #define EMC_STATUS_UPDATE_TIMEOUT 100 #define TEGRA_EMC_TABLE_MAX_SIZE 16 @@ -1273,6 +1275,22 @@ static int eack_state_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(eack_state_fops, eack_state_get, eack_state_set, "%llu\n"); +static int efficiency_get(void *data, u64 *val) +{ + *val = tegra_emc_bw_efficiency; + return 0; +} +static int efficiency_set(void *data, u64 val) +{ + tegra_emc_bw_efficiency = (val > 100) ? 
100 : val; + if (emc) + tegra_clk_shared_bus_update(emc); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(efficiency_fops, efficiency_get, + efficiency_set, "%llu\n"); + static int __init tegra_emc_debug_init(void) { if (!tegra_emc_table) @@ -1298,6 +1316,10 @@ static int __init tegra_emc_debug_init(void) "eack_state", S_IRUGO | S_IWUSR, emc_debugfs_root, NULL, &eack_state_fops)) goto err_out; + if (!debugfs_create_file("efficiency", S_IRUGO | S_IWUSR, + emc_debugfs_root, NULL, &efficiency_fops)) + goto err_out; + return 0; err_out: diff --git a/arch/arm/mach-tegra/tegra3_emc.h b/arch/arm/mach-tegra/tegra3_emc.h index cfde92c1355..c6a1ddec49f 100755 --- a/arch/arm/mach-tegra/tegra3_emc.h +++ b/arch/arm/mach-tegra/tegra3_emc.h @@ -27,6 +27,8 @@ #define TEGRA_EMC_BRIDGE_RATE_MIN 300000000 #define TEGRA_EMC_BRIDGE_MVOLTS_MIN 1200 +extern u8 tegra_emc_bw_efficiency; + struct tegra_emc_table { u8 rev; unsigned long rate; From a3379bd3971243074cba4a0ab11ae73ec0a65e3f Mon Sep 17 00:00:00 2001 From: Alex Frid Date: Sat, 17 Mar 2012 23:38:07 -0700 Subject: [PATCH 101/678] ARM: tegra: clock: Add locked version of round rate Add locked version of round rate API to be used by tegra arch specific layer. Signed-off-by: Alex Frid (cherry picked from commit 457627966b91f2141439812869adc4acf9242471) Change-Id: Id68d0bb952d1e7d9e650341872d1b06b0b2d3cea Reviewed-on: http://git-master/r/100474 Reviewed-by: Automatic_Commit_Validation_User Tested-by: Aleksandr Frid Reviewed-by: Yu-Huan Hsu --- arch/arm/mach-tegra/clock.c | 16 ++++++++++++---- arch/arm/mach-tegra/clock.h | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c index e9ee87534bc..67ea3c39cbe 100644 --- a/arch/arm/mach-tegra/clock.c +++ b/arch/arm/mach-tegra/clock.c @@ -525,13 +525,11 @@ unsigned long clk_get_rate_all_locked(struct clk *c) return rate; } -long clk_round_rate(struct clk *c, unsigned long rate) +long clk_round_rate_locked(struct clk *c, unsigned long rate) { - unsigned long flags, max_rate; + unsigned long max_rate; long ret; - clk_lock_save(c, &flags); - if (!c->ops || !c->ops->round_rate) { ret = -ENOSYS; goto out; @@ -544,6 +542,16 @@ long clk_round_rate(struct clk *c, unsigned long rate) ret = c->ops->round_rate(c, rate); out: + return ret; +} + +long clk_round_rate(struct clk *c, unsigned long rate) +{ + unsigned long flags; + long ret; + + clk_lock_save(c, &flags); + ret = clk_round_rate_locked(c, rate); clk_unlock_restore(c, &flags); return ret; } diff --git a/arch/arm/mach-tegra/clock.h b/arch/arm/mach-tegra/clock.h index dde9e07292a..a9945e3c229 100644 --- a/arch/arm/mach-tegra/clock.h +++ b/arch/arm/mach-tegra/clock.h @@ -240,6 +240,7 @@ unsigned long clk_get_min_rate(struct clk *c); unsigned long clk_get_rate_locked(struct clk *c); int clk_set_rate_locked(struct clk *c, unsigned long rate); int clk_set_parent_locked(struct clk *c, struct clk *parent); +long clk_round_rate_locked(struct clk *c, unsigned long rate); int tegra_clk_shared_bus_update(struct clk *c); void tegra2_sdmmc_tap_delay(struct clk *c, int delay); void tegra3_set_cpu_skipper_delay(int delay); From d77ff796be61f5c4ed15a88277bc465602bdcc41 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 6 Nov 2012 17:26:27 -0500 Subject: [PATCH 102/678] tegra3_dvfs.c: revert voltage and dvfs table changes for g cores revert to stock voltages and dvfs table based on stock 1.5Ghz values for speedo ID 2 --- arch/arm/mach-tegra/tegra3_dvfs.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) 
diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 05b63bc8778..101281ef746 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -30,8 +30,7 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { -// 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; - 800, 800, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; #endif static bool tegra_dvfs_cpu_disabled; @@ -39,8 +38,7 @@ static bool tegra_dvfs_core_disabled; static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { -// 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; - 800, 800, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25, 25, 25, 25}; @@ -166,11 +164,11 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 4, 0, MHZ, 460, 460, 550, 550, 680, 680, 820, 970, 1040, 1080, 1150, 1200, 1240, 1280, 1320, 1360, 1360, 1500), CPU_DVFS("cpu_g", 4, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1250, 1300, 1330, 1360, 1400, 1500), - /* Nexus 7 - faking speedo id = 4, process id =2 - CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500, 1600), */ + /* Nexus 7 - faking speedo id = 4, process id =2*/ + CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500, 1600), /*Cpu voltages (mV): 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237 */ - CPU_DVFS("cpu_g", 4, 2, MHZ, 475, 475, 620, 620, 860, 860, 1000, 1000, 1100, 1100, 1200, 1300, 1400, 1500, 1600), +// CPU_DVFS("cpu_g", 4, 2, MHZ, 475, 475, 620, 620, 860, 860, 1000, 1000, 1100, 1100, 1200, 1300, 1400, 1500, 1600), CPU_DVFS("cpu_g", 4, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500), From a220a7c2e6f05e499310786bef7f18d11e5e0c32 Mon Sep 17 00:00:00 2001 From: Joseph Lo Date: Thu, 29 Mar 2012 14:46:33 +0800 Subject: [PATCH 103/678] ARM: tegra: cpu: enable VDD_CPU rail before LP to G transition conflict - add decouple mode from flags When doing LP to G transition, it had a power up latency on VDD_CPU rail. To reduce the latency, CPU_LP can trun on the VDD_CPU rail before the LP to G transition. 
Bug 930985 Change-Id: I087e185ea5aa90f309b8cafba9bc4bb7d3fc950c Signed-off-by: Joseph Lo Reviewed-on: http://git-master/r/93141 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Antti Miettinen Tested-by: Antti Miettinen GVS: Gerrit_Virtual_Submit Reviewed-by: Juha Tukkinen Conflicts: arch/arm/mach-tegra/pm.c --- arch/arm/mach-tegra/pm.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c index e53effb9b66..7d2d1016843 100644 --- a/arch/arm/mach-tegra/pm.c +++ b/arch/arm/mach-tegra/pm.c @@ -138,6 +138,11 @@ struct suspend_context tegra_sctx; #define PMC_CPUPWROFF_TIMER 0xcc #define PMC_COREPWROFF_TIMER PMC_WAKE_DELAY +#define PMC_PWRGATE_TOGGLE 0x30 +#define PWRGATE_TOGGLE_START (1 << 8) +#define UN_PWRGATE_CPU \ + (PWRGATE_TOGGLE_START | TEGRA_CPU_POWERGATE_ID(TEGRA_POWERGATE_CPU)) + #ifdef CONFIG_TEGRA_CLUSTER_CONTROL #define PMC_SCRATCH4_WAKE_CLUSTER_MASK (1<<31) #endif @@ -527,7 +532,6 @@ unsigned int tegra_idle_lp2_last(unsigned int sleep_time, unsigned int flags) mode |= TEGRA_POWER_PWRREQ_OE; mode &= ~TEGRA_POWER_EFFECT_LP0; pmc_32kwritel(mode, PMC_CTRL); - mode |= flags; tegra_cluster_switch_time(flags, tegra_cluster_switch_time_id_start); @@ -539,7 +543,17 @@ unsigned int tegra_idle_lp2_last(unsigned int sleep_time, unsigned int flags) trace_cpu_cluster(POWER_CPU_CLUSTER_START); set_power_timers(pdata->cpu_timer, 0, clk_get_rate_all_locked(tegra_pclk)); - tegra_cluster_switch_prolog(mode); + if (flags & TEGRA_POWER_CLUSTER_G) { + /* + * To reduce the vdd_cpu up latency when LP->G + * transition. Before the transition, enable + * the vdd_cpu rail. + */ + if (is_lp_cluster()) + writel(UN_PWRGATE_CPU, + pmc + PMC_PWRGATE_TOGGLE); + } + tegra_cluster_switch_prolog(flags); } else { set_power_timers(pdata->cpu_timer, pdata->cpu_off_timer, clk_get_rate_all_locked(tegra_pclk)); @@ -549,7 +563,7 @@ unsigned int tegra_idle_lp2_last(unsigned int sleep_time, unsigned int flags) tegra_lp2_set_trigger(sleep_time); cpu_complex_pm_enter(); - suspend_cpu_complex(mode); + suspend_cpu_complex(flags); tegra_cluster_switch_time(flags, tegra_cluster_switch_time_id_prolog); flush_cache_all(); /* @@ -566,7 +580,7 @@ unsigned int tegra_idle_lp2_last(unsigned int sleep_time, unsigned int flags) tegra_init_cache(false); tegra_cluster_switch_time(flags, tegra_cluster_switch_time_id_switch); - restore_cpu_complex(mode); + restore_cpu_complex(flags); cpu_complex_pm_exit(); remain = tegra_lp2_timer_remain(); @@ -574,7 +588,7 @@ unsigned int tegra_idle_lp2_last(unsigned int sleep_time, unsigned int flags) tegra_lp2_set_trigger(0); if (flags & TEGRA_POWER_CLUSTER_MASK) { - tegra_cluster_switch_epilog(mode); + tegra_cluster_switch_epilog(flags); trace_cpu_cluster(POWER_CPU_CLUSTER_DONE); } tegra_cluster_switch_time(flags, tegra_cluster_switch_time_id_epilog); From 4d62299eae840f2b1ae41f510d47dc370ae8ee42 Mon Sep 17 00:00:00 2001 From: faux123 Date: Sat, 12 Nov 2011 23:36:46 -0800 Subject: [PATCH 104/678] kernel:sched: LOAD_FREQ (4*HZ+61) avoids loadavg Moire LOAD_FREQ is (5*HZ+1) to avoid high load average when idle: http://kerneltrap.org/mailarchive/linux-kernel/2007/10/3/328568 I suggest (4*HZ+61) for a better distribution. With some seconds based load (like SSL heartbeats) and LOAD_FREQ at (5*HZ+1) I see Moire patterns like inverse sawtooth, since 2 or 3 probes hit the jobs (load increases quickly), followed by several probes missing it. 
A 4.61 sec interval gives optimal distribution over when within a second a probe is taken, as .61 is close to golden ratio phi 1.618... (test in http://ripke.com/goldenratio.c). 12*4.61 = 55.32 secs is still close to a minute, and 13*4.61=59.93 is even closer than the current 12*5.01=60.12 (with exponents EXP_x adjusted to a ratio of 13 instead of 12). --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5bb4dd2e4c5..f9ff1e11a3a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -122,7 +122,7 @@ extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); #define FSHIFT 11 /* nr of bits of precision */ #define FIXED_1 (1< Date: Sun, 27 Nov 2011 00:28:19 -0800 Subject: [PATCH 105/678] ARM: Add optimised swahb32() byteswap helper for v6 and above ARMv6 and later processors have the REV16 instruction, which swaps the bytes within each halfword of a register value. This is already used to implement swab16(), but since the native operation performaed by REV16 is actually swahb32(), this patch renames the existing swab16() helper accordingly and defines __arch_swab16() in terms of it. This allows calls to both swab16() and swahb32() to be optimised. The compiler's generated code might improve someday, but as of 4.5.2 the code generated for pure C implementing these 16-bit bytesswaps remains pessimal. swahb32() is useful for converting 32-bit Thumb instructions between integer and memory representation on BE8 platforms (among other uses). Signed-off-by: Dave Martin --- arch/arm/include/asm/swab.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/swab.h b/arch/arm/include/asm/swab.h index 9997ad20eff..32ee164a2f6 100644 --- a/arch/arm/include/asm/swab.h +++ b/arch/arm/include/asm/swab.h @@ -24,12 +24,13 @@ #if defined(__KERNEL__) && __LINUX_ARM_ARCH__ >= 6 -static inline __attribute_const__ __u16 __arch_swab16(__u16 x) +static inline __attribute_const__ __u32 __arch_swahb32(__u32 x) { __asm__ ("rev16 %0, %1" : "=r" (x) : "r" (x)); return x; } -#define __arch_swab16 __arch_swab16 +#define __arch_swahb32 __arch_swahb32 +#define __arch_swab16(x) ((__u16)__arch_swahb32(x)) static inline __attribute_const__ __u32 __arch_swab32(__u32 x) { From df50832ea7f427efc5acd661a0f7f6b0a5e8b1fd Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 14 Dec 2011 12:22:44 -0800 Subject: [PATCH 106/678] Asynchronous I/O latency to a solid-state disk greatly increased between the 2.6.32 and 3.0 kernels. By removing the plug from do_io_submit(), we observed a 34% improvement in the I/O latency. Unfortunately, at this level, we don't know if the request is to a rotating disk or not. Signed-off-by: Dave Kleikamp Cc: linux-aio@kvack.org Cc: Chris Mason Cc: Jens Axboe Cc: Andi Kleen Cc: Jeff Moyer --- fs/aio.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index e29ec485af2..75e05c91605 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1622,7 +1622,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, struct kioctx *ctx; long ret = 0; int i; - struct blk_plug plug; if (unlikely(nr < 0)) return -EINVAL; @@ -1639,8 +1638,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, return -EINVAL; } - blk_start_plug(&plug); - /* * AKPM: should this return a partial result if some of the IOs were * successfully submitted? 
@@ -1663,7 +1660,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, if (ret) break; } - blk_finish_plug(&plug); put_ioctx(ctx); return i ? i : ret; From 734fc4c77dcbc244fc5b9c749d00398ec6209498 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 21 Dec 2011 00:43:23 -0800 Subject: [PATCH 107/678] block: recursive merge requests In my workload, thread 1 accesses a, a+2, ..., thread 2 accesses a+1, a+3,.... When the requests are flushed to queue, a and a+1 are merged to (a, a+1), a+2 and a+3 too to (a+2, a+3), but (a, a+1) and (a+2, a+3) aren't merged. With recursive merge below, the workload throughput gets improved 20% and context switch drops 60%. Signed-off-by: Shaohua Li --- block/elevator.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index a3b64bc71d8..979bab9bf35 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -522,6 +522,7 @@ static bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq) { struct request *__rq; + bool ret; if (blk_queue_nomerges(q)) return false; @@ -535,14 +536,21 @@ static bool elv_attempt_insert_merge(struct request_queue *q, if (blk_queue_noxmerges(q)) return false; + ret = false; /* * See if our hash lookup can find a potential backmerge. */ - __rq = elv_rqhash_find(q, blk_rq_pos(rq)); - if (__rq && blk_attempt_req_merge(q, __rq, rq)) - return true; + while (1) { + __rq = elv_rqhash_find(q, blk_rq_pos(rq)); + if (!__rq || !blk_attempt_req_merge(q, __rq, rq)) + break; - return false; + /* The merged request could be merged with others, try again */ + ret = true; + rq = __rq; + } + + return ret; } void elv_merged_request(struct request_queue *q, struct request *rq, int type) From 143cb1a810685bebc2d5838c9ac5a5485df1ede1 Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Sun, 15 Jan 2012 10:29:06 -0800 Subject: [PATCH 108/678] cpufreq: update lpj only if frequency has changed During scaling up of cpu frequency, loops_per_jiffy is updated upon invoking PRECHANGE notifier. If setting to new frequency fails in cpufreq driver, lpj is left at incorrect value. Hence update lpj only if cpu frequency is changed, i.e. upon invoking POSTCHANGE notifier. Penalty would be that during time period between changing cpu frequency & invocation of POSTCHANGE notifier, udelay(x) may not gurantee minimal delay of 'x' us for frequency scaling up operation. 
Perhaps a better solution would be to define CPUFREQ_ABORTCHANGE & handle accordingly, but then it would be more intrusive (using ABORTCHANGE may help drivers also; if any has registered notifier and expect POST for a PRECHANGE, their needs can be taken care using ABORT) Signed-off-by: Afzal Mohammed --- drivers/cpufreq/cpufreq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9cd242e282c..1738ad91c5a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -205,8 +205,7 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) pr_debug("saving %lu as reference value for loops_per_jiffy; " "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq); } - if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) || - (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) || + if ((val == CPUFREQ_POSTCHANGE && ci->old != ci->new) || (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, ci->new); From 73e94b87cf5f5e29302a8c574658ba6641b21607 Mon Sep 17 00:00:00 2001 From: faux123 Date: Mon, 19 Mar 2012 17:22:43 -0700 Subject: [PATCH 109/678] Optimized ARM RWSEM algorithm RWSEM implementation for ARM using atomic functions. Heavily based on arch/sh/include/asm/rwsem.h Signed-off-by: Ashwin Chaugule --- arch/arm/Kconfig | 3 +- arch/arm/include/asm/rwsem.h | 138 +++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 arch/arm/include/asm/rwsem.h diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3c3b868948a..86456c5787c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -139,10 +139,9 @@ config GENERIC_LOCKBREAK config RWSEM_GENERIC_SPINLOCK bool - default y config RWSEM_XCHGADD_ALGORITHM - bool + def_bool y config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/arm/include/asm/rwsem.h b/arch/arm/include/asm/rwsem.h new file mode 100644 index 00000000000..2066674d8e6 --- /dev/null +++ b/arch/arm/include/asm/rwsem.h @@ -0,0 +1,138 @@ +/* rwsem.h: R/W semaphores implemented using ARM atomic functions. + * + * Copyright (c) 2010, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +#ifndef _ASM_ARM_RWSEM_H +#define _ASM_ARM_RWSEM_H + +#ifndef _LINUX_RWSEM_H +#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" +#endif + +#ifdef __KERNEL__ +#include +#include + +#define RWSEM_UNLOCKED_VALUE 0x00000000 +#define RWSEM_ACTIVE_BIAS 0x00000001 +#define RWSEM_ACTIVE_MASK 0x0000ffff +#define RWSEM_WAITING_BIAS (-0x00010000) +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + +/* + * lock for reading + */ +static inline void __down_read(struct rw_semaphore *sem) +{ + if (atomic_inc_return((atomic_t *)(&sem->count)) < 0) + rwsem_down_read_failed(sem); +} + +static inline int __down_read_trylock(struct rw_semaphore *sem) +{ + int tmp; + + while ((tmp = sem->count) >= 0) { + if (tmp == cmpxchg(&sem->count, tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { + return 1; + } + } + return 0; +} + +/* + * lock for writing + */ +static inline void __down_write(struct rw_semaphore *sem) +{ + int tmp; + + tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS, + (atomic_t *)(&sem->count)); + if (tmp != RWSEM_ACTIVE_WRITE_BIAS) + rwsem_down_write_failed(sem); +} + +static inline int __down_write_trylock(struct rw_semaphore *sem) +{ + int tmp; + + tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); + return tmp == RWSEM_UNLOCKED_VALUE; +} + +/* + * unlock after reading + */ +static inline void __up_read(struct rw_semaphore *sem) +{ + int tmp; + + tmp = atomic_dec_return((atomic_t *)(&sem->count)); + if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0) + rwsem_wake(sem); +} + +/* + * unlock after writing + */ +static inline void __up_write(struct rw_semaphore *sem) +{ + if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, + (atomic_t *)(&sem->count)) < 0) + rwsem_wake(sem); +} + +/* + * implement atomic add functionality + */ +static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +{ + atomic_add(delta, (atomic_t *)(&sem->count)); +} + +/* + * downgrade write lock to read lock + */ +static inline void __downgrade_write(struct rw_semaphore *sem) +{ + int tmp; + + tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count)); + if (tmp < 0) + rwsem_downgrade_wake(sem); +} + +static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +{ + __down_write(sem); +} + +/* + * implement exchange and add functionality + */ +static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +{ + return atomic_add_return(delta, (atomic_t *)(&sem->count)); +} + +#endif /* __KERNEL__ */ +#endif /* _ASM_ARM_RWSEM_H */ From d4c18e56fe74e9222b55fb33b1f8bf5399f72f23 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 4 May 2012 21:08:33 -0700 Subject: [PATCH 110/678] timer: optimize apply_slack() __fls(mask) is equivalent to find_last_bit(&mask, BITS_PER_LONG), but cheaper. find_last_bit was showing up high on the list when I was profiling for stalls on icache misses on a system with very small cache size (MIPS). 
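For context (an illustrative sketch, not part of the patch): __fls() returns the bit index of the most significant set bit in a single non-zero word, which is exactly what find_last_bit() computes for a one-word bitmap, e.g.

    unsigned long mask = 0x28;                    /* bits 3 and 5 set */
    __fls(mask)                          == 5
    find_last_bit(&mask, BITS_PER_LONG)  == 5

so the substitution below trades only lookup cost, not behaviour.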
Signed-off-by: Felix Fietkau --- kernel/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/timer.c b/kernel/timer.c index 8cff36119e4..b7474f32e11 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -763,7 +763,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) if (mask == 0) return expires; - bit = find_last_bit(&mask, BITS_PER_LONG); + bit = __fls(mask); mask = (1 << bit) - 1; From aa41ebc63b8b09192aac2f91a44e9f258deaa7de Mon Sep 17 00:00:00 2001 From: faux123 Date: Sat, 25 Feb 2012 12:35:48 -0800 Subject: [PATCH 111/678] sched: disable GENTLE_FAIR_SLEEPERS for better performance on Android --- kernel/sched_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 2e74677cb04..48e69155111 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -3,7 +3,7 @@ * them to run sooner, but does not allow tons of sleepers to * rip the spread apart. */ -SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) +SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 0) /* * Place new tasks ahead so that they do not starve already running From a42355cdb2037952aa292061fd15d3ce4b465fc6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 May 2012 18:41:12 +0100 Subject: [PATCH 112/678] ARM: spinlock: use ticket algorithm for ARMv6+ locking implementation Ticket spinlocks ensure locking fairness by reducing the thundering herd effect when acquiring a lock. This is especially important on systems where memory-access times are not necessarily uniform when accessing the lock structure (for example, on a multi-cluster platform where the lock is allocated into L1 when a CPU releases it). This patch implements the ticket spinlock algorithm for ARM, replacing the simpler implementation for ARMv6+ processors. Signed-off-by: Will Deacon --- arch/arm/include/asm/spinlock.h | 73 ++++++++++++++++++--------- arch/arm/include/asm/spinlock_types.h | 17 ++++++- 2 files changed, 64 insertions(+), 26 deletions(-) diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 65fa3c88095..dcca63802a0 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -59,18 +59,13 @@ static inline void dsb_sev(void) } /* - * ARMv6 Spin-locking. + * ARMv6 ticket-based spin-locking. * - * We exclusively read the old value. If it is zero, we may have - * won the lock, so we try exclusively storing it. A memory barrier - * is required after we get a lock, and before we release it, because - * V6 CPUs are assumed to have weakly ordered memory. - * - * Unlocked value: 0 - * Locked value: 1 + * A memory barrier is required after we get a lock, and before we + * release it, because V6 CPUs are assumed to have weakly ordered + * memory.
*/ -#define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_spin_unlock_wait(lock) \ do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) @@ -79,31 +74,40 @@ static inline void dsb_sev(void) static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; + u32 newval; + arch_spinlock_t lockval; __asm__ __volatile__( -"1: ldrex %0, [%1]\n" -" teq %0, #0\n" - WFE("ne") -" strexeq %0, %2, [%1]\n" -" teqeq %0, #0\n" +"1: ldrex %0, [%3]\n" +" add %1, %0, %4\n" +" strex %2, %1, [%3]\n" +" teq %2, #0\n" " bne 1b" - : "=&r" (tmp) - : "r" (&lock->lock), "r" (1) + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); + while (lockval.tickets.next != lockval.tickets.owner) { + wfe(); + lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner); + } + smp_mb(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp; + u32 slock; __asm__ __volatile__( -" ldrex %0, [%1]\n" -" teq %0, #0\n" -" strexeq %0, %2, [%1]" - : "=&r" (tmp) - : "r" (&lock->lock), "r" (1) +" ldrex %0, [%2]\n" +" cmp %0, %0, ror #16\n" +" movne %1, #1\n" +" addeq %0, %0, %3\n" +" strexeq %1, %0, [%2]" + : "=&r" (slock), "=&r" (tmp) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); if (tmp == 0) { @@ -116,17 +120,38 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { + unsigned long tmp; + u32 slock; + smp_mb(); __asm__ __volatile__( -" str %1, [%0]\n" - : - : "r" (&lock->lock), "r" (0) +" mov %1, #1\n" +"1: ldrex %0, [%2]\n" +" uadd16 %0, %0, %1\n" +" strex %1, %0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (slock), "=&r" (tmp) + : "r" (&lock->slock) : "cc"); dsb_sev(); } +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); + return tickets.owner != tickets.next; +} + +static inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); + return (tickets.next - tickets.owner) > 1; +} +#define arch_spin_is_contended arch_spin_is_contended + /* * RWLOCKS * diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index d14d197ae04..b262d2f8b47 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -5,11 +5,24 @@ # error "please don't include this file directly" #endif +#define TICKET_SHIFT 16 + typedef struct { - volatile unsigned int lock; + union { + u32 slock; + struct __raw_tickets { +#ifdef __ARMEB__ + u16 next; + u16 owner; +#else + u16 owner; + u16 next; +#endif + } tickets; + }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } typedef struct { volatile unsigned int lock; From 6ce25d89439bc94986a8400a32b878eb5c0421e1 Mon Sep 17 00:00:00 2001 From: faux123 Date: Sun, 3 Jun 2012 11:01:45 -0700 Subject: [PATCH 113/678] sched/nohz: Fix rq->cpu_load[] calculations While investigating why the load-balancer did funny I found that the rq->cpu_load[] tables were completely screwy.. a bit more digging revealed that the updates that got through were missing ticks followed by a catchup of 2 ticks. The catchup assumes the cpu was idle during that time (since only nohz can cause missed ticks and the machine is idle etc..) this means that esp. the higher indices were significantly lower than they ought to be. 
The reason for this is that its not correct to compare against jiffies on every jiffy on any other cpu than the cpu that updates jiffies. This patch cludges around it by only doing the catch-up stuff from nohz_idle_balance() and doing the regular stuff unconditionally from the tick. Signed-off-by: Peter Zijlstra Cc: pjt@google.com Cc: Venkatesh Pallipadi Link: http://lkml.kernel.org/n/tip-tp4kj18xdd5aj4vvj0qg55s2@git.kernel.org Signed-off-by: Ingo Molnar modified for Linux 3.0 from Linux 3.4+ by faux123 --- kernel/sched.c | 52 +++++++++++++++++++++++++++++++++------------ kernel/sched_fair.c | 4 +++- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 6121c2ce14b..4e2ac46c606 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1732,7 +1732,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) static void calc_load_account_idle(struct rq *this_rq); static void update_sysctl(void); static int get_update_sysctl_factor(void); -static void update_cpu_load(struct rq *this_rq); static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) { @@ -3593,22 +3592,13 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) * scheduler tick (TICK_NSEC). With tickless idle this will not be called * every tick. We fix it up based on jiffies. */ -static void update_cpu_load(struct rq *this_rq) +static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, + unsigned long pending_updates) { - unsigned long this_load = this_rq->load.weight; - unsigned long curr_jiffies = jiffies; - unsigned long pending_updates; int i, scale; this_rq->nr_load_updates++; - /* Avoid repeated calls on same jiffy, when moving in and out of idle */ - if (curr_jiffies == this_rq->last_load_update_tick) - return; - - pending_updates = curr_jiffies - this_rq->last_load_update_tick; - this_rq->last_load_update_tick = curr_jiffies; - /* Update our load: */ this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */ for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) { @@ -3633,9 +3623,45 @@ static void update_cpu_load(struct rq *this_rq) sched_avg_update(this_rq); } +/* + * Called from nohz_idle_balance() to update the load ratings before doing the + * idle balance. + */ +void update_idle_cpu_load(struct rq *this_rq) +{ + unsigned long curr_jiffies = jiffies; + unsigned long load = this_rq->load.weight; + unsigned long pending_updates; + + /* + * Bloody broken means of dealing with nohz, but better than nothing.. + * jiffies is updated by one cpu, another cpu can drift wrt the jiffy + * update and see 0 difference the one time and 2 the next, even though + * we ticked at roughtly the same rate. + * + * Hence we only use this from nohz_idle_balance() and skip this + * nonsense when called from the scheduler_tick() since that's + * guaranteed a stable rate. + */ + if (load || curr_jiffies == this_rq->last_load_update_tick) + return; + + pending_updates = curr_jiffies - this_rq->last_load_update_tick; + this_rq->last_load_update_tick = curr_jiffies; + + __update_cpu_load(this_rq, load, pending_updates); +} + +/* + * Called from scheduler_tick() + */ static void update_cpu_load_active(struct rq *this_rq) { - update_cpu_load(this_rq); + /* + * See the mess in update_idle_cpu_load(). 
+ */ + this_rq->last_load_update_tick = jiffies; + __update_cpu_load(this_rq, this_rq->load.weight, 1); calc_load_account_active(this_rq); } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 240739c07b3..c2aae12cb54 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -3967,6 +3967,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) } #ifdef CONFIG_NO_HZ +extern void update_idle_cpu_load(struct rq *this_rq); + /* * In CONFIG_NO_HZ case, the idle balance kickee will do the * rebalancing for all the cpus for whom scheduler ticks are stopped. @@ -3996,7 +3998,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) raw_spin_lock_irq(&this_rq->lock); update_rq_clock(this_rq); - update_cpu_load(this_rq); + update_idle_cpu_load(this_rq); raw_spin_unlock_irq(&this_rq->lock); rebalance_domains(balance_cpu, CPU_IDLE); From 249d5cc6679073760a7a6d9d56e9f69c1b866a96 Mon Sep 17 00:00:00 2001 From: faux123 Date: Sun, 3 Jun 2012 11:02:28 -0700 Subject: [PATCH 114/678] sched/fair: Improve the ->group_imb logic Group imbalance is meant to deal with situations where affinity masks and sched domains don't align well, such as 3 cpus from one group and 6 from another. In this case the domain based balancer will want to put an equal amount of tasks on each side even though they don't have equal cpus. Currently group_imb is set whenever two cpus of a group have a weight difference of at least one avg task and the heaviest cpu has at least two tasks. A group with imbalance set will always be picked as busiest and a balance pass will be forced. The problem is that even if there are no affinity masks this stuff can trigger and cause weird balancing decisions, eg. the observed behaviour was that of 6 cpus, 5 had 2 and 1 had 3 tasks, due to the difference of 1 avg load (they all had the same weight) and nr_running being >1 the group_imbalance logic triggered and did the weird thing of pulling more load instead of trying to move the 1 excess task to the other domain of 6 cpus that had 5 cpu with 2 tasks and 1 cpu with 1 task. Curb the group_imbalance stuff by making the nr_running condition weaker by also tracking the min_nr_running and using the difference in nr_running over the set instead of the absolute max nr_running. 
Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-9s7dedozxo8kjsb9kqlrukkf@git.kernel.org Signed-off-by: Ingo Molnar modified for Linux 3.0 from Linux 3.4+ by faux123 --- kernel/sched_fair.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c2aae12cb54..06e3c0b7c3d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -2727,7 +2727,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, int local_group, const struct cpumask *cpus, int *balance, struct sg_lb_stats *sgs) { - unsigned long load, max_cpu_load, min_cpu_load, max_nr_running; + unsigned long nr_running, max_nr_running, min_nr_running; + unsigned long load, max_cpu_load, min_cpu_load; int i; unsigned int balance_cpu = -1, first_idle_cpu = 0; unsigned long avg_load_per_task = 0; @@ -2739,10 +2740,13 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, max_cpu_load = 0; min_cpu_load = ~0UL; max_nr_running = 0; + min_nr_running = ~0UL; for_each_cpu_and(i, sched_group_cpus(group), cpus) { struct rq *rq = cpu_rq(i); + nr_running = rq->nr_running; + /* Bias balancing toward cpus of our domain */ if (local_group) { if (idle_cpu(i) && !first_idle_cpu) { @@ -2753,16 +2757,19 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, load = target_load(i, load_idx); } else { load = source_load(i, load_idx); - if (load > max_cpu_load) { + if (load > max_cpu_load) max_cpu_load = load; - max_nr_running = rq->nr_running; - } if (min_cpu_load > load) min_cpu_load = load; + + if (nr_running > max_nr_running) + max_nr_running = nr_running; + if (min_nr_running > nr_running) + min_nr_running = nr_running; } sgs->group_load += load; - sgs->sum_nr_running += rq->nr_running; + sgs->sum_nr_running += nr_running; sgs->sum_weighted_load += weighted_cpuload(i); if (idle_cpu(i)) sgs->idle_cpus++; @@ -2797,7 +2804,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, if (sgs->sum_nr_running) avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; - if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) + if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && + (max_nr_running - min_nr_running) > 1) sgs->group_imb = 1; sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power, From 807cae75e15b7be8470237f967d6809c21970725 Mon Sep 17 00:00:00 2001 From: faux123 Date: Tue, 12 Jun 2012 13:45:02 -0700 Subject: [PATCH 115/678] sched: Folding nohz load accounting more accurate After patch 453494c3d4 (sched: Fix nohz load accounting -- again!), we can fold the idle into calc_load_tasks_idle between the last cpu load calculating and calc_global_load calling. However problem still exits between the first cpu load calculating and the last cpu load calculating. Every time when we do load calculating, calc_load_tasks_idle will be added into calc_load_tasks, even if the idle load is caused by calculated cpus. This problem is also described in the following link: https://lkml.org/lkml/2012/5/24/419 This bug can be found in our work load. The average running processes number is about 15, but the load only shows about 4. The patch provides a solution, by taking calculated load cpus' idle away from real effective idle. First adds a cpumask to record those cpus that alread calculated their load, and then adds a calc_unmask_cpu_load_idle to record thoses not marked cpus' go-idle load. 
Calc_unmask_cpu_load_idle takes place of calc_load_tasks_idle to be added into calc_load_tasks every 5HZ when cpu calculate its load. Go-idle load on those cpus which load alread has been calculated will only be added into calc_load_tasks_idle, no in calc_unmask_cpu_load_idle. Reported-by: Sha Zhengju Signed-off-by: Charles Wang modified for HTC One X kernel from Linux 3.4+ by faux123 --- include/linux/sched.h | 2 +- kernel/sched.c | 84 ++++++++++++++++++++++++++++++++++++++- kernel/time/timekeeping.c | 1 + 3 files changed, 84 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index f9ff1e11a3a..11af9aaf064 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -144,7 +144,7 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(unsigned long ticks); - +extern void prepare_idle_mask(unsigned long ticks); extern unsigned long get_parent_ip(unsigned long addr); struct seq_file; diff --git a/kernel/sched.c b/kernel/sched.c index 4e2ac46c606..eeed910c36d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3270,6 +3270,7 @@ unsigned long this_cpu_load(void) /* Variables and functions for calc_load */ static atomic_long_t calc_load_tasks; static unsigned long calc_load_update; +static unsigned long idle_mask_update; unsigned long avenrun[3]; EXPORT_SYMBOL(avenrun); @@ -3305,13 +3306,37 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) */ static atomic_long_t calc_load_tasks_idle; +/* + * Those cpus whose load alread has been calculated in this LOAD_FREQ + * period will be masked. + */ +struct cpumask cpu_load_update_mask; + +/* + * Fold unmask cpus' idle load + */ +static atomic_long_t calc_unmask_cpu_load_idle; + static void calc_load_account_idle(struct rq *this_rq) { long delta; + int cpu = smp_processor_id(); delta = calc_load_fold_active(this_rq); - if (delta) + if (delta) { atomic_long_add(delta, &calc_load_tasks_idle); + /* + * calc_unmask_cpu_load_idle is only used between the first + * cpu load accounting + * and the last cpu load accounting in every LOAD_FREQ period, + * and records idle load on + * those unmask cpus. + */ + if (!cpumask_empty(&cpu_load_update_mask) && + !cpumask_test_cpu(cpu, &cpu_load_update_mask)) { + atomic_long_add(delta, &calc_unmask_cpu_load_idle); + } + } } static long calc_load_fold_idle(void) @@ -3327,6 +3352,18 @@ static long calc_load_fold_idle(void) return delta; } +static long calc_load_fold_unmask_idle(void) +{ + long delta = 0; + + if (atomic_long_read(&calc_unmask_cpu_load_idle)) { + delta = atomic_long_xchg(&calc_unmask_cpu_load_idle, 0); + atomic_long_sub(delta, &calc_load_tasks_idle); + } + + return delta; +} + /** * fixed_power_int - compute: x^n, in O(log n) time * @@ -3421,6 +3458,9 @@ static void calc_global_nohz(unsigned long ticks) if (delta) atomic_long_add(delta, &calc_load_tasks); + cpumask_clear(&cpu_load_update_mask); + atomic_long_xchg(&calc_unmask_cpu_load_idle, 0); + /* * If we were idle for multiple load cycles, apply them. */ @@ -3478,6 +3518,26 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) loads[2] = (avenrun[2] + offset) << shift; } +/* ++ * Prepare cpu_load_update_mask for the comming per-cpu load calculating ++ */ +void prepare_idle_mask(unsigned long ticks) +{ + if (time_before(jiffies, idle_mask_update - 10)) + return; + + cpumask_clear(&cpu_load_update_mask); + /* + * calc_unmask_cpu_load_idle is part of calc_load_tasks_idle, + * and calc_load_tasks_ide will be folded into calc_load_tasks + * immediately. 
+ * So no need to keep this now. + */ + atomic_long_xchg(&calc_unmask_cpu_load_idle, 0); + + idle_mask_update += LOAD_FREQ; +} + /* * calc_load - update the avenrun load estimates 10 ticks after the * CPUs have updated calc_load_tasks. @@ -3508,12 +3568,30 @@ void calc_global_load(unsigned long ticks) static void calc_load_account_active(struct rq *this_rq) { long delta; + int cpu = smp_processor_id(); if (time_before(jiffies, this_rq->calc_load_update)) return; + /* + * cpu_load_update_mask empty means the first cpu + * doing load calculating. Global idle should be + * folded into calc_load_tasks, so we just push it + * to calc_unmask_cpu_load_idle. + */ + if (cpumask_empty(&cpu_load_update_mask)) + atomic_long_set(&calc_unmask_cpu_load_idle, + atomic_long_read(&calc_load_tasks_idle)); + /* + * Mask this cpu as load calculated, + * then go-idle in this cpu won't take effect + * to calc_load_tasks. + */ + cpumask_set_cpu(cpu, &cpu_load_update_mask); + delta = calc_load_fold_active(this_rq); - delta += calc_load_fold_idle(); + /* Fold unmask cpus' load into calc_load_tasks */ + delta += calc_load_fold_unmask_idle(); if (delta) atomic_long_add(delta, &calc_load_tasks); @@ -8203,6 +8281,8 @@ void __init sched_init(void) calc_load_update = jiffies + LOAD_FREQ; + idle_mask_update = jiffies + LOAD_FREQ; + /* * During early bootup we pretend to be a normal task: */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 00231db0eae..ec66f787a74 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1115,6 +1115,7 @@ void do_timer(unsigned long ticks) jiffies_64 += ticks; update_wall_time(); calc_global_load(ticks); + prepare_idle_mask(ticks); } /** From a5e66a721a2930974af2b3c3e23a68d46859b66f Mon Sep 17 00:00:00 2001 From: faux123 Date: Fri, 22 Jun 2012 15:45:52 -0700 Subject: [PATCH 116/678] mm, oom: fix potential killing of thread that is disabled from oom killing /proc/sys/vm/oom_kill_allocating_task will immediately kill current when the oom killer is called to avoid a potentially expensive tasklist scan for large systems. Currently, however, it is not checking current's oom_score_adj value which may be OOM_SCORE_ADJ_MIN, meaning that it has been disabled from oom killing. This patch avoids killing current in such a condition and simply falls back to the tasklist scan since memory still needs to be freed. Signed-off-by: David Rientjes backported from Linux 3.5 to linux 2.6.39 by faux123 --- mm/oom_kill.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index e9a17857a20..756071f70e0 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -720,9 +720,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, check_panic_on_oom(constraint, gfp_mask, order, mpol_mask); read_lock(&tasklist_lock); - if (sysctl_oom_kill_allocating_task && + if (sysctl_oom_kill_allocating_task && current->mm && !oom_unkillable_task(current, NULL, nodemask) && - current->mm && !atomic_read(&current->mm->oom_disable_count)) { + current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN && + !atomic_read(&current->mm->oom_disable_count)) { /* * oom_kill_process() needs tasklist_lock held.
If it returns * non-zero, current could not be killed so we must fallback to From d226f0c37a6a29d230523664561f1dac15442665 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Jun 2012 10:42:51 -0700 Subject: [PATCH 117/678] sched: clock wrap bug in 2.6.35-stable kills scheduling On Mon, 2012-06-25 at 11:45 +0200, Peter Zijlstra wrote: > On Sun, 2012-06-24 at 19:32 +0200, Thomas Lange wrote: > > Bug was introduced in 2.6.35.12 and is still present in linux-2.6.35.y HEAD. > > Ok, so nobody cares about that.. what does something recent like 3.5-rc4 > do? > > If that's fixed, find the patch that fixes it. If not, we'll have a > look. If anything, I think something like the below ought to cure things. --- kernel/sched_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 9d8af0b3fb6..63ce8c3d05c 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -74,7 +74,7 @@ */ unsigned long long __attribute__((weak)) sched_clock(void) { - return (unsigned long long)(jiffies - INITIAL_JIFFIES) + return (unsigned long long)(get_jiffies_64() - INITIAL_JIFFIES) * (NSEC_PER_SEC / HZ); } EXPORT_SYMBOL_GPL(sched_clock); From 4df58dc469437c7541f146050a0f7c585767d3c7 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 6 Dec 2011 20:40:36 -0800 Subject: [PATCH 118/678] regulator: fix incorrectly cached voltage on error In regulator_set_voltage, if _regulator_do_set_voltage returns an error, the new voltage will still be cached. If regulator_set_voltage is called again with the same constraints, it will return success, but the voltage will still be at the old voltage. Keep the old voltage cached if _regulator_do_set_voltage returns and error. Signed-off-by: Colin Cross Change-Id: I7c9cb3e508be92a3ec807e88f093a8e185e56b3c --- drivers/regulator/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 1b7d64118e4..9cf23798216 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1785,6 +1785,8 @@ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) { struct regulator_dev *rdev = regulator->rdev; int ret = 0; + int old_min_uV; + int old_max_uV; mutex_lock(&rdev->mutex); @@ -1806,6 +1808,8 @@ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) ret = regulator_check_voltage(rdev, &min_uV, &max_uV); if (ret < 0) goto out; + old_min_uV = regulator->min_uV; + old_max_uV = regulator->max_uV; regulator->min_uV = min_uV; regulator->max_uV = max_uV; @@ -1814,6 +1818,10 @@ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) goto out; ret = _regulator_do_set_voltage(rdev, min_uV, max_uV); + if (ret < 0) { + regulator->min_uV = old_min_uV; + regulator->max_uV = old_max_uV; + } out: mutex_unlock(&rdev->mutex); From 67565e6e46dc27a5d78854640d88cceb0b419fce Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 7 Nov 2012 21:02:49 -0500 Subject: [PATCH 119/678] Revert "minor changes to ondemand" This reverts commit 4500316d85f68e006a3695e60b95f99f2a09eb3a. 
--- drivers/cpufreq/cpufreq_ondemand.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index c72b0170499..244640ff398 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -28,8 +28,8 @@ * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (3) -#define DEF_FREQUENCY_UP_THRESHOLD (95) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) From 422406eeeaa1c450dd33cb2dd42d1d9cb39c0dc6 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 8 Nov 2012 09:50:23 -0500 Subject: [PATCH 120/678] interactive: kang from franco with thanks... too lazy to go through commits --- drivers/cpufreq/cpufreq_interactive.c | 322 ++++++++++----------- include/trace/events/cpufreq_interactive.h | 8 +- 2 files changed, 146 insertions(+), 184 deletions(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 24633ca8778..6cff509340e 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -58,15 +58,10 @@ struct cpufreq_interactive_cpuinfo { static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo); -/* Workqueues handle frequency scaling */ -static struct task_struct *up_task; -static struct workqueue_struct *down_wq; -static struct work_struct freq_scale_down_work; -static cpumask_t up_cpumask; -static spinlock_t up_cpumask_lock; -static cpumask_t down_cpumask; -static spinlock_t down_cpumask_lock; -static struct mutex set_speed_lock; +/* realtime thread handles frequency scaling */ +static struct task_struct *speedchange_task; +static cpumask_t speedchange_cpumask; +static spinlock_t speedchange_cpumask_lock; struct cpufreq_interactive_core_lock { struct pm_qos_request_list qos_min_req; @@ -84,16 +79,18 @@ struct cpufreq_interactive_core_lock { static struct cpufreq_interactive_core_lock core_lock; - /* Hi speed to bump to from lo speed when load burst (default max) */ -static u64 hispeed_freq; +static unsigned int hispeed_freq = 1000000; + +/* CPU will be boosted to this freq - default 1000Mhz - when an input event is detected */ +static unsigned int input_boost_freq = 860000; /* Boost frequency by boost_factor when CPU load at or above this value. */ -#define DEFAULT_GO_MAXSPEED_LOAD 80 +#define DEFAULT_GO_MAXSPEED_LOAD 85 static unsigned long go_maxspeed_load; /* Go to hispeed_freq when CPU load at or above this value. */ -#define DEFAULT_GO_HISPEED_LOAD 80 +#define DEFAULT_GO_HISPEED_LOAD 85 static unsigned long go_hispeed_load; /* Base of exponential raise to max speed; if 0 - jump to maximum */ @@ -141,6 +138,7 @@ struct cpufreq_interactive_inputopen { }; static struct cpufreq_interactive_inputopen inputopen; +static struct workqueue_struct *inputopen_wq; /* * Non-zero means longer-term speed boost active. 
@@ -173,7 +171,7 @@ static unsigned int cpufreq_interactive_get_target( */ if (load_since_change > cpu_load) cpu_load = load_since_change; - + /* Exponential boost policy */ if (boost_factor) { @@ -198,8 +196,9 @@ static unsigned int cpufreq_interactive_get_target( /* Jump boost policy */ if (cpu_load >= go_hispeed_load || boost_val) { - if (pcpu->target_freq <= pcpu->policy->min) { - target_freq = hispeed_freq; + if (pcpu->target_freq < hispeed_freq && + hispeed_freq < pcpu->policy->max) { + target_freq = hispeed_freq; } else { target_freq = pcpu->policy->max * cpu_load / 100; @@ -319,7 +318,7 @@ static void cpufreq_interactive_timer(unsigned long data) load_since_change = 100 * (delta_time - delta_idle) / delta_time; } - + /* * Combine short-term load (since last idle timer started or timer * function re-armed itself) and long-term load (since last frequency @@ -366,20 +365,12 @@ static void cpufreq_interactive_timer(unsigned long data) trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq, new_freq); - - if (new_freq < pcpu->target_freq) { - pcpu->target_freq = new_freq; - spin_lock_irqsave(&down_cpumask_lock, flags); - cpumask_set_cpu(data, &down_cpumask); - spin_unlock_irqrestore(&down_cpumask_lock, flags); - queue_work(down_wq, &freq_scale_down_work); - } else { - pcpu->target_freq = new_freq; - spin_lock_irqsave(&up_cpumask_lock, flags); - cpumask_set_cpu(data, &up_cpumask); - spin_unlock_irqrestore(&up_cpumask_lock, flags); - wake_up_process(up_task); - } + + pcpu->target_freq = new_freq; + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + cpumask_set_cpu(data, &speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + wake_up_process(speedchange_task); rearm_if_notmax: /* @@ -507,7 +498,7 @@ static void cpufreq_interactive_idle_end(void) } -static int cpufreq_interactive_up_task(void *data) +static int cpufreq_interactive_speedchange_task(void *data) { unsigned int cpu; cpumask_t tmp_mask; @@ -516,22 +507,22 @@ static int cpufreq_interactive_up_task(void *data) while (1) { set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irqsave(&up_cpumask_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); - if (cpumask_empty(&up_cpumask)) { - spin_unlock_irqrestore(&up_cpumask_lock, flags); + if (cpumask_empty(&speedchange_cpumask)) { + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); schedule(); if (kthread_should_stop()) break; - spin_lock_irqsave(&up_cpumask_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); } set_current_state(TASK_RUNNING); - tmp_mask = up_cpumask; - cpumask_clear(&up_cpumask); - spin_unlock_irqrestore(&up_cpumask_lock, flags); + tmp_mask = speedchange_cpumask; + cpumask_clear(&speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); for_each_cpu(cpu, &tmp_mask) { unsigned int j; @@ -543,8 +534,6 @@ static int cpufreq_interactive_up_task(void *data) if (!pcpu->governor_enabled) continue; - mutex_lock(&set_speed_lock); - for_each_cpu(j, pcpu->policy->cpus) { struct cpufreq_interactive_cpuinfo *pjcpu = &per_cpu(cpuinfo, j); @@ -556,9 +545,8 @@ static int cpufreq_interactive_up_task(void *data) __cpufreq_driver_target(pcpu->policy, max_freq, CPUFREQ_RELATION_H); - mutex_unlock(&set_speed_lock); - trace_cpufreq_interactive_up(cpu, pcpu->target_freq, + trace_cpufreq_interactive_setspeed(cpu, pcpu->target_freq, pcpu->policy->cur); pcpu->freq_change_time_in_idle = @@ -572,54 +560,6 @@ static int cpufreq_interactive_up_task(void *data) return 0; } -static void 
cpufreq_interactive_freq_down(struct work_struct *work) -{ - unsigned int cpu; - cpumask_t tmp_mask; - unsigned long flags; - struct cpufreq_interactive_cpuinfo *pcpu; - - spin_lock_irqsave(&down_cpumask_lock, flags); - tmp_mask = down_cpumask; - cpumask_clear(&down_cpumask); - spin_unlock_irqrestore(&down_cpumask_lock, flags); - - for_each_cpu(cpu, &tmp_mask) { - unsigned int j; - unsigned int max_freq = 0; - - pcpu = &per_cpu(cpuinfo, cpu); - smp_rmb(); - - if (!pcpu->governor_enabled) - continue; - - mutex_lock(&set_speed_lock); - - for_each_cpu(j, pcpu->policy->cpus) { - struct cpufreq_interactive_cpuinfo *pjcpu = - &per_cpu(cpuinfo, j); - - if (pjcpu->target_freq > max_freq) - max_freq = pjcpu->target_freq; - } - - __cpufreq_driver_target(pcpu->policy, max_freq, - CPUFREQ_RELATION_H); - - mutex_unlock(&set_speed_lock); - - trace_cpufreq_interactive_down(cpu, pcpu->target_freq, - pcpu->policy->cur); - - pcpu->freq_change_time_in_idle = - get_cpu_idle_time_us(cpu, - &pcpu->freq_change_time); - pcpu->freq_change_time_in_iowait = - get_cpu_iowait_time(cpu, NULL); - } -} - static void cpufreq_interactive_boost(void) { int i; @@ -627,14 +567,14 @@ static void cpufreq_interactive_boost(void) unsigned long flags; struct cpufreq_interactive_cpuinfo *pcpu; - spin_lock_irqsave(&up_cpumask_lock, flags); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); for_each_online_cpu(i) { pcpu = &per_cpu(cpuinfo, i); - if (pcpu->target_freq < hispeed_freq) { - pcpu->target_freq = hispeed_freq; - cpumask_set_cpu(i, &up_cpumask); + if (pcpu->target_freq < input_boost_freq) { + pcpu->target_freq = input_boost_freq; + cpumask_set_cpu(i, &speedchange_cpumask); anyboost = 1; } @@ -646,15 +586,15 @@ static void cpufreq_interactive_boost(void) pcpu->floor_validate_time = ktime_to_us(ktime_get()); } - spin_unlock_irqrestore(&up_cpumask_lock, flags); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); if (anyboost) - wake_up_process(up_task); + wake_up_process(speedchange_task); } static void cpufreq_interactive_core_lock_timer(unsigned long data) { - queue_work(down_wq, &core_lock.unlock_work); + queue_work(inputopen_wq, &core_lock.unlock_work); } static void cpufreq_interactive_unlock_cores(struct work_struct *wq) @@ -717,6 +657,29 @@ static int cpufreq_interactive_lock_cores_task(void *data) return 0; } +static ssize_t show_input_boost_freq(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", input_boost_freq); +} + +static ssize_t store_input_boost_freq(struct kobject *kobj, + struct attribute *attr, const char *buf, + size_t count) +{ + int ret; + long unsigned int val; + + ret = strict_strtoul(buf, 0, &val); + if (ret < 0) + return ret; + input_boost_freq = val; + return count; +} + +static struct global_attr input_boost_freq_attr = __ATTR(input_boost_freq, 0644, + show_input_boost_freq, store_input_boost_freq); + /* * Pulsed boost on input event raises CPUs to hispeed_freq and lets * usual algorithm of min_sample_time decide when to allow speed @@ -752,7 +715,7 @@ static int cpufreq_interactive_input_connect(struct input_handler *handler, struct input_handle *handle; int error; - pr_debug("%s: connect to %s\n", __func__, dev->name); + pr_info("%s: connect to %s\n", __func__, dev->name); handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); if (!handle) return -ENOMEM; @@ -766,7 +729,7 @@ static int cpufreq_interactive_input_connect(struct input_handler *handler, goto err; inputopen.handle = handle; - queue_work(down_wq, &inputopen.inputopen_work); + 
queue_work(inputopen_wq, &inputopen.inputopen_work); return 0; err: kfree(handle); @@ -829,6 +792,27 @@ static ssize_t store_go_maxspeed_load(struct kobject *kobj, static struct global_attr go_maxspeed_load_attr = __ATTR(go_maxspeed_load, 0644, show_go_maxspeed_load, store_go_maxspeed_load); +static ssize_t show_input_boost(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", input_boost_val); +} + +static ssize_t store_input_boost(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = strict_strtoul(buf, 0, &val); + if (ret < 0) + return ret; + input_boost_val = val; + return count; +} + +define_one_global_rw(input_boost); + static ssize_t show_boost_factor(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -915,7 +899,7 @@ static struct global_attr max_boost_attr = __ATTR(max_boost, 0644, static ssize_t show_hispeed_freq(struct kobject *kobj, struct attribute *attr, char *buf) { - return sprintf(buf, "%llu\n", hispeed_freq); + return sprintf(buf, "%u\n", hispeed_freq); } static ssize_t store_hispeed_freq(struct kobject *kobj, @@ -923,9 +907,9 @@ static ssize_t store_hispeed_freq(struct kobject *kobj, size_t count) { int ret; - u64 val; + long unsigned int val; - ret = strict_strtoull(buf, 0, &val); + ret = strict_strtoul(buf, 0, &val); if (ret < 0) return ret; hispeed_freq = val; @@ -1024,27 +1008,6 @@ static ssize_t store_timer_rate(struct kobject *kobj, static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644, show_timer_rate, store_timer_rate); -static ssize_t show_input_boost(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - return sprintf(buf, "%u\n", input_boost_val); -} - -static ssize_t store_input_boost(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - input_boost_val = val; - return count; -} - -define_one_global_rw(input_boost); - static ssize_t show_boost(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -1075,6 +1038,7 @@ static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, define_one_global_rw(boost); static struct attribute *interactive_attributes[] = { + &input_boost_freq_attr.attr, &go_maxspeed_load_attr.attr, &boost_factor_attr.attr, &max_boost_attr.attr, @@ -1095,6 +1059,26 @@ static struct attribute_group interactive_attr_group = { .name = "interactive", }; +static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, + unsigned long val, + void *data) +{ + switch (val) { + case IDLE_START: + cpufreq_interactive_idle_start(); + break; + case IDLE_END: + cpufreq_interactive_idle_end(); + break; + } + + return 0; +} + +static struct notifier_block cpufreq_interactive_idle_nb = { + .notifier_call = cpufreq_interactive_idle_notifier, +}; + static int cpufreq_governor_interactive(struct cpufreq_policy *policy, unsigned int event) { @@ -1151,8 +1135,9 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, rc = input_register_handler(&cpufreq_interactive_input_handler); if (rc) pr_warn("%s: failed to register input handler\n", - __func__); - + __func__); + + idle_notifier_register(&cpufreq_interactive_idle_nb); break; case CPUFREQ_GOV_STOP: @@ -1171,10 +1156,11 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, pcpu->idle_exit_time = 0; } - flush_work(&freq_scale_down_work); + flush_work(&inputopen.inputopen_work); if 
(atomic_dec_return(&active_count) > 0) return 0; - + + idle_notifier_unregister(&cpufreq_interactive_idle_nb); input_unregister_handler(&cpufreq_interactive_input_handler); sysfs_remove_group(cpufreq_global_kobject, &interactive_attr_group); @@ -1193,31 +1179,16 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, return 0; } -static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, - unsigned long val, - void *data) -{ - switch (val) { - case IDLE_START: - cpufreq_interactive_idle_start(); - break; - case IDLE_END: - cpufreq_interactive_idle_end(); - break; - } - - return 0; -} - -static struct notifier_block cpufreq_interactive_idle_nb = { - .notifier_call = cpufreq_interactive_idle_notifier, -}; - static int __init cpufreq_interactive_init(void) { unsigned int i; struct cpufreq_interactive_cpuinfo *pcpu; - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + /* + * If MAX_USER_RT_PRIO < MAX_RT_PRIO the kernel thread has higher priority than any user thread + * In this case MAX_USER_RT_PRIO = 99 and MAX_RT_PRIO = 100, therefore boosting the priority of this + * kernel thread above user threads which will, by my reason, increase interactvitiy. + */ + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO-1 }; go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD; go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; @@ -1233,27 +1204,26 @@ static int __init cpufreq_interactive_init(void) pcpu->cpu_timer.data = i; } - up_task = kthread_create(cpufreq_interactive_up_task, NULL, - "kinteractiveup"); - if (IS_ERR(up_task)) - return PTR_ERR(up_task); - - sched_setscheduler_nocheck(up_task, SCHED_FIFO, ¶m); - get_task_struct(up_task); - - /* No rescuer thread, bind to CPU queuing the work for possibly - warm cache (probably doesn't matter much). 
*/ - down_wq = alloc_workqueue("knteractive_down", 0, 1); - - if (!down_wq) - goto err_freeuptask; - - INIT_WORK(&freq_scale_down_work, - cpufreq_interactive_freq_down); + spin_lock_init(&speedchange_cpumask_lock); + + speedchange_task = + kthread_create(cpufreq_interactive_speedchange_task, NULL, + "cfinteractive"); + if (IS_ERR(speedchange_task)) + return PTR_ERR(speedchange_task); + + sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, ¶m); + get_task_struct(speedchange_task); + + inputopen_wq = create_workqueue("cfinteractive"); + + if (!inputopen_wq) + goto err_freetask; + + INIT_WORK(&inputopen.inputopen_work, cpufreq_interactive_input_open); - spin_lock_init(&up_cpumask_lock); - spin_lock_init(&down_cpumask_lock); - mutex_init(&set_speed_lock); + /* NB: wake up so the thread does not look hung to the freezer */ + wake_up_process(speedchange_task); pm_qos_add_request(&core_lock.qos_min_req, PM_QOS_MIN_ONLINE_CPUS, PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); @@ -1278,13 +1248,11 @@ static int __init cpufreq_interactive_init(void) sched_setscheduler_nocheck(core_lock.lock_task, SCHED_FIFO, ¶m); get_task_struct(core_lock.lock_task); - idle_notifier_register(&cpufreq_interactive_idle_nb); - INIT_WORK(&inputopen.inputopen_work, cpufreq_interactive_input_open); INIT_WORK(&core_lock.unlock_work, cpufreq_interactive_unlock_cores); return cpufreq_register_governor(&cpufreq_gov_interactive); - -err_freeuptask: - put_task_struct(up_task); + +err_freetask: + put_task_struct(speedchange_task); return -ENOMEM; } @@ -1297,9 +1265,9 @@ module_init(cpufreq_interactive_init); static void __exit cpufreq_interactive_exit(void) { cpufreq_unregister_governor(&cpufreq_gov_interactive); - kthread_stop(up_task); - put_task_struct(up_task); - destroy_workqueue(down_wq); + kthread_stop(speedchange_task); + put_task_struct(speedchange_task); + destroy_workqueue(inputopen_wq); pm_qos_remove_request(&core_lock.qos_min_req); pm_qos_remove_request(&core_lock.qos_max_req); diff --git a/include/trace/events/cpufreq_interactive.h b/include/trace/events/cpufreq_interactive.h index bcef5053be8..64c9a825346 100644 --- a/include/trace/events/cpufreq_interactive.h +++ b/include/trace/events/cpufreq_interactive.h @@ -28,13 +28,7 @@ DECLARE_EVENT_CLASS(set, __entry->actualfreq) ); -DEFINE_EVENT(set, cpufreq_interactive_up, - TP_PROTO(u32 cpu_id, unsigned long targfreq, - unsigned long actualfreq), - TP_ARGS(cpu_id, targfreq, actualfreq) -); - -DEFINE_EVENT(set, cpufreq_interactive_down, +DEFINE_EVENT(set, cpufreq_interactive_setspeed, TP_PROTO(u32 cpu_id, unsigned long targfreq, unsigned long actualfreq), TP_ARGS(cpu_id, targfreq, actualfreq) From be1ff83beffa30e99dbb0cf7a17a11f499122877 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 8 Nov 2012 09:53:31 -0500 Subject: [PATCH 121/678] cpu-tegra: dynamic edp - lower temp threshold back to 60 In my testing I can't get it close to 60 if I try anyway. 
--- arch/arm/mach-tegra/cpu-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c index 934988725bf..0d75fcd5e61 100755 --- a/arch/arm/mach-tegra/cpu-tegra.c +++ b/arch/arm/mach-tegra/cpu-tegra.c @@ -60,7 +60,7 @@ static bool force_policy_max = 1; static bool coldstart = 1; #define TEGRA3_OVERCLOCK -#define TEGRA3_DYNAMIC_EDP_THRES_TEMP (68) +#define TEGRA3_DYNAMIC_EDP_THRES_TEMP (60) static bool edp_enable = 1; static int force_policy_max_set(const char *arg, const struct kernel_param *kp) From 4716957b825951a187f994ffdd678f3c8cfaa769 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 8 Nov 2012 09:56:07 -0500 Subject: [PATCH 122/678] tegra-dvfs: switch to cpu variant 3 based dvfs g table --- arch/arm/mach-tegra/tegra3_dvfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 101281ef746..70d78d3a4c5 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -165,7 +165,8 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 4, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1250, 1300, 1330, 1360, 1400, 1500), /* Nexus 7 - faking speedo id = 4, process id =2*/ - CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500, 1600), +// CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500, 1600), + CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600), /*Cpu voltages (mV): 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237 */ // CPU_DVFS("cpu_g", 4, 2, MHZ, 475, 475, 620, 620, 860, 860, 1000, 1000, 1100, 1100, 1200, 1300, 1400, 1500, 1600), From 3d9731cfcf62a8dac4699f487b7d981edb501dce Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 8 Nov 2012 09:57:36 -0500 Subject: [PATCH 123/678] mach-tegra: edp: balance out edp frequency limits --- arch/arm/mach-tegra/edp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/edp.c b/arch/arm/mach-tegra/edp.c index 56465eafa8b..b8d3fd64d7f 100644 --- a/arch/arm/mach-tegra/edp.c +++ b/arch/arm/mach-tegra/edp.c @@ -362,7 +362,7 @@ void __init tegra_init_cpu_edp_limits(unsigned int regulator_mA) for (j = 0; j < edp_limits_size; j++) { e[j].temperature = (int)t[i+j].temperature; - e[j].freq_limits[0] = (unsigned int)t[i+j].freq_limits[0] * 10000; + e[j].freq_limits[0] = (unsigned int)(t[i+j].freq_limits[0]-10) * 10000; e[j].freq_limits[1] = (unsigned int)t[i+j].freq_limits[1] * 10000; e[j].freq_limits[2] = (unsigned int)t[i+j].freq_limits[2] * 10000; e[j].freq_limits[3] = (unsigned int)t[i+j].freq_limits[3] * 10000; From 64c98207418ee18e268288c3b868f018abeed0f5 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 8 Nov 2012 17:36:39 -0500 Subject: [PATCH 124/678] Interactive: set input boost freq to lp max of 620Mhz --- arch/arm/configs/metallice_grouper_defconfig | 2 +- drivers/cpufreq/cpufreq_interactive.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 3c95343c904..dfaef041a78 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -18,7 +18,7 @@ CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_HARDIRQS_SW_RESEND=y 
CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_LOCKBREAK=y -CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_ARCH_HAS_CPUFREQ=y CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y CONFIG_GENERIC_HWEIGHT=y diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 6cff509340e..0ef1fcb14ae 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -83,7 +83,7 @@ static struct cpufreq_interactive_core_lock core_lock; static unsigned int hispeed_freq = 1000000; /* CPU will be boosted to this freq - default 1000Mhz - when an input event is detected */ -static unsigned int input_boost_freq = 860000; +static unsigned int input_boost_freq = 620000; /* Boost frequency by boost_factor when CPU load at or above this value. */ #define DEFAULT_GO_MAXSPEED_LOAD 85 @@ -94,7 +94,7 @@ static unsigned long go_maxspeed_load; static unsigned long go_hispeed_load; /* Base of exponential raise to max speed; if 0 - jump to maximum */ -static unsigned long boost_factor; +static unsigned long boost_factor = 2; /* Max frequency boost in Hz; if 0 - no max is enforced */ static unsigned long max_boost; @@ -130,7 +130,7 @@ static unsigned long above_hispeed_delay_val; /* * Boost pulse to hispeed on touchscreen input. */ -static int input_boost_val; +static int input_boost_val = 1; struct cpufreq_interactive_inputopen { struct input_handle *handle; From c0e43d688625bc6d4640182545212ce37b698f56 Mon Sep 17 00:00:00 2001 From: Lance Poore Date: Sun, 12 Aug 2012 15:25:06 -0500 Subject: [PATCH 125/678] SCHEDULER: Autogroup patch group by current user android UID instead of task ID --- kernel/sys.c | 3 ++- kernel/sysctl.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index f24794e7652..4e2306e1a04 100755 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -655,6 +655,7 @@ static int set_user(struct cred *new) free_uid(new->user); new->user = new_user; + sched_autogroup_create_attach(current); return 0; } @@ -1164,7 +1165,7 @@ SYSCALL_DEFINE0(setsid) write_unlock_irq(&tasklist_lock); if (err > 0) { proc_sid_connector(group_leader); - sched_autogroup_create_attach(group_leader); + } return err; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fd15163f360..b33ea9870c4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -375,7 +375,7 @@ static struct ctl_table kern_table[] = { .data = &sysctl_sched_autogroup_enabled, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec, .extra1 = &zero, .extra2 = &one, }, From cfc112731d6b2555d4dfc87cec1dd074b692c4c0 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Nov 2012 14:01:37 -0500 Subject: [PATCH 126/678] cpufreq: interactive: set floor freq as input boost freq as well instead of hispeed freq --- drivers/cpufreq/cpufreq_interactive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 0ef1fcb14ae..4d8283c27d4 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -582,7 +582,7 @@ static void cpufreq_interactive_boost(void) * validated. 
*/ - pcpu->floor_freq = hispeed_freq; + pcpu->floor_freq = input_boost_freq; pcpu->floor_validate_time = ktime_to_us(ktime_get()); } From a4fe8b7fd0368a819b470bc7ab79e8788225bd31 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Nov 2012 14:16:22 -0500 Subject: [PATCH 127/678] cpufreq: interactive: remove exponential boost policy code and params --- drivers/cpufreq/cpufreq_interactive.c | 132 -------------------------- 1 file changed, 132 deletions(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 4d8283c27d4..c245b3d106a 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -85,29 +85,13 @@ static unsigned int hispeed_freq = 1000000; /* CPU will be boosted to this freq - default 1000Mhz - when an input event is detected */ static unsigned int input_boost_freq = 620000; -/* Boost frequency by boost_factor when CPU load at or above this value. */ -#define DEFAULT_GO_MAXSPEED_LOAD 85 -static unsigned long go_maxspeed_load; - /* Go to hispeed_freq when CPU load at or above this value. */ #define DEFAULT_GO_HISPEED_LOAD 85 static unsigned long go_hispeed_load; -/* Base of exponential raise to max speed; if 0 - jump to maximum */ -static unsigned long boost_factor = 2; - -/* Max frequency boost in Hz; if 0 - no max is enforced */ -static unsigned long max_boost; - /* Consider IO as busy */ static unsigned long io_is_busy; -/* - * Targeted sustainable load relatively to current frequency. - * If 0, target is set realtively to the max speed - */ -static unsigned long sustain_load; - /* * The minimum amount of time to spend at a frequency before we can ramp down. */ @@ -171,28 +155,6 @@ static unsigned int cpufreq_interactive_get_target( */ if (load_since_change > cpu_load) cpu_load = load_since_change; - - /* Exponential boost policy */ - if (boost_factor) { - - if (cpu_load >= go_maxspeed_load) { - target_freq = pcpu->policy->cur * boost_factor; - - if (max_boost && - target_freq > pcpu->policy->cur + max_boost) - - target_freq = pcpu->policy->cur + max_boost; - } else { - - if (!sustain_load) - sustain_load = 100; - - target_freq = - (pcpu->policy->cur * cpu_load / sustain_load); - } - - goto done; - } /* Jump boost policy */ if (cpu_load >= go_hispeed_load || boost_val) { @@ -223,7 +185,6 @@ static unsigned int cpufreq_interactive_get_target( target_freq = pcpu->policy->max * cpu_load / 100; } -done: target_freq = min(target_freq, pcpu->policy->max); return target_freq; } @@ -770,28 +731,6 @@ static struct input_handler cpufreq_interactive_input_handler = { .id_table = cpufreq_interactive_ids, }; -static ssize_t show_go_maxspeed_load(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", go_maxspeed_load); -} - -static ssize_t store_go_maxspeed_load(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - go_maxspeed_load = val; - return count; -} - -static struct global_attr go_maxspeed_load_attr = __ATTR(go_maxspeed_load, 0644, - show_go_maxspeed_load, store_go_maxspeed_load); - static ssize_t show_input_boost(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -813,28 +752,6 @@ static ssize_t store_input_boost(struct kobject *kobj, struct attribute *attr, define_one_global_rw(input_boost); -static ssize_t show_boost_factor(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", 
boost_factor); -} - -static ssize_t store_boost_factor(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - boost_factor = val; - return count; -} - -static struct global_attr boost_factor_attr = __ATTR(boost_factor, 0644, - show_boost_factor, store_boost_factor); - static ssize_t show_io_is_busy(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -852,50 +769,6 @@ static ssize_t store_io_is_busy(struct kobject *kobj, static struct global_attr io_is_busy_attr = __ATTR(io_is_busy, 0644, show_io_is_busy, store_io_is_busy); -static ssize_t show_sustain_load(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", sustain_load); -} - -static ssize_t store_sustain_load(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - sustain_load = val; - return count; -} - -static struct global_attr sustain_load_attr = __ATTR(sustain_load, 0644, - show_sustain_load, store_sustain_load); - -static ssize_t show_max_boost(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", max_boost); -} - -static ssize_t store_max_boost(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - max_boost = val; - return count; -} - -static struct global_attr max_boost_attr = __ATTR(max_boost, 0644, - show_max_boost, store_max_boost); - static ssize_t show_hispeed_freq(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -1039,11 +912,7 @@ define_one_global_rw(boost); static struct attribute *interactive_attributes[] = { &input_boost_freq_attr.attr, - &go_maxspeed_load_attr.attr, - &boost_factor_attr.attr, - &max_boost_attr.attr, &io_is_busy_attr.attr, - &sustain_load_attr.attr, &hispeed_freq_attr.attr, &go_hispeed_load_attr.attr, &above_hispeed_delay.attr, @@ -1190,7 +1059,6 @@ static int __init cpufreq_interactive_init(void) */ struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO-1 }; - go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD; go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; min_sample_time = DEFAULT_MIN_SAMPLE_TIME; above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY; From 73fa976749911e454a2422bb4aef3af0472a3ab9 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Nov 2012 14:17:45 -0500 Subject: [PATCH 128/678] cpufreq: interactive: limit initial speed bump to input boost freq as well When go hispeed load is met, and current target freq is less than both hispeed freq and input boost freq, go to the lesser of the two freqs. If the hispeed load is met and the current target freq is less than hispeed freq but greater than or equal to input boost freq, boost to hispeed freq. 
--- drivers/cpufreq/cpufreq_interactive.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index c245b3d106a..2a6751a1915 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -160,7 +160,12 @@ static unsigned int cpufreq_interactive_get_target( if (cpu_load >= go_hispeed_load || boost_val) { if (pcpu->target_freq < hispeed_freq && hispeed_freq < pcpu->policy->max) { - target_freq = hispeed_freq; + if (pcpu->target_freq < input_boost_freq && + input_boost_freq < pcpu->policy->max) { + target_freq = min(hispeed_freq,input_boost_freq); + } else { + target_freq = hispeed_freq; + } } else { target_freq = pcpu->policy->max * cpu_load / 100; From 501f4197ffa696803fd80036a74d716547c102ce Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Nov 2012 14:34:09 -0500 Subject: [PATCH 129/678] cpufreq: interactive: run at fraction of hispeed_freq when load is low --- drivers/cpufreq/cpufreq_interactive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 2a6751a1915..d1f68ff2268 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -187,7 +187,7 @@ static unsigned int cpufreq_interactive_get_target( } } } else { - target_freq = pcpu->policy->max * cpu_load / 100; + target_freq = hispeed_freq * cpu_load / 100; } target_freq = min(target_freq, pcpu->policy->max); From 84c8d4718238bccd0292957be558461be518a767 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Nov 2012 14:56:00 -0500 Subject: [PATCH 130/678] cpufreq: interactive: if hispeed freq > input freq, run at fraction of average when load is low --- drivers/cpufreq/cpufreq_interactive.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index d1f68ff2268..815e9175ef5 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -187,7 +187,11 @@ static unsigned int cpufreq_interactive_get_target( } } } else { - target_freq = hispeed_freq * cpu_load / 100; + if (hispeed_freq > input_boost_freq) { + target_freq = ((hispeed_freq + input_boost_freq) / 2) * cpu_load / 100; + } else { + target_freq = hispeed_freq * cpu_load / 100; + } } target_freq = min(target_freq, pcpu->policy->max); From 9cf447f84d67f2fb2bfc73a2ab58a8d0fb27afd9 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Nov 2012 14:59:15 -0500 Subject: [PATCH 131/678] cpufreq: interactive: set hispeed freq to 1.1Ghz --- drivers/cpufreq/cpufreq_interactive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 815e9175ef5..31aae3507c9 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -80,7 +80,7 @@ struct cpufreq_interactive_core_lock { static struct cpufreq_interactive_core_lock core_lock; /* Hi speed to bump to from lo speed when load burst (default max) */ -static unsigned int hispeed_freq = 1000000; +static unsigned int hispeed_freq = 1100000; /* CPU will be boosted to this freq - default 1000Mhz - when an input event is detected */ static unsigned int input_boost_freq = 620000; From f6992d6e921989dad4c06e79129e3fa8d8f808b4 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Nov 2012 19:47:48 -0500 Subject: [PATCH 
132/678] cpufreq: interactive: set go hispeed load to 80 --- drivers/cpufreq/cpufreq_interactive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 31aae3507c9..7c19354aeaa 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -86,7 +86,7 @@ static unsigned int hispeed_freq = 1100000; static unsigned int input_boost_freq = 620000; /* Go to hispeed_freq when CPU load at or above this value. */ -#define DEFAULT_GO_HISPEED_LOAD 85 +#define DEFAULT_GO_HISPEED_LOAD 80 static unsigned long go_hispeed_load; /* Consider IO as busy */ From a279f8bb5efe55dde1532aeff4eaf1851582f5eb Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 16 Nov 2012 02:15:57 -0500 Subject: [PATCH 133/678] deconfig: enable suspend/resume watchdog for tegra --- arch/arm/configs/metallice_grouper_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index dfaef041a78..bb05e81ed7f 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -344,7 +344,7 @@ CONFIG_TEGRA_BB_XMM_POWER=y # CONFIG_TEGRA_BB_XMM_POWER2 is not set # CONFIG_TEGRA_THERMAL_SYSFS is not set CONFIG_TEGRA_PLLM_RESTRICTED=y -# CONFIG_TEGRA_WDT_RECOVERY is not set +CONFIG_TEGRA_WDT_RECOVERY=y CONFIG_TEGRA_LP2_ARM_TWD=y CONFIG_TEGRA_SLOW_CSITE=y # CONFIG_TEGRA_PREINIT_CLOCKS is not set From 84a98d688832e75b9d91387dbccb6ca86548bc6b Mon Sep 17 00:00:00 2001 From: Dennis Rassmann Date: Tue, 9 Oct 2012 12:41:17 +0200 Subject: [PATCH 134/678] drivers: touchscreen: ektf3k: fix & enable elan sysfs > android_touch Signed-off-by: Dennis Rassmann --- drivers/input/touchscreen/ektf3k.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/ektf3k.c b/drivers/input/touchscreen/ektf3k.c index 6e4b6993598..d1b1340202d 100755 --- a/drivers/input/touchscreen/ektf3k.c +++ b/drivers/input/touchscreen/ektf3k.c @@ -450,7 +450,7 @@ static struct attribute *elan_attr[] = { static struct kobject *android_touch_kobj; -static int elan_ktf3k_touch_sysfs_init(void) +static int elan_touch_sysfs_init(void) { int ret ; @@ -1550,8 +1550,8 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, private_ts = ts; - //elan_ktf2k_touch_sysfs_init(); - ts->attrs.attrs = elan_attr; + elan_touch_sysfs_init(); + ts->attrs.attrs = elan_attr; err = sysfs_create_group(&client->dev.kobj, &ts->attrs); if (err) { dev_err(&client->dev, "Not able to create the sysfs\n"); From 36a85b5e63b5a02e55c6225a7565699d8f103c0e Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 16 Nov 2012 02:59:40 -0500 Subject: [PATCH 135/678] Revert "cpufreq: interactive: limit initial speed bump to input boost freq as well" This reverts commit d299f1b45e143b0a6071a40a50c4a26a286f9e78. 
--- drivers/cpufreq/cpufreq_interactive.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 7c19354aeaa..6a0490c1884 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -160,12 +160,7 @@ static unsigned int cpufreq_interactive_get_target( if (cpu_load >= go_hispeed_load || boost_val) { if (pcpu->target_freq < hispeed_freq && hispeed_freq < pcpu->policy->max) { - if (pcpu->target_freq < input_boost_freq && - input_boost_freq < pcpu->policy->max) { - target_freq = min(hispeed_freq,input_boost_freq); - } else { - target_freq = hispeed_freq; - } + target_freq = hispeed_freq; } else { target_freq = pcpu->policy->max * cpu_load / 100; From a06d592ebb45d723af0f7ee422beef51ec2913f8 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 18 Nov 2012 03:03:28 -0500 Subject: [PATCH 136/678] Revert "drivers: touchscreen: ektf3k: fix & enable elan sysfs > android_touch" This reverts commit 4bce8a9dde690ab800fb111dfa9451f35569fa9a. --- drivers/input/touchscreen/ektf3k.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/ektf3k.c b/drivers/input/touchscreen/ektf3k.c index d1b1340202d..6e4b6993598 100755 --- a/drivers/input/touchscreen/ektf3k.c +++ b/drivers/input/touchscreen/ektf3k.c @@ -450,7 +450,7 @@ static struct attribute *elan_attr[] = { static struct kobject *android_touch_kobj; -static int elan_touch_sysfs_init(void) +static int elan_ktf3k_touch_sysfs_init(void) { int ret ; @@ -1550,8 +1550,8 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, private_ts = ts; - elan_touch_sysfs_init(); - ts->attrs.attrs = elan_attr; + //elan_ktf2k_touch_sysfs_init(); + ts->attrs.attrs = elan_attr; err = sysfs_create_group(&client->dev.kobj, &ts->attrs); if (err) { dev_err(&client->dev, "Not able to create the sysfs\n"); From 5785d5e344b98e52057ad04165b2df42e4b3143d Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 24 Nov 2012 22:08:44 -0500 Subject: [PATCH 137/678] cpufreq: ondemand.c: minor changes --- drivers/cpufreq/cpufreq_ondemand.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 244640ff398..c72b0170499 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -28,8 +28,8 @@ * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) -#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (3) +#define DEF_FREQUENCY_UP_THRESHOLD (95) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) From e7ea3c635c7e4fa9e1a86ad12ebbbc36a5fe7cf5 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 24 Nov 2012 22:09:02 -0500 Subject: [PATCH 138/678] mach-tegra: cpu-tegra3.c: lp to g delay to 1000ms for battery life --- arch/arm/mach-tegra/cpu-tegra3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index b0b0952572a..a020f5ba9bb 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -39,7 +39,7 @@ #include "clock.h" #define INITIAL_STATE TEGRA_HP_DISABLED -#define UP2G0_DELAY_MS 500 +#define UP2G0_DELAY_MS 1000 #define UP2Gn_DELAY_MS 100 #define DOWN_DELAY_MS 500 From beac4ed54f438d5454feeb54bb8aa7429e5ad3d1 Mon Sep 17 00:00:00 2001 From: 
Metallice Date: Sun, 25 Nov 2012 19:44:04 -0500 Subject: [PATCH 139/678] Revert "tegra-otg: HACK to allow OTG peripheral and charging at the same time" This reverts commit e12921762aa8a5017fc82257d0d27ffc84d71476. --- drivers/usb/otg/Kconfig | 7 ------- drivers/usb/otg/tegra-otg.c | 17 +++++------------ 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig index 726f3a2b7e8..bcb3e868033 100644 --- a/drivers/usb/otg/Kconfig +++ b/drivers/usb/otg/Kconfig @@ -129,13 +129,6 @@ config USB_TEGRA_OTG Enable this driver on boards which use the internal VBUS and ID sensing of the Tegra USB PHY. -config USB_OTG_ON_CHARGING - boolean "Tegra OTG On Charging Hack" - depends on USB && USB_TEGRA_OTG - default y - help - Enable this to allow OTG peripheral and charging at the same time - config AB8500_USB tristate "AB8500 USB Transceiver Driver" depends on AB8500_CORE diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index bffd0b654b7..ef1792286e7 100644 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -64,11 +64,6 @@ struct tegra_otg_data { }; static struct tegra_otg_data *tegra_clone; -#ifdef CONFIG_USB_OTG_ON_CHARGING -static bool tegra_otg_on_charging = false; -module_param(tegra_otg_on_charging, bool, 0664); -#endif - static inline unsigned long otg_readl(struct tegra_otg_data *tegra, unsigned int offset) { @@ -236,13 +231,11 @@ static void irq_work(struct work_struct *work) tegra_state_name(to)); if (tegra->charger_cb) { - if (tegra_otg_on_charging) - /* enable v_bus detection for charging */ - tegra->detect_vbus = true; - else - /* enable OTG to supply internal power */ - tegra->charger_cb(to, from, tegra->charger_cb_data); - } + // tmtmtm: disable charging of OTG slave + //tegra->charger_cb(to, from, tegra->charger_cb_data); + // tmtmtm: enable vbus in + tegra->detect_vbus = true; + } if (to == OTG_STATE_A_SUSPEND) { if (from == OTG_STATE_A_HOST) From 2eb5b020fd852d0b7332d2eeaa88ada2d69ad3fb Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 25 Nov 2012 19:44:34 -0500 Subject: [PATCH 140/678] Revert "OTG HOST moe: disable charging of slave + enable vbus in" This reverts commit 30bf8740284691b75632174c02a899eedf0341c6. --- drivers/usb/otg/tegra-otg.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index ef1792286e7..c1fe7f899f1 100644 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -230,12 +230,8 @@ static void irq_work(struct work_struct *work) dev_info(tegra->otg.dev, "%s --> %s\n", tegra_state_name(from), tegra_state_name(to)); - if (tegra->charger_cb) { - // tmtmtm: disable charging of OTG slave - //tegra->charger_cb(to, from, tegra->charger_cb_data); - // tmtmtm: enable vbus in - tegra->detect_vbus = true; - } + if (tegra->charger_cb) + tegra->charger_cb(to, from, tegra->charger_cb_data); if (to == OTG_STATE_A_SUSPEND) { if (from == OTG_STATE_A_HOST) From 42ddd61ad81236a4c85cf57a1b64c77ed2650f09 Mon Sep 17 00:00:00 2001 From: Steve Kondik Date: Tue, 11 Sep 2012 18:34:41 +0100 Subject: [PATCH 141/678] lowmemorykiller: Compact memory when killing processes * Memory compaction is never invoked on Android because we avoid swap and don't allocate huge pages. Fix it by invoking compaction from the LMK when a process is killed to reduce memory fragmentation. 
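
In short, the LMK gains a hook into the existing compactor: compact_nodes() is made non-static in mm/compaction.c, a no-op stub keeps !CONFIG_COMPACTION builds happy, and the shrinker calls it once a victim has been picked. A condensed sketch of those pieces (the literal hunks follow below; the prototype later grows a 'sync' flag in the follow-up patches):

    /* include/linux/compaction.h, !CONFIG_COMPACTION branch: harmless stub */
    static inline int compact_nodes(void)
    {
            return COMPACT_CONTINUE;        /* nothing to do without compaction */
    }

    /* drivers/staging/android/lowmemorykiller.c */
    extern int compact_nodes(void);         /* provided by mm/compaction.c */

    /* ...at the end of lowmem_shrink(), after the victim has been signalled: */
    if (selected)
            compact_nodes();                /* defragment the pages the kill frees up */
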
Change-Id: I2bda790c2093e65fd6f43d52ea5149d6b57cb1e9 Conflicts: drivers/staging/android/lowmemorykiller.c --- drivers/staging/android/lowmemorykiller.c | 5 +++++ include/linux/compaction.h | 5 +++++ mm/compaction.c | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 86d51959b29..d1da40baa5b 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -35,6 +35,7 @@ #include #include #include +#include static uint32_t lowmem_debug_level = 2; static int lowmem_adj[6] = { @@ -55,6 +56,8 @@ static int lowmem_minfree_size = 4; static struct task_struct *lowmem_deathpending; static unsigned long lowmem_deathpending_timeout; +extern int compact_nodes(); + #define lowmem_print(level, x...) \ do { \ if (lowmem_debug_level >= (level)) \ @@ -178,6 +181,8 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); read_unlock(&tasklist_lock); + if (selected) + compact_nodes(); return rem; } diff --git a/include/linux/compaction.h b/include/linux/compaction.h index cc9f7a42864..13c28bdbc36 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -84,6 +84,11 @@ static inline bool compaction_deferred(struct zone *zone) return 1; } +static inline int compact_nodes() +{ + return COMPACT_CONTINUE; +} + #endif /* CONFIG_COMPACTION */ #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) diff --git a/mm/compaction.c b/mm/compaction.c index 6cc604bd564..83bdfaf5f66 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -693,7 +693,7 @@ static int compact_node(int nid) } /* Compact all nodes in the system */ -static int compact_nodes(void) +int compact_nodes(void) { int nid; From b13e3aa1b9452aec2b5877d21fbfaa0706014375 Mon Sep 17 00:00:00 2001 From: Steve Kondik Date: Tue, 11 Sep 2012 19:15:39 +0100 Subject: [PATCH 142/678] lowmemorykiller: Use asynchronous compaction Change-Id: I6a65d06fc30b88fcedaaf1abf1855fdd19e3c912 --- drivers/staging/android/lowmemorykiller.c | 2 +- include/linux/compaction.h | 2 +- mm/compaction.c | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index d1da40baa5b..e764b5d17a6 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -182,7 +182,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) sc->nr_to_scan, sc->gfp_mask, rem); read_unlock(&tasklist_lock); if (selected) - compact_nodes(); + compact_nodes(false); return rem; } diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 13c28bdbc36..233998aab97 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -84,7 +84,7 @@ static inline bool compaction_deferred(struct zone *zone) return 1; } -static inline int compact_nodes() +static inline int compact_nodes(bool sync) { return COMPACT_CONTINUE; } diff --git a/mm/compaction.c b/mm/compaction.c index 83bdfaf5f66..1ed66ec5b35 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -655,7 +655,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, /* Compact all zones within a node */ -static int compact_node(int nid) +static int compact_node(int nid, bool sync) { int zoneid; pg_data_t *pgdat; @@ -673,6 +673,7 @@ static int compact_node(int nid) 
.nr_freepages = 0, .nr_migratepages = 0, .order = -1, + .sync = sync, }; zone = &pgdat->node_zones[zoneid]; @@ -693,12 +694,12 @@ static int compact_node(int nid) } /* Compact all nodes in the system */ -int compact_nodes(void) +int compact_nodes(bool sync) { int nid; for_each_online_node(nid) - compact_node(nid); + compact_node(nid, sync); return COMPACT_COMPLETE; } @@ -711,7 +712,7 @@ int sysctl_compaction_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { if (write) - return compact_nodes(); + return compact_nodes(true); return 0; } From 7903588ad5c5839abf308230b4e21d773581c631 Mon Sep 17 00:00:00 2001 From: "Kenneth R. Crudup" Date: Fri, 18 Nov 2011 22:14:52 -0800 Subject: [PATCH 143/678] mmc: host: sdhci: Allow use of controller's native max blocksize Currently, any controller supporting a blocksize of 4K or higher is bumped back down to 512 bytes. This change allows using the controller's native blocksize on a per-hardware basis. --- drivers/mmc/host/Kconfig | 11 +++++++++++ drivers/mmc/host/sdhci.c | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 8c87096531e..16e7aadde0c 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -57,6 +57,17 @@ config MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER This is the case for the Freescale eSDHC and Nintendo Wii SDHCI. +config MMC_SDHCI_NATIVE_BLOCKSIZE + bool "Use Host Controller's actual blocksize" + depends on MMC_SDHCI && EXPERIMENTAL + default n + help + Normally, the SDHCI host controller will limit the xfer blocksize + to 512 bytes, even if the hardware host controller can do more. + This option uses the maximum blocksize supported by the hardware. + + I have seen no corruption on a Tegra2, but if unsure, say N. + config MMC_SDHCI_PCI tristate "SDHCI support on PCI bus" depends on MMC_SDHCI && PCI diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 3163d8373d7..a767234d0d0 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2812,11 +2812,13 @@ int sdhci_add_host(struct sdhci_host *host) } else { mmc->max_blk_size = (caps[0] & SDHCI_MAX_BLOCK_MASK) >> SDHCI_MAX_BLOCK_SHIFT; +#ifndef CONFIG_MMC_SDHCI_NATIVE_BLOCKSIZE if (mmc->max_blk_size >= 3) { printk(KERN_WARNING "%s: Invalid maximum block size, " "assuming 512 bytes\n", mmc_hostname(mmc)); mmc->max_blk_size = 0; } +#endif } mmc->max_blk_size = 512 << mmc->max_blk_size; @@ -2826,6 +2828,12 @@ int sdhci_add_host(struct sdhci_host *host) */ mmc->max_blk_count = (host->quirks & SDHCI_QUIRK_NO_MULTIBLOCK) ? 1 : 65535; +#ifdef CONFIG_MMC_SDHCI_NATIVE_BLOCKSIZE + printk(KERN_INFO "%s: mss %u mrs %u mbs %u mbc %u\n", mmc_hostname(mmc), + mmc->max_seg_size, mmc->max_req_size, mmc->max_blk_size, + mmc->max_blk_count); +#endif + /* * Init tasklets. 
*/ From d03778a3fe44d339f37702d69db0be94f53a7508 Mon Sep 17 00:00:00 2001 From: Dennis Rassmann Date: Wed, 3 Oct 2012 17:57:18 +0200 Subject: [PATCH 144/678] drivers: staging: android: lmk compile fixup Signed-off-by: Dennis Rassmann --- arch/arm/configs/motley_grouper_defconfig | 1 + drivers/staging/android/lowmemorykiller.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/motley_grouper_defconfig b/arch/arm/configs/motley_grouper_defconfig index c2c2c12db79..aeb0f4a93a7 100644 --- a/arch/arm/configs/motley_grouper_defconfig +++ b/arch/arm/configs/motley_grouper_defconfig @@ -2586,6 +2586,7 @@ CONFIG_MMC_TEST=y # CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_IO_ACCESSORS=y +CONFIG_MMC_SDHCI_NATIVE_BLOCKSIZE=y # CONFIG_MMC_SDHCI_PCI is not set CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_TEGRA=y diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index e764b5d17a6..a4beb77e7b1 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -56,7 +56,7 @@ static int lowmem_minfree_size = 4; static struct task_struct *lowmem_deathpending; static unsigned long lowmem_deathpending_timeout; -extern int compact_nodes(); +extern int compact_nodes(bool sync); #define lowmem_print(level, x...) \ do { \ From 4ff840728e05e185f0a0c170c4b493769da4b893 Mon Sep 17 00:00:00 2001 From: Dennis Rassmann Date: Wed, 3 Oct 2012 17:42:49 +0200 Subject: [PATCH 145/678] defconfig: use arm optimized crypto algos Signed-off-by: Dennis Rassmann --- arch/arm/configs/motley_grouper_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/motley_grouper_defconfig b/arch/arm/configs/motley_grouper_defconfig index aeb0f4a93a7..4b871c7b459 100644 --- a/arch/arm/configs/motley_grouper_defconfig +++ b/arch/arm/configs/motley_grouper_defconfig @@ -3270,6 +3270,7 @@ CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_RMD256 is not set # CONFIG_CRYPTO_RMD320 is not set CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA1_ARM=y CONFIG_CRYPTO_SHA256=y # CONFIG_CRYPTO_SHA512 is not set # CONFIG_CRYPTO_TGR192 is not set @@ -3279,6 +3280,7 @@ CONFIG_CRYPTO_SHA256=y # Ciphers # CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_AES_ARM=y # CONFIG_CRYPTO_ANUBIS is not set CONFIG_CRYPTO_ARC4=y # CONFIG_CRYPTO_BLOWFISH is not set From 6cfe78b083173529bec78084c16a4db5f20bb45e Mon Sep 17 00:00:00 2001 From: David McCullough Date: Fri, 10 Aug 2012 22:45:06 -0700 Subject: [PATCH 146/678] arm/crypto: Add optimized AES and SHA1 routines Add assembler versions of AES and SHA1 for ARM platforms. This has provided up to a 50% improvement in IPsec/TCP throughout for tunnels using AES128/SHA1. 
Platform CPU SPeed Endian Before (bps) After (bps) Improvement IXP425 533 MHz big 11217042 15566294 ~38% KS8695 166 MHz little 3828549 5795373 ~51% Signed-off-by: David McCullough --- arch/arm/Makefile | 1 + arch/arm/crypto/Makefile | 10 + arch/arm/crypto/aes-armv4.S | 1112 ++++++++++++++++++++++++++++ arch/arm/crypto/aes_glue.c | 108 +++ arch/arm/crypto/sha1-armv4-large.S | 503 +++++++++++++ arch/arm/crypto/sha1_glue.c | 179 +++++ crypto/Kconfig | 33 + 7 files changed, 1946 insertions(+) create mode 100644 arch/arm/crypto/Makefile create mode 100644 arch/arm/crypto/aes-armv4.S create mode 100644 arch/arm/crypto/aes_glue.c create mode 100644 arch/arm/crypto/sha1-armv4-large.S create mode 100644 arch/arm/crypto/sha1_glue.c diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 941c13400bd..4d81b7fbb0a 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -254,6 +254,7 @@ core-$(CONFIG_VFP) += arch/arm/vfp/ # If we have a machine-specific directory, then include it in the build. core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/ +core-y += arch/arm/crypto/ core-y += $(machdirs) $(platdirs) drivers-$(CONFIG_OPROFILE) += arch/arm/oprofile/ diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile new file mode 100644 index 00000000000..82955273d5e --- /dev/null +++ b/arch/arm/crypto/Makefile @@ -0,0 +1,10 @@ +# +# Arch-specific CryptoAPI modules. +# + +obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o +obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o + +aes-arm-y := aes-armv4.o aes_glue.o +sha1-arm-y := sha1-armv4-large.o sha1_glue.o + diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S new file mode 100644 index 00000000000..e59b1d505d6 --- /dev/null +++ b/arch/arm/crypto/aes-armv4.S @@ -0,0 +1,1112 @@ +#define __ARM_ARCH__ __LINUX_ARM_ARCH__ +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ ==================================================================== + +@ AES for ARMv4 + +@ January 2007. +@ +@ Code uses single 1K S-box and is >2 times faster than code generated +@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which +@ allows to merge logical or arithmetic operation with shift or rotate +@ in one instruction and emit combined result every cycle. The module +@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit +@ key [on single-issue Xscale PXA250 core]. + +@ May 2007. +@ +@ AES_set_[en|de]crypt_key is added. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 12% improvement on +@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~21.5 cycles per byte. + +@ A little glue here to select the correct code below for the ARM CPU +@ that is being targetted. 
+ +.text +.code 32 + +.type AES_Te,%object +.align 5 +AES_Te: +.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d +.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 +.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d +.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a +.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 +.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b +.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea +.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b +.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a +.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f +.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 +.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f +.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e +.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 +.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d +.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f +.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e +.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb +.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce +.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 +.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c +.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed +.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b +.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a +.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 +.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 +.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 +.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 +.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a +.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 +.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 +.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d +.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f +.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 +.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 +.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 +.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f +.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 +.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c +.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 +.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e +.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 +.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 +.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b +.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 +.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 +.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 +.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 +.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 +.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 +.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 +.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 +.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa +.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 +.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 +.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 +.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 +.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 +.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 +.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a +.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 +.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 +.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 +.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a +@ Te4[256] +.byte 0x63, 0x7c, 0x77, 
0x7b, 0xf2, 0x6b, 0x6f, 0xc5 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +@ rcon[] +.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 +.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 +.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 +.size AES_Te,.-AES_Te + +@ void AES_encrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.global AES_encrypt +.type AES_encrypt,%function +.align 5 +AES_encrypt: + sub r3,pc,#8 @ AES_encrypt + stmdb sp!,{r1,r4-r12,lr} + mov r12,r0 @ inp + mov r11,r2 + sub r10,r3,#AES_encrypt-AES_Te @ Te +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_encrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... 
+ mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size AES_encrypt,.-AES_encrypt + +.type _armv4_AES_encrypt,%function +.align 2 +_armv4_AES_encrypt: + str lr,[sp,#-4]! @ push lr + ldmia r11!,{r4-r7} + eor r0,r0,r4 + ldr r12,[r11,#240-16] + eor r1,r1,r5 + eor r2,r2,r6 + eor r3,r3,r7 + sub r12,r12,#1 + mov lr,#255 + + and r7,lr,r0 + and r8,lr,r0,lsr#8 + and r9,lr,r0,lsr#16 + mov r0,r0,lsr#24 +.Lenc_loop: + ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8] + and r8,lr,r1 + ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16] + and r9,lr,r1,lsr#8 + ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24] + mov r1,r1,lsr#24 + + ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16] + ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0] + ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8] + eor r0,r0,r7,ror#8 + ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r5,r8,ror#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r6,r9,ror#8 + and r9,lr,r2 + ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] + eor r1,r1,r4,ror#24 + ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] + mov r2,r2,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] + eor r0,r0,r7,ror#16 + ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,ror#8 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r6,r9,ror#16 + and r9,lr,r3,lsr#16 @ i2 + ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] + eor r2,r2,r5,ror#16 + ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] + mov r3,r3,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] + eor r0,r0,r7,ror#24 + ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] + eor r2,r2,r9,ror#8 + ldr r4,[r11,#-12] + eor r3,r3,r6,ror#8 + + ldr r5,[r11,#-8] + eor r0,r0,r7 + ldr r6,[r11,#-4] + and r7,lr,r0 + eor r1,r1,r4 + and r8,lr,r0,lsr#8 + eor r2,r2,r5 + and r9,lr,r0,lsr#16 + eor r3,r3,r6 + mov r0,r0,lsr#24 + + subs r12,r12,#1 + bne .Lenc_loop + + add r10,r10,#2 + + ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8] + and r8,lr,r1 + ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16] + and r9,lr,r1,lsr#8 + ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24] + mov r1,r1,lsr#24 + + ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16] + ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0] + ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8] + eor r0,r7,r0,lsl#8 + ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r8,r5,lsl#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] + eor r1,r4,r1,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] + mov r2,r2,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] + eor r0,r7,r0,lsl#8 + ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,lsl#16 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r3,lsr#16 @ i2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] + eor r2,r5,r2,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] + mov r3,r3,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] + eor r0,r7,r0,lsl#8 + ldr r7,[r11,#0] + ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] 
+ eor r1,r1,r8,lsl#8 + ldr r4,[r11,#4] + eor r2,r2,r9,lsl#16 + ldr r5,[r11,#8] + eor r3,r6,r3,lsl#24 + ldr r6,[r11,#12] + + eor r0,r0,r7 + eor r1,r1,r4 + eor r2,r2,r5 + eor r3,r3,r6 + + sub r10,r10,#2 + ldr pc,[sp],#4 @ pop and return +.size _armv4_AES_encrypt,.-_armv4_AES_encrypt + +.global private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,%function +.align 5 +private_AES_set_encrypt_key: +_armv4_AES_set_encrypt_key: + sub r3,pc,#8 @ AES_set_encrypt_key + teq r0,#0 + moveq r0,#-1 + beq .Labrt + teq r2,#0 + moveq r0,#-1 + beq .Labrt + + teq r1,#128 + beq .Lok + teq r1,#192 + beq .Lok + teq r1,#256 + movne r0,#-1 + bne .Labrt + +.Lok: stmdb sp!,{r4-r12,lr} + sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 + + mov r12,r0 @ inp + mov lr,r1 @ bits + mov r11,r2 @ key + +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + str r0,[r11],#16 + orr r3,r3,r5,lsl#16 + str r1,[r11,#-12] + orr r3,r3,r6,lsl#24 + str r2,[r11,#-8] + str r3,[r11,#-4] +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r11],#16 + str r1,[r11,#-12] + str r2,[r11,#-8] + str r3,[r11,#-4] +#endif + + teq lr,#128 + bne .Lnot128 + mov r12,#10 + str r12,[r11,#240-16] + add r6,r10,#256 @ rcon + mov lr,#255 + +.L128_loop: + and r5,lr,r3,lsr#24 + and r7,lr,r3,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r3 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r5,r5,r4 + eor r0,r0,r5 @ rk[4]=rk[0]^... + eor r1,r1,r0 @ rk[5]=rk[1]^rk[4] + str r0,[r11],#16 + eor r2,r2,r1 @ rk[6]=rk[2]^rk[5] + str r1,[r11,#-12] + eor r3,r3,r2 @ rk[7]=rk[3]^rk[6] + str r2,[r11,#-8] + subs r12,r12,#1 + str r3,[r11,#-4] + bne .L128_loop + sub r2,r11,#176 + b .Ldone + +.Lnot128: +#if __ARM_ARCH__<7 + ldrb r8,[r12,#19] + ldrb r4,[r12,#18] + ldrb r5,[r12,#17] + ldrb r6,[r12,#16] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#23] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#22] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#21] + ldrb r6,[r12,#20] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#16] + ldr r9,[r12,#20] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + teq lr,#192 + bne .Lnot192 + mov r12,#12 + str r12,[r11,#240-24] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#8 + +.L192_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[6]=rk[0]^... 
+ eor r1,r1,r0 @ rk[7]=rk[1]^rk[6] + str r0,[r11],#24 + eor r2,r2,r1 @ rk[8]=rk[2]^rk[7] + str r1,[r11,#-20] + eor r3,r3,r2 @ rk[9]=rk[3]^rk[8] + str r2,[r11,#-16] + subs r12,r12,#1 + str r3,[r11,#-12] + subeq r2,r11,#216 + beq .Ldone + + ldr r7,[r11,#-32] + ldr r8,[r11,#-28] + eor r7,r7,r3 @ rk[10]=rk[4]^rk[9] + eor r9,r8,r7 @ rk[11]=rk[5]^rk[10] + str r7,[r11,#-8] + str r9,[r11,#-4] + b .L192_loop + +.Lnot192: +#if __ARM_ARCH__<7 + ldrb r8,[r12,#27] + ldrb r4,[r12,#26] + ldrb r5,[r12,#25] + ldrb r6,[r12,#24] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#31] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#30] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#29] + ldrb r6,[r12,#28] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#24] + ldr r9,[r12,#28] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + mov r12,#14 + str r12,[r11,#240-32] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#7 + +.L256_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[8]=rk[0]^... + eor r1,r1,r0 @ rk[9]=rk[1]^rk[8] + str r0,[r11],#32 + eor r2,r2,r1 @ rk[10]=rk[2]^rk[9] + str r1,[r11,#-28] + eor r3,r3,r2 @ rk[11]=rk[3]^rk[10] + str r2,[r11,#-24] + subs r12,r12,#1 + str r3,[r11,#-20] + subeq r2,r11,#256 + beq .Ldone + + and r5,lr,r3 + and r7,lr,r3,lsr#8 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#16 + ldrb r7,[r10,r7] + and r9,lr,r3,lsr#24 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#8 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r11,#-48] + orr r5,r5,r9,lsl#24 + + ldr r7,[r11,#-44] + ldr r8,[r11,#-40] + eor r4,r4,r5 @ rk[12]=rk[4]^... + ldr r9,[r11,#-36] + eor r7,r7,r4 @ rk[13]=rk[5]^rk[12] + str r4,[r11,#-16] + eor r8,r8,r7 @ rk[14]=rk[6]^rk[13] + str r7,[r11,#-12] + eor r9,r9,r8 @ rk[15]=rk[7]^rk[14] + str r8,[r11,#-8] + str r9,[r11,#-4] + b .L256_loop + +.Ldone: mov r0,#0 + ldmia sp!,{r4-r12,lr} +.Labrt: tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key + +.global private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,%function +.align 5 +private_AES_set_decrypt_key: + str lr,[sp,#-4]! @ push lr +#if 0 + @ kernel does both of these in setkey so optimise this bit out by + @ expecting the key to already have the enc_key work done (see aes_glue.c) + bl _armv4_AES_set_encrypt_key +#else + mov r0,#0 +#endif + teq r0,#0 + ldrne lr,[sp],#4 @ pop lr + bne .Labrt + + stmdb sp!,{r4-r12} + + ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2, + mov r11,r2 @ which is AES_KEY *key + mov r7,r2 + add r8,r2,r12,lsl#4 + +.Linv: ldr r0,[r7] + ldr r1,[r7,#4] + ldr r2,[r7,#8] + ldr r3,[r7,#12] + ldr r4,[r8] + ldr r5,[r8,#4] + ldr r6,[r8,#8] + ldr r9,[r8,#12] + str r0,[r8],#-16 + str r1,[r8,#16+4] + str r2,[r8,#16+8] + str r3,[r8,#16+12] + str r4,[r7],#16 + str r5,[r7,#-12] + str r6,[r7,#-8] + str r9,[r7,#-4] + teq r7,r8 + bne .Linv + ldr r0,[r11,#16]! 
@ prefetch tp1 + mov r7,#0x80 + mov r8,#0x1b + orr r7,r7,#0x8000 + orr r8,r8,#0x1b00 + orr r7,r7,r7,lsl#16 + orr r8,r8,r8,lsl#16 + sub r12,r12,#1 + mvn r9,r7 + mov r12,r12,lsl#2 @ (rounds-1)*4 + +.Lmix: and r4,r0,r7 + and r1,r0,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r1,r4,r1,lsl#1 @ tp2 + + and r4,r1,r7 + and r2,r1,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r2,r4,r2,lsl#1 @ tp4 + + and r4,r2,r7 + and r3,r2,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r3,r4,r3,lsl#1 @ tp8 + + eor r4,r1,r2 + eor r5,r0,r3 @ tp9 + eor r4,r4,r3 @ tpe + eor r4,r4,r1,ror#24 + eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) + eor r4,r4,r2,ror#16 + eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) + eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24) + + ldr r0,[r11,#4] @ prefetch tp1 + str r4,[r11],#4 + subs r12,r12,#1 + bne .Lmix + + mov r0,#0 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key + +.type AES_Td,%object +.align 5 +AES_Td: +.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 +.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 +.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 +.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f +.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 +.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 +.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da +.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 +.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd +.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 +.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 +.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 +.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 +.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a +.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 +.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c +.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 +.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a +.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 +.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 +.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 +.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff +.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 +.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb +.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 +.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e +.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 +.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a +.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e +.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 +.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d +.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 +.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd +.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 +.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 +.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 +.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d +.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 +.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 +.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef +.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 +.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 +.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 +.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 +.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 
0xcf2512b3 +.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b +.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 +.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 +.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 +.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 +.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 +.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f +.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df +.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f +.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e +.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 +.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 +.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c +.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf +.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 +.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f +.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 +.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 +.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 +@ Td4[256] +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +.size AES_Td,.-AES_Td + +@ void AES_decrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.global AES_decrypt +.type AES_decrypt,%function +.align 5 +AES_decrypt: + sub r3,pc,#8 @ AES_decrypt + stmdb sp!,{r1,r4-r12,lr} + mov r12,r0 @ inp + mov r11,r2 + sub r10,r3,#AES_decrypt-AES_Td @ Td +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... 
+ ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_decrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... + mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size AES_decrypt,.-AES_decrypt + +.type _armv4_AES_decrypt,%function +.align 2 +_armv4_AES_decrypt: + str lr,[sp,#-4]! 
@ push lr + ldmia r11!,{r4-r7} + eor r0,r0,r4 + ldr r12,[r11,#240-16] + eor r1,r1,r5 + eor r2,r2,r6 + eor r3,r3,r7 + sub r12,r12,#1 + mov lr,#255 + + and r7,lr,r0,lsr#16 + and r8,lr,r0,lsr#8 + and r9,lr,r0 + mov r0,r0,lsr#24 +.Ldec_loop: + ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16] + and r7,lr,r1 @ i0 + ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8] + and r8,lr,r1,lsr#16 + ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0] + and r9,lr,r1,lsr#8 + ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24] + mov r1,r1,lsr#24 + + ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0] + ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16] + ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8] + eor r0,r0,r7,ror#24 + ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r8,r5,ror#8 + and r8,lr,r2 @ i1 + eor r6,r9,r6,ror#8 + and r9,lr,r2,lsr#16 + ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8] + eor r1,r1,r4,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0] + mov r2,r2,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16] + eor r0,r0,r7,ror#16 + ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24] + and r7,lr,r3,lsr#16 @ i0 + eor r1,r1,r8,ror#24 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r9,r6,ror#8 + and r9,lr,r3 @ i2 + ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16] + eor r2,r2,r5,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8] + mov r3,r3,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0] + eor r0,r0,r7,ror#8 + ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24] + eor r2,r2,r9,ror#24 + + ldr r4,[r11,#-12] + eor r0,r0,r7 + ldr r5,[r11,#-8] + eor r3,r3,r6,ror#8 + ldr r6,[r11,#-4] + and r7,lr,r0,lsr#16 + eor r1,r1,r4 + and r8,lr,r0,lsr#8 + eor r2,r2,r5 + and r9,lr,r0 + eor r3,r3,r6 + mov r0,r0,lsr#24 + + subs r12,r12,#1 + bne .Ldec_loop + + add r10,r10,#1024 + + ldr r5,[r10,#0] @ prefetch Td4 + ldr r6,[r10,#32] + ldr r4,[r10,#64] + ldr r5,[r10,#96] + ldr r6,[r10,#128] + ldr r4,[r10,#160] + ldr r5,[r10,#192] + ldr r6,[r10,#224] + + ldrb r0,[r10,r0] @ Td4[s0>>24] + ldrb r4,[r10,r7] @ Td4[s0>>16] + and r7,lr,r1 @ i0 + ldrb r5,[r10,r8] @ Td4[s0>>8] + and r8,lr,r1,lsr#16 + ldrb r6,[r10,r9] @ Td4[s0>>0] + and r9,lr,r1,lsr#8 + + ldrb r7,[r10,r7] @ Td4[s1>>0] + ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24] + ldrb r8,[r10,r8] @ Td4[s1>>16] + eor r0,r7,r0,lsl#24 + ldrb r9,[r10,r9] @ Td4[s1>>8] + eor r1,r4,r1,lsl#8 + and r7,lr,r2,lsr#8 @ i0 + eor r5,r5,r8,lsl#8 + and r8,lr,r2 @ i1 + ldrb r7,[r10,r7] @ Td4[s2>>8] + eor r6,r6,r9,lsl#8 + ldrb r8,[r10,r8] @ Td4[s2>>0] + and r9,lr,r2,lsr#16 + + ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24] + eor r0,r0,r7,lsl#8 + ldrb r9,[r10,r9] @ Td4[s2>>16] + eor r1,r8,r1,lsl#16 + and r7,lr,r3,lsr#16 @ i0 + eor r2,r5,r2,lsl#16 + and r8,lr,r3,lsr#8 @ i1 + ldrb r7,[r10,r7] @ Td4[s3>>16] + eor r6,r6,r9,lsl#16 + ldrb r8,[r10,r8] @ Td4[s3>>8] + and r9,lr,r3 @ i2 + + ldrb r9,[r10,r9] @ Td4[s3>>0] + ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24] + eor r0,r0,r7,lsl#16 + ldr r7,[r11,#0] + eor r1,r1,r8,lsl#8 + ldr r4,[r11,#4] + eor r2,r9,r2,lsl#8 + ldr r5,[r11,#8] + eor r3,r6,r3,lsl#24 + ldr r6,[r11,#12] + + eor r0,r0,r7 + eor r1,r1,r4 + eor r2,r2,r5 + eor r3,r3,r6 + + sub r10,r10,#1024 + ldr pc,[sp],#4 @ pop and return +.size _armv4_AES_decrypt,.-_armv4_AES_decrypt +.asciz "AES for ARMv4, CRYPTOGAMS by " +.align 2 diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c new file mode 100644 index 00000000000..59f7877ead6 --- /dev/null +++ b/arch/arm/crypto/aes_glue.c @@ -0,0 +1,108 @@ +/* + * Glue Code for the asm optimized version of the AES Cipher Algorithm + */ + +#include +#include +#include + +#define AES_MAXNR 14 + +typedef struct { + unsigned int rd_key[4 *(AES_MAXNR + 1)]; + int rounds; +} AES_KEY; + +struct AES_CTX { + AES_KEY 
enc_key; + AES_KEY dec_key; +}; + +asmlinkage void AES_encrypt(const u8 *in, u8 *out, AES_KEY *ctx); +asmlinkage void AES_decrypt(const u8 *in, u8 *out, AES_KEY *ctx); +asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); +asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct AES_CTX *ctx = crypto_tfm_ctx(tfm); + AES_encrypt(src, dst, &ctx->enc_key); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct AES_CTX *ctx = crypto_tfm_ctx(tfm); + AES_decrypt(src, dst, &ctx->dec_key); +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct AES_CTX *ctx = crypto_tfm_ctx(tfm); + + switch (key_len) { + case AES_KEYSIZE_128: + key_len = 128; + break; + case AES_KEYSIZE_192: + key_len = 192; + break; + case AES_KEYSIZE_256: + key_len = 256; + break; + default: + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + /* private_AES_set_decrypt_key expects an encryption key as input */ + ctx->dec_key = ctx->enc_key; + if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + return 0; +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct AES_CTX), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}; + +static int __init aes_init(void) +{ + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("aes"); +MODULE_ALIAS("aes-asm"); +MODULE_AUTHOR("David McCullough "); diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S new file mode 100644 index 00000000000..7050ab133b9 --- /dev/null +++ b/arch/arm/crypto/sha1-armv4-large.S @@ -0,0 +1,503 @@ +#define __ARM_ARCH__ __LINUX_ARM_ARCH__ +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ ==================================================================== + +@ sha1_block procedure for ARMv4. +@ +@ January 2007. 
+ +@ Size/performance trade-off +@ ==================================================================== +@ impl size in bytes comp cycles[*] measured performance +@ ==================================================================== +@ thumb 304 3212 4420 +@ armv4-small 392/+29% 1958/+64% 2250/+96% +@ armv4-compact 740/+89% 1552/+26% 1840/+22% +@ armv4-large 1420/+92% 1307/+19% 1370/+34%[***] +@ full unroll ~5100/+260% ~1260/+4% ~1300/+5% +@ ==================================================================== +@ thumb = same as 'small' but in Thumb instructions[**] and +@ with recurring code in two private functions; +@ small = detached Xload/update, loops are folded; +@ compact = detached Xload/update, 5x unroll; +@ large = interleaved Xload/update, 5x unroll; +@ full unroll = interleaved Xload/update, full unroll, estimated[!]; +@ +@ [*] Manually counted instructions in "grand" loop body. Measured +@ performance is affected by prologue and epilogue overhead, +@ i-cache availability, branch penalties, etc. +@ [**] While each Thumb instruction is twice smaller, they are not as +@ diverse as ARM ones: e.g., there are only two arithmetic +@ instructions with 3 arguments, no [fixed] rotate, addressing +@ modes are limited. As result it takes more instructions to do +@ the same job in Thumb, therefore the code is never twice as +@ small and always slower. +@ [***] which is also ~35% better than compiler generated code. Dual- +@ issue Cortex A8 core was measured to process input block in +@ ~990 cycles. + +@ August 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 13% improvement on +@ Cortex A8 core and in absolute terms ~870 cycles per input block +@ [or 13.6 cycles per byte]. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 10% +@ improvement on Cortex A8 core and 12.2 cycles per byte. + +.text + +.global sha1_block_data_order +.type sha1_block_data_order,%function + +.align 2 +sha1_block_data_order: + stmdb sp!,{r4-r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + ldmia r0,{r3,r4,r5,r6,r7} +.Lloop: + ldr r8,.LK_00_19 + mov r14,sp + sub sp,sp,#15*4 + mov r5,r5,ror#30 + mov r6,r6,ror#30 + mov r7,r7,ror#30 @ [6] +.L_00_15: +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r6,r8,r6,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r4,r5 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r6,r8,r6,ror#2 @ E+=K_00_19 + eor r10,r4,r5 @ F_xx_xx + add r6,r6,r7,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r3,r10,ror#2 + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! 
+ add r6,r6,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r5,r8,r5,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r3,r4 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r5,r8,r5,ror#2 @ E+=K_00_19 + eor r10,r3,r4 @ F_xx_xx + add r5,r5,r6,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r7,r10,ror#2 + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r5,r5,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r4,r8,r4,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r7,r3 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r4,r8,r4,ror#2 @ E+=K_00_19 + eor r10,r7,r3 @ F_xx_xx + add r4,r4,r5,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r6,r10,ror#2 + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r4,r4,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r3,r8,r3,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r6,r7 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r3,r8,r3,ror#2 @ E+=K_00_19 + eor r10,r6,r7 @ F_xx_xx + add r3,r3,r4,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r5,r10,ror#2 + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r3,r3,r10 @ E+=F_00_19(B,C,D) + teq r14,sp + bne .L_00_15 @ [((11+4)*5+2)*3] +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + add r6,r6,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! 
+ and r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + add r5,r5,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + add r4,r4,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + add r3,r3,r10 @ E+=F_00_19(B,C,D) + + ldr r8,.LK_20_39 @ [+15+16*4] + sub sp,sp,#25*4 + cmn sp,#0 @ [+3], clear carry to denote 20_39 +.L_20_39_or_60_79: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r4,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! 
+ eor r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_20_39(B,C,D) + teq r14,sp @ preserve carry + bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] + bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes + + ldr r8,.LK_40_59 + sub sp,sp,#20*4 @ [+2] +.L_40_59: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r4,r10,ror#2 @ F_xx_xx + and r11,r5,r6 @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_40_59(B,C,D) + add r7,r7,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + and r11,r4,r5 @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_40_59(B,C,D) + add r6,r6,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + and r11,r3,r4 @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_40_59(B,C,D) + add r5,r5,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + and r11,r7,r3 @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_40_59(B,C,D) + add r4,r4,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! 
+ and r10,r5,r10,ror#2 @ F_xx_xx + and r11,r6,r7 @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_40_59(B,C,D) + add r3,r3,r11,ror#2 + teq r14,sp + bne .L_40_59 @ [+((12+5)*5+2)*4] + + ldr r8,.LK_60_79 + sub sp,sp,#20*4 + cmp sp,#0 @ set carry to denote 60_79 + b .L_20_39_or_60_79 @ [+4], spare 300 bytes +.L_done: + add sp,sp,#80*4 @ "deallocate" stack frame + ldmia r0,{r8,r9,r10,r11,r12} + add r3,r8,r3 + add r4,r9,r4 + add r5,r10,r5,ror#2 + add r6,r11,r6,ror#2 + add r7,r12,r7,ror#2 + stmia r0,{r3,r4,r5,r6,r7} + teq r1,r2 + bne .Lloop @ [+18], total 1307 + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.align 2 +.LK_00_19: .word 0x5a827999 +.LK_20_39: .word 0x6ed9eba1 +.LK_40_59: .word 0x8f1bbcdc +.LK_60_79: .word 0xca62c1d6 +.size sha1_block_data_order,.-sha1_block_data_order +.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " +.align 2 diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c new file mode 100644 index 00000000000..76cd976230b --- /dev/null +++ b/arch/arm/crypto/sha1_glue.c @@ -0,0 +1,179 @@ +/* + * Cryptographic API. + * Glue code for the SHA1 Secure Hash Algorithm assembler implementation + * + * This file is based on sha1_generic.c and sha1_ssse3_glue.c + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald + * Copyright (c) Jean-Francois Dive + * Copyright (c) Mathias Krause + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct SHA1_CTX { + uint32_t h0,h1,h2,h3,h4; + u64 count; + u8 data[SHA1_BLOCK_SIZE]; +}; + +asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest, + const unsigned char *data, unsigned int rounds); + + +static int sha1_init(struct shash_desc *desc) +{ + struct SHA1_CTX *sctx = shash_desc_ctx(desc); + memset(sctx, 0, sizeof(*sctx)); + sctx->h0 = SHA1_H0; + sctx->h1 = SHA1_H1; + sctx->h2 = SHA1_H2; + sctx->h3 = SHA1_H3; + sctx->h4 = SHA1_H4; + return 0; +} + + +static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA1_BLOCK_SIZE - partial; + memcpy(sctx->data + partial, data, done); + sha1_block_data_order(sctx, sctx->data, 1); + } + + if (len - done >= SHA1_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; + sha1_block_data_order(sctx, data + done, rounds); + done += rounds * SHA1_BLOCK_SIZE; + } + + memcpy(sctx->data, data + done, len - done); + return 0; +} + + +static int sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct SHA1_CTX *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + int res; + + /* Handle the fast case right here */ + if (partial + len < SHA1_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->data + partial, data, len); + return 0; + } + res = __sha1_update(sctx, data, len, partial); + return res; +} + + +/* Add padding and return the message digest. 
*/ +static int sha1_final(struct shash_desc *desc, u8 *out) +{ + struct SHA1_CTX *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA1_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); + /* We need to fill a whole block for __sha1_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->data + index, padding, padlen); + } else { + __sha1_update(sctx, padding, padlen, index); + } + __sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 5; i++) + dst[i] = cpu_to_be32(((u32 *)sctx)[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + return 0; +} + + +static int sha1_export(struct shash_desc *desc, void *out) +{ + struct SHA1_CTX *sctx = shash_desc_ctx(desc); + memcpy(out, sctx, sizeof(*sctx)); + return 0; +} + + +static int sha1_import(struct shash_desc *desc, const void *in) +{ + struct SHA1_CTX *sctx = shash_desc_ctx(desc); + memcpy(sctx, in, sizeof(*sctx)); + return 0; +} + + +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_init, + .update = sha1_update, + .final = sha1_final, + .export = sha1_export, + .import = sha1_import, + .descsize = sizeof(struct SHA1_CTX), + .statesize = sizeof(struct SHA1_CTX), + .base = { + .cra_name = "sha1", + .cra_driver_name= "sha1-asm", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + + +static int __init sha1_mod_init(void) +{ + return crypto_register_shash(&alg); +} + + +static void __exit sha1_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + + +module_init(sha1_mod_init); +module_exit(sha1_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (ARM)"); +MODULE_ALIAS("sha1"); +MODULE_AUTHOR("David McCullough "); diff --git a/crypto/Kconfig b/crypto/Kconfig index ae27b7534ea..49b5dcf58e4 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -407,6 +407,15 @@ config CRYPTO_SHA1 help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). +config CRYPTO_SHA1_ARM + tristate "SHA1 digest algorithm (ARM-asm)" + depends on ARM + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using optimized ARM assembler. + config CRYPTO_SHA256 tristate "SHA224 and SHA256 digest algorithm" select CRYPTO_HASH @@ -562,6 +571,30 @@ config CRYPTO_AES_NI_INTEL ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional acceleration for CTR. +config CRYPTO_AES_ARM + tristate "AES cipher algorithms (ARM-asm)" + depends on ARM + select CRYPTO_ALGAPI + select CRYPTO_AES + help + Use optimized AES assembler routines for ARM platforms. + + AES cipher algorithms (FIPS-197). AES uses the Rijndael + algorithm. + + Rijndael appears to be consistently a very good performer in + both hardware and software across a wide range of computing + environments regardless of its use in feedback or non-feedback + modes. Its key setup time is excellent, and its key agility is + good. Rijndael's very low memory requirements make it very well + suited for restricted-space environments, in which it also + demonstrates excellent performance. Rijndael's operations are + among the easiest to defend against power and timing attacks. 
+ + The AES specifies three key sizes: 128, 192 and 256 bits + + See for more information. + config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" select CRYPTO_ALGAPI From 6d98c9a3e5c759493e1138ccc739e870d9815f07 Mon Sep 17 00:00:00 2001 From: faux123 Date: Sun, 29 Jul 2012 18:08:25 -0700 Subject: [PATCH 147/678] staging: android/lowmemorykiller: Don't grab tasklist_lock Grabbing tasklist_lock has its disadvantages, i.e. it blocks process creation and destruction. If there are lots of processes, blocking doesn't sound as a great idea. For LMK, it is sufficient to surround tasks list traverse with rcu_read_{,un}lock(). >From now on using force_sig() is not safe, as it can race with an already exiting task, so we use send_sig() now. As a downside, it won't kill PID namespace init processes, but that's not what we want anyway. Suggested-by: Oleg Nesterov Signed-off-by: Anton Vorontsov Reviewed-by: Oleg Nesterov Signed-off-by: Greg Kroah-Hartman modified for HTC kernel from Linux 3.4+ by faux123 Conflicts: drivers/staging/android/lowmemorykiller.c --- drivers/staging/android/lowmemorykiller.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index a4beb77e7b1..2d38a2771dc 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -134,7 +135,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) } selected_oom_adj = min_adj; - read_lock(&tasklist_lock); + rcu_read_lock(); for_each_process(p) { struct mm_struct *mm; struct signal_struct *sig; @@ -175,12 +176,12 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) selected_oom_adj, selected_tasksize); lowmem_deathpending = selected; lowmem_deathpending_timeout = jiffies + HZ; - force_sig(SIGKILL, selected); + send_sig(SIGKILL, selected, 0); rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); - read_unlock(&tasklist_lock); + rcu_read_unlock(); if (selected) compact_nodes(false); return rem; From 910f03313aaf80f506610e1aed8c8be630eb05a5 Mon Sep 17 00:00:00 2001 From: faux123 Date: Thu, 10 May 2012 23:11:15 -0700 Subject: [PATCH 148/678] staging: android/lowmemorykiller: Better mm handling LMK should not directly check for task->mm. The reason is that the process' threads may exit or detach its mm via use_mm(), but other threads may still have a valid mm. To catch this we use find_lock_task_mm(), which walks up all threads and returns an appropriate task (with lock held). 
Suggested-by: Oleg Nesterov Reviewed-by: Oleg Nesterov Signed-off-by: Anton Vorontsov Acked-by: KOSAKI Motohiro Signed-off-by: Greg Kroah-Hartman modified for HTC kernel from Linux 3.4+ by faux123 --- drivers/staging/android/lowmemorykiller.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 2d38a2771dc..b43317c5b71 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -85,7 +85,7 @@ task_notify_func(struct notifier_block *self, unsigned long val, void *data) static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { - struct task_struct *p; + struct task_struct *tsk; struct task_struct *selected = NULL; int rem = 0; int tasksize; @@ -136,15 +136,17 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) selected_oom_adj = min_adj; rcu_read_lock(); - for_each_process(p) { - struct mm_struct *mm; + for_each_process(tsk) { + struct task_struct *p; struct signal_struct *sig; int oom_adj; - task_lock(p); - mm = p->mm; + p = find_lock_task_mm(tsk); + if (!p) + continue; + sig = p->signal; - if (!mm || !sig) { + if (!sig) { task_unlock(p); continue; } @@ -153,7 +155,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) task_unlock(p); continue; } - tasksize = get_mm_rss(mm); + tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; From def6043303702206b6d828783303c2b0eeb0f5ba Mon Sep 17 00:00:00 2001 From: faux123 Date: Sun, 29 Jul 2012 18:09:50 -0700 Subject: [PATCH 149/678] staging: android/lowmemorykiller: No need for task->signal check task->signal == NULL is not possible, so no need for these checks. Suggested-by: Oleg Nesterov Reviewed-by: Oleg Nesterov Signed-off-by: Anton Vorontsov Acked-by: KOSAKI Motohiro Signed-off-by: Greg Kroah-Hartman modified for HTC kernel from Linux 3.4+ by faux123 --- drivers/staging/android/lowmemorykiller.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index b43317c5b71..ae6edb8de9b 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -138,19 +138,13 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) rcu_read_lock(); for_each_process(tsk) { struct task_struct *p; - struct signal_struct *sig; int oom_adj; p = find_lock_task_mm(tsk); if (!p) continue; - sig = p->signal; - if (!sig) { - task_unlock(p); - continue; - } - oom_adj = sig->oom_adj; + oom_adj = p->signal->oom_adj; if (oom_adj < min_adj) { task_unlock(p); continue; From 46f551de0d38cb33728239dc57b15e928f62d437 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 6 Feb 2012 20:30:01 +0400 Subject: [PATCH 150/678] staging: android/lowmemorykiller: Do not kill kernel threads LMK should not try killing kernel threads. 
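For orientation, here is a condensed sketch of the scan loop as it looks once patches 147-150 are all applied. It is an illustration only, not taken from any of these diffs: the function name and scaffolding are invented, and it assumes find_lock_task_mm() and get_mm_rss() are visible to this code as declared in <linux/oom.h> and <linux/mm.h> on kernels of this vintage. The real hunks follow in the patches themselves.

#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/oom.h>
#include <linux/rcupdate.h>

/* Hypothetical helper: pick and kill the largest eligible task. */
static int lmk_shrink_example(int min_adj)
{
	struct task_struct *tsk, *selected = NULL;
	int selected_tasksize = 0;

	rcu_read_lock();			/* patch 147: no more read_lock(&tasklist_lock) */
	for_each_process(tsk) {
		struct task_struct *p;
		int tasksize;

		if (tsk->flags & PF_KTHREAD)	/* patch 150: never target kernel threads */
			continue;

		p = find_lock_task_mm(tsk);	/* patch 148: a thread with a live mm, task_lock() held */
		if (!p)
			continue;		/* whole thread group is exiting or has no mm */

		if (p->signal->oom_adj < min_adj) {	/* patch 149: p->signal cannot be NULL here */
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(p->mm);
		task_unlock(p);

		if (tasksize > selected_tasksize) {
			selected = p;
			selected_tasksize = tasksize;
		}
	}
	if (selected)
		send_sig(SIGKILL, selected, 0);	/* patch 147: send_sig(), not force_sig(), so it
						 * cannot race with an already-exiting task */
	rcu_read_unlock();
	return selected ? selected_tasksize : 0;
}

The design point worth noting is that everything from the traversal through send_sig() stays inside the RCU read-side critical section, so the selected task_struct remains valid for the kill even though tasklist_lock is never taken.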
Suggested-by: Oleg Nesterov Reviewed-by: Oleg Nesterov Signed-off-by: Anton Vorontsov Acked-by: KOSAKI Motohiro Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/lowmemorykiller.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index ae6edb8de9b..4d33c5addb9 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -140,6 +140,9 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) struct task_struct *p; int oom_adj; + if (tsk->flags & PF_KTHREAD) + continue; + p = find_lock_task_mm(tsk); if (!p) continue; From 34c1df1bbca3d6a82b09dea30d5546bb79f81fcc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 7 Mar 2012 13:21:23 -0800 Subject: [PATCH 151/678] Staging: android: lowmemorykiller.c Fix compiler warning about the type of the module parameter. Cc: San Mehat Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/lowmemorykiller.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 4d33c5addb9..aa3f7b4b655 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -46,7 +46,7 @@ static int lowmem_adj[6] = { 12, }; static int lowmem_adj_size = 4; -static size_t lowmem_minfree[6] = { +static int lowmem_minfree[6] = { 3 * 512, /* 6MB */ 2 * 1024, /* 8MB */ 4 * 1024, /* 16MB */ From 1eaa67f41e8c8220a63da7375169c4413fc97163 Mon Sep 17 00:00:00 2001 From: TripNRaVeR Date: Thu, 22 Mar 2012 00:11:14 +0100 Subject: [PATCH 152/678] vfp: compile with neon --- arch/arm/vfp/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile index 6de73aab019..ec624f0150c 100644 --- a/arch/arm/vfp/Makefile +++ b/arch/arm/vfp/Makefile @@ -7,7 +7,7 @@ # ccflags-y := -DDEBUG # asflags-y := -DDEBUG -KBUILD_AFLAGS :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=softvfp+vfp) +KBUILD_AFLAGS :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=neon) LDFLAGS +=--no-warn-mismatch obj-y += vfp.o From a21f6cab2ca55f76ace53f6efe3fc56522cd9cdb Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Fri, 11 Nov 2011 23:04:09 -0800 Subject: [PATCH 153/678] sched: don't call task_group() many times in set_task_rq() It improves perfomance, especially if autogroup enabled. The size of set_task_rq() was 0x180 and now it is 0xa0. 
Signed-off-by: Andrew Vagin --- kernel/sched.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index eeed910c36d..f8d81ce1961 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -635,14 +635,18 @@ static inline struct task_group *task_group(struct task_struct *p) /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { +#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED) + struct task_group *tg = task_group(p); +#endif + #ifdef CONFIG_FAIR_GROUP_SCHED - p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; - p->se.parent = task_group(p)->se[cpu]; + p->se.cfs_rq = tg->cfs_rq[cpu]; + p->se.parent = tg->se[cpu]; #endif #ifdef CONFIG_RT_GROUP_SCHED - p->rt.rt_rq = task_group(p)->rt_rq[cpu]; - p->rt.parent = task_group(p)->rt_se[cpu]; + p->rt.rt_rq = tg->rt_rq[cpu]; + p->rt.parent = tg->rt_se[cpu]; #endif } From 0dbe8d7d2eb8a03a1cb6a04c5592278edc87d53a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 10 May 2012 07:17:28 +0200 Subject: [PATCH 154/678] [PATCH] ARM: entry: no need to increase preempt count for IRQ handlers From: Russell King Date: Sat, 25 Jun 2011 09:57:57 +0000 (+0100) Subject: ARM: entry: no need to increase preempt count for IRQ handlers X-Git-Tag: v3.1-rc1~258^2~1^3~16 X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=1613cc1119ecdb1bdb950da53065e615e4c4b8db irq_enter() and irq_exit() already take care of the preempt_count handling for interrupts, which increment and decrement the hardirq bits of the preempt count. So we can remove the preempt count handing in our IRQ entry/exit assembly, like x86 did some 9 years ago. Signed-off-by: Russell King Conflicts: arch/arm/kernel/entry-armv.S --- arch/arm/kernel/entry-armv.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 4f8e30f183b..2be87bf0610 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -208,6 +208,11 @@ ENDPROC(__dabt_svc) .align 5 __irq_svc: svc_entry + +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif + irq_handler #ifdef CONFIG_PREEMPT From c6cd2ac386ba57a5374d9c96c9d8f863a9b931d1 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. Oberhumer" Date: Mon, 13 Aug 2012 17:24:24 +0200 Subject: [PATCH 155/678] lib/lzo: Rename lzo1x_decompress.c to lzo1x_decompress_safe.c Rename the source file to match the function name and thereby also make room for a possible future even slightly faster "non-safe" decompressor version. Signed-off-by: Markus F.X.J. Oberhumer --- lib/lzo/Makefile | 2 +- lib/lzo/{lzo1x_decompress.c => lzo1x_decompress_safe.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename lib/lzo/{lzo1x_decompress.c => lzo1x_decompress_safe.c} (100%) diff --git a/lib/lzo/Makefile b/lib/lzo/Makefile index e764116ea12..f0f7d7ca2b8 100644 --- a/lib/lzo/Makefile +++ b/lib/lzo/Makefile @@ -1,5 +1,5 @@ lzo_compress-objs := lzo1x_compress.o -lzo_decompress-objs := lzo1x_decompress.o +lzo_decompress-objs := lzo1x_decompress_safe.o obj-$(CONFIG_LZO_COMPRESS) += lzo_compress.o obj-$(CONFIG_LZO_DECOMPRESS) += lzo_decompress.o diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress_safe.c similarity index 100% rename from lib/lzo/lzo1x_decompress.c rename to lib/lzo/lzo1x_decompress_safe.c From 66836ecd23352d11ac0fe29684d704e4716e77f9 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. 
Oberhumer" Date: Mon, 13 Aug 2012 17:25:44 +0200 Subject: [PATCH 156/678] lib/lzo: Update LZO compression to current upstream version This commit updates the kernel LZO code to the current upsteam version which features a significant speed improvement - benchmarking the Calgary and Silesia test corpora typically shows a doubled performance in both compression and decompression on modern i386/x86_64/powerpc machines. Signed-off-by: Markus F.X.J. Oberhumer --- include/linux/lzo.h | 15 +- lib/lzo/lzo1x_compress.c | 309 ++++++++++++++++------------- lib/lzo/lzo1x_decompress_safe.c | 341 +++++++++++++++----------------- lib/lzo/lzodefs.h | 34 +++- 4 files changed, 360 insertions(+), 339 deletions(-) diff --git a/include/linux/lzo.h b/include/linux/lzo.h index d793497ec1c..a0848d9377e 100644 --- a/include/linux/lzo.h +++ b/include/linux/lzo.h @@ -4,28 +4,28 @@ * LZO Public Kernel Interface * A mini subset of the LZO real-time data compression library * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ -#define LZO1X_MEM_COMPRESS (16384 * sizeof(unsigned char *)) -#define LZO1X_1_MEM_COMPRESS LZO1X_MEM_COMPRESS +#define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short)) +#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS #define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) -/* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */ +/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ int lzo1x_1_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem); + unsigned char *dst, size_t *dst_len, void *wrkmem); /* safe decompression with overrun testing */ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len); + unsigned char *dst, size_t *dst_len); /* * Return values (< 0 = Error) @@ -40,5 +40,6 @@ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, #define LZO_E_EOF_NOT_FOUND (-7) #define LZO_E_INPUT_NOT_CONSUMED (-8) #define LZO_E_NOT_YET_IMPLEMENTED (-9) +#define LZO_E_INVALID_ARGUMENT (-10) #endif diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index a6040990a62..d42efe514aa 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -1,194 +1,217 @@ /* - * LZO1X Compressor from MiniLZO + * LZO1X Compressor from LZO * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. 
Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ #include #include -#include #include +#include #include "lzodefs.h" static noinline size_t -_lzo1x_1_do_compress(const unsigned char *in, size_t in_len, - unsigned char *out, size_t *out_len, void *wrkmem) +lzo1x_1_do_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + size_t ti, void *wrkmem) { + const unsigned char *ip; + unsigned char *op; const unsigned char * const in_end = in + in_len; - const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; - const unsigned char ** const dict = wrkmem; - const unsigned char *ip = in, *ii = ip; - const unsigned char *end, *m, *m_pos; - size_t m_off, m_len, dindex; - unsigned char *op = out; + const unsigned char * const ip_end = in + in_len - 20; + const unsigned char *ii; + lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; - ip += 4; + op = out; + ip = in; + ii = ip; + ip += ti < 4 ? 4 - ti : 0; for (;;) { - dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK; - m_pos = dict[dindex]; - - if (m_pos < in) - goto literal; - - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) - goto literal; - - m_off = ip - m_pos; - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - - dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f); - m_pos = dict[dindex]; - - if (m_pos < in) - goto literal; - - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) - goto literal; - - m_off = ip - m_pos; - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - - goto literal; - -try_match: - if (get_unaligned((const unsigned short *)m_pos) - == get_unaligned((const unsigned short *)ip)) { - if (likely(m_pos[2] == ip[2])) - goto match; - } - + const unsigned char *m_pos; + size_t t, m_len, m_off; + u32 dv; literal: - dict[dindex] = ip; - ++ip; + ip += 1 + ((ip - ii) >> 5); +next: if (unlikely(ip >= ip_end)) break; - continue; - -match: - dict[dindex] = ip; - if (ip != ii) { - size_t t = ip - ii; + dv = get_unaligned_le32(ip); + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; + m_pos = in + dict[t]; + dict[t] = (lzo_dict_t) (ip - in); + if (unlikely(dv != get_unaligned_le32(m_pos))) + goto literal; + ii -= ti; + ti = 0; + t = ip - ii; + if (t != 0) { if (t <= 3) { op[-2] |= t; - } else if (t <= 18) { + COPY4(op, ii); + op += t; + } else if (t <= 16) { *op++ = (t - 3); + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += t; } else { - size_t tt = t - 18; - - *op++ = 0; - while (tt > 255) { - tt -= 255; + if (t <= 18) { + *op++ = (t - 3); + } else { + size_t tt = t - 18; *op++ = 0; + while (unlikely(tt > 255)) { + tt -= 255; + *op++ = 0; + } + *op++ = tt; } - *op++ = tt; + do { + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += 16; + ii += 16; + t -= 16; + } while (t >= 16); + if (t > 0) do { + *op++ = *ii++; + } while (--t > 0); } - do { - *op++ = *ii++; - } while (--t > 0); } - ip += 3; - if (m_pos[3] != *ip++ || m_pos[4] != *ip++ - || m_pos[5] != *ip++ || m_pos[6] != *ip++ - || m_pos[7] != *ip++ || m_pos[8] != *ip++) { - --ip; - m_len = ip - ii; + m_len = 4; + { +#if defined(LZO_USE_CTZ64) + u64 v; + v = get_unaligned((const u64 *) (ip + m_len)) ^ + get_unaligned((const u64 *) (m_pos + m_len)); + if (unlikely(v == 0)) { + do { + m_len += 8; + v = get_unaligned((const u64 *) (ip + m_len)) ^ + get_unaligned((const u64 *) (m_pos + m_len)); + if (unlikely(ip + m_len >= ip_end)) + goto 
m_len_done; + } while (v == 0); + } +# if defined(__LITTLE_ENDIAN) + m_len += (unsigned) __builtin_ctzll(v) / 8; +# elif defined(__BIG_ENDIAN) + m_len += (unsigned) __builtin_clzll(v) / 8; +# else +# error "missing endian definition" +# endif +#elif defined(LZO_USE_CTZ32) + u32 v; + v = get_unaligned((const u32 *) (ip + m_len)) ^ + get_unaligned((const u32 *) (m_pos + m_len)); + if (unlikely(v == 0)) { + do { + m_len += 4; + v = get_unaligned((const u32 *) (ip + m_len)) ^ + get_unaligned((const u32 *) (m_pos + m_len)); + if (unlikely(ip + m_len >= ip_end)) + goto m_len_done; + } while (v == 0); + } +# if defined(__LITTLE_ENDIAN) + m_len += (unsigned) __builtin_ctz(v) / 8; +# elif defined(__BIG_ENDIAN) + m_len += (unsigned) __builtin_clz(v) / 8; +# else +# error "missing endian definition" +# endif +#else + if (unlikely(ip[m_len] == m_pos[m_len])) { + do { + m_len += 1; + if (unlikely(ip + m_len >= ip_end)) + goto m_len_done; + } while (ip[m_len] == m_pos[m_len]); + } +#endif + } +m_len_done: - if (m_off <= M2_MAX_OFFSET) { - m_off -= 1; - *op++ = (((m_len - 1) << 5) - | ((m_off & 7) << 2)); - *op++ = (m_off >> 3); - } else if (m_off <= M3_MAX_OFFSET) { - m_off -= 1; + m_off = ip - m_pos; + ip += m_len; + ii = ip; + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { + m_off -= 1; + *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); + *op++ = (m_off >> 3); + } else if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + if (m_len <= M3_MAX_LEN) *op++ = (M3_MARKER | (m_len - 2)); - goto m3_m4_offset; - } else { - m_off -= 0x4000; - - *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11) - | (m_len - 2)); - goto m3_m4_offset; + else { + m_len -= M3_MAX_LEN; + *op++ = M3_MARKER | 0; + while (unlikely(m_len > 255)) { + m_len -= 255; + *op++ = 0; + } + *op++ = (m_len); } + *op++ = (m_off << 2); + *op++ = (m_off >> 6); } else { - end = in_end; - m = m_pos + M2_MAX_LEN + 1; - - while (ip < end && *m == *ip) { - m++; - ip++; - } - m_len = ip - ii; - - if (m_off <= M3_MAX_OFFSET) { - m_off -= 1; - if (m_len <= 33) { - *op++ = (M3_MARKER | (m_len - 2)); - } else { - m_len -= 33; - *op++ = M3_MARKER | 0; - goto m3_m4_len; - } - } else { - m_off -= 0x4000; - if (m_len <= M4_MAX_LEN) { - *op++ = (M4_MARKER - | ((m_off & 0x4000) >> 11) + m_off -= 0x4000; + if (m_len <= M4_MAX_LEN) + *op++ = (M4_MARKER | ((m_off >> 11) & 8) | (m_len - 2)); - } else { - m_len -= M4_MAX_LEN; - *op++ = (M4_MARKER - | ((m_off & 0x4000) >> 11)); -m3_m4_len: - while (m_len > 255) { - m_len -= 255; - *op++ = 0; - } - - *op++ = (m_len); + else { + m_len -= M4_MAX_LEN; + *op++ = (M4_MARKER | ((m_off >> 11) & 8)); + while (unlikely(m_len > 255)) { + m_len -= 255; + *op++ = 0; } + *op++ = (m_len); } -m3_m4_offset: - *op++ = ((m_off & 63) << 2); + *op++ = (m_off << 2); *op++ = (m_off >> 6); } - - ii = ip; - if (unlikely(ip >= ip_end)) - break; + goto next; } - *out_len = op - out; - return in_end - ii; + return in_end - (ii - ti); } -int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, - size_t *out_len, void *wrkmem) +int lzo1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) { - const unsigned char *ii; + const unsigned char *ip = in; unsigned char *op = out; - size_t t; + size_t l = in_len; + size_t t = 0; - if (unlikely(in_len <= M2_MAX_LEN + 5)) { - t = in_len; - } else { - t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem); + while (l > 20) { + size_t ll = l <= (M4_MAX_OFFSET + 1) ? 
l : (M4_MAX_OFFSET + 1); + uintptr_t ll_end = (uintptr_t) ip + ll; + if ((ll_end + ((t + ll) >> 5)) <= ll_end) + break; + BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); + memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); + ip += ll; op += *out_len; + l -= ll; } + t += l; if (t > 0) { - ii = in + in_len - t; + const unsigned char *ii = in + in_len - t; if (op == out && t <= 238) { *op++ = (17 + t); @@ -198,16 +221,21 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, *op++ = (t - 3); } else { size_t tt = t - 18; - *op++ = 0; while (tt > 255) { tt -= 255; *op++ = 0; } - *op++ = tt; } - do { + if (t >= 16) do { + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += 16; + ii += 16; + t -= 16; + } while (t >= 16); + if (t > 0) do { *op++ = *ii++; } while (--t > 0); } @@ -223,4 +251,3 @@ EXPORT_SYMBOL_GPL(lzo1x_1_compress); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X-1 Compressor"); - diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index f2fd0985022..0dba30ce1c7 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -1,12 +1,12 @@ /* - * LZO1X Decompressor from MiniLZO + * LZO1X Decompressor from LZO * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ @@ -15,225 +15,198 @@ #include #include #endif - #include #include #include "lzodefs.h" -#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) -#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) -#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) - -#define COPY4(dst, src) \ - put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) +#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x)) +#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x)) +#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun +#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun +#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, - unsigned char *out, size_t *out_len) + unsigned char *out, size_t *out_len) { + unsigned char *op; + const unsigned char *ip; + size_t t, next; + size_t state = 0; + const unsigned char *m_pos; const unsigned char * const ip_end = in + in_len; unsigned char * const op_end = out + *out_len; - const unsigned char *ip = in, *m_pos; - unsigned char *op = out; - size_t t; - *out_len = 0; + op = out; + ip = in; + if (unlikely(in_len < 3)) + goto input_overrun; if (*ip > 17) { t = *ip++ - 17; - if (t < 4) + if (t < 4) { + next = t; goto match_next; - if (HAVE_OP(t, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 1, ip_end, ip)) - goto input_overrun; - do { - *op++ = *ip++; - } while (--t > 0); - goto first_literal_run; - } - - while ((ip < ip_end)) { - t = *ip++; - if (t >= 16) - goto match; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 15 + *ip++; - } - if (HAVE_OP(t + 3, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 4, ip_end, ip)) - goto input_overrun; - - COPY4(op, ip); - op += 4; - ip += 4; - if (--t > 0) { - if (t >= 4) { - do { - COPY4(op, ip); - op += 4; - ip += 4; - t -= 4; - } while 
(t >= 4); - if (t > 0) { - do { - *op++ = *ip++; - } while (--t > 0); - } - } else { - do { - *op++ = *ip++; - } while (--t > 0); - } } + goto copy_literal_run; + } -first_literal_run: + for (;;) { t = *ip++; - if (t >= 16) - goto match; - m_pos = op - (1 + M2_MAX_OFFSET); - m_pos -= t >> 2; - m_pos -= *ip++ << 2; - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - - if (HAVE_OP(3, op_end, op)) - goto output_overrun; - *op++ = *m_pos++; - *op++ = *m_pos++; - *op++ = *m_pos; - - goto match_done; - - do { -match: - if (t >= 64) { - m_pos = op - 1; - m_pos -= (t >> 2) & 7; - m_pos -= *ip++ << 3; - t = (t >> 5) - 1; - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(t + 3 - 1, op_end, op)) - goto output_overrun; - goto copy_match; - } else if (t >= 32) { - t &= 31; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { + if (t < 16) { + if (likely(state == 0)) { + if (unlikely(t == 0)) { + while (unlikely(*ip == 0)) { t += 255; ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; + NEED_IP(1); } - t += 31 + *ip++; + t += 15 + *ip++; } - m_pos = op - 1; - m_pos -= get_unaligned_le16(ip) >> 2; - ip += 2; - } else if (t >= 16) { - m_pos = op; - m_pos -= (t & 8) << 11; - - t &= 7; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 7 + *ip++; + t += 3; +copy_literal_run: + if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) { + const unsigned char *ie = ip + t; + unsigned char *oe = op + t; + do { + COPY8(op, ip); + op += 8; + ip += 8; + COPY8(op, ip); + op += 8; + ip += 8; + } while (ip < ie); + ip = ie; + op = oe; + } else { + NEED_OP(t); + NEED_IP(t + 3); + do { + *op++ = *ip++; + } while (--t > 0); } - m_pos -= get_unaligned_le16(ip) >> 2; - ip += 2; - if (m_pos == op) - goto eof_found; - m_pos -= 0x4000; - } else { + state = 4; + continue; + } else if (state != 4) { + next = t & 3; m_pos = op - 1; m_pos -= t >> 2; m_pos -= *ip++ << 2; - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(2, op_end, op)) - goto output_overrun; - - *op++ = *m_pos++; - *op++ = *m_pos; - goto match_done; + TEST_LB(m_pos); + NEED_OP(2); + op[0] = m_pos[0]; + op[1] = m_pos[1]; + op += 2; + goto match_next; + } else { + next = t & 3; + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + t = 3; } - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(t + 3 - 1, op_end, op)) - goto output_overrun; - - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { - COPY4(op, m_pos); - op += 4; - m_pos += 4; - t -= 4 - (3 - 1); + } else if (t >= 64) { + next = t & 3; + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1 + (3 - 1); + } else if (t >= 32) { + t = (t & 31) + (3 - 1); + if (unlikely(t == 2)) { + while (unlikely(*ip == 0)) { + t += 255; + ip++; + NEED_IP(1); + } + t += 31 + *ip++; + NEED_IP(2); + } + m_pos = op - 1; + next = get_unaligned_le16(ip); + ip += 2; + m_pos -= next >> 2; + next &= 3; + } else { + m_pos = op; + m_pos -= (t & 8) << 11; + t = (t & 7) + (3 - 1); + if (unlikely(t == 2)) { + while (unlikely(*ip == 0)) { + t += 255; + ip++; + NEED_IP(1); + } + t += 7 + *ip++; + NEED_IP(2); + } + next = get_unaligned_le16(ip); + ip += 2; + m_pos -= next >> 2; + next &= 3; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; + } + TEST_LB(m_pos); + if (op - m_pos >= 8) { + unsigned char *oe = op + t; + if (likely(HAVE_OP(t + 15))) { do { - 
COPY4(op, m_pos); - op += 4; - m_pos += 4; - t -= 4; - } while (t >= 4); - if (t > 0) - do { - *op++ = *m_pos++; - } while (--t > 0); + COPY8(op, m_pos); + op += 8; + m_pos += 8; + COPY8(op, m_pos); + op += 8; + m_pos += 8; + } while (op < oe); + op = oe; + if (HAVE_IP(6)) { + state = next; + COPY4(op, ip); + op += next; + ip += next; + continue; + } } else { -copy_match: - *op++ = *m_pos++; - *op++ = *m_pos++; + NEED_OP(t); do { *op++ = *m_pos++; - } while (--t > 0); + } while (op < oe); } -match_done: - t = ip[-2] & 3; - if (t == 0) - break; + } else { + unsigned char *oe = op + t; + NEED_OP(t); + op[0] = m_pos[0]; + op[1] = m_pos[1]; + op += 2; + m_pos += 2; + do { + *op++ = *m_pos++; + } while (op < oe); + } match_next: - if (HAVE_OP(t, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 1, ip_end, ip)) - goto input_overrun; - - *op++ = *ip++; - if (t > 1) { + state = next; + t = next; + if (likely(HAVE_IP(6) && HAVE_OP(4))) { + COPY4(op, ip); + op += t; + ip += t; + } else { + NEED_IP(t + 3); + NEED_OP(t); + while (t > 0) { *op++ = *ip++; - if (t > 2) - *op++ = *ip++; + t--; } - - t = *ip++; - } while (ip < ip_end); + } } - *out_len = op - out; - return LZO_E_EOF_NOT_FOUND; - eof_found: *out_len = op - out; - return (ip == ip_end ? LZO_E_OK : - (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); + return (t != 3 ? LZO_E_ERROR : + ip == ip_end ? LZO_E_OK : + ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN); + input_overrun: *out_len = op - out; return LZO_E_INPUT_OVERRUN; diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index b6d482c492e..ddc8db510d8 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -1,19 +1,37 @@ /* * lzodefs.h -- architecture, OS and compiler specific defines * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. 
Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ -#define LZO_VERSION 0x2020 -#define LZO_VERSION_STRING "2.02" -#define LZO_VERSION_DATE "Oct 17 2005" + +#define COPY4(dst, src) \ + put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) +#if defined(__x86_64__) +#define COPY8(dst, src) \ + put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) +#else +#define COPY8(dst, src) \ + COPY4(dst, src); COPY4((dst) + 4, (src) + 4) +#endif + +#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) +#error "conflicting endian definitions" +#elif defined(__x86_64__) +#define LZO_USE_CTZ64 1 +#define LZO_USE_CTZ32 1 +#elif defined(__i386__) || defined(__powerpc__) +#define LZO_USE_CTZ32 1 +#else +#define LZO_USE_CTZ32 1 +#endif #define M1_MAX_OFFSET 0x0400 #define M2_MAX_OFFSET 0x0800 @@ -34,8 +52,10 @@ #define M3_MARKER 32 #define M4_MARKER 16 -#define D_BITS 14 -#define D_MASK ((1u << D_BITS) - 1) +#define lzo_dict_t unsigned short +#define D_BITS 13 +#define D_SIZE (1u << D_BITS) +#define D_MASK (D_SIZE - 1) #define D_HIGH ((D_MASK >> 1) + 1) #define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \ From 8434bbe1302ef8dd1d611e2678eb846ba636e125 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 26 Nov 2012 17:30:15 -0500 Subject: [PATCH 157/678] Makefile optimizations - More optimized compiler options for makefile - Switch to -mtune=cortex-a8 in makefile, arm/makefile, and kernel/makefile as it appears to produce more efficient code --- Makefile | 13 +++++++------ arch/arm/Makefile | 2 +- kernel/Makefile | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 053b84d4866..708bac93604 100644 --- a/Makefile +++ b/Makefile @@ -347,10 +347,11 @@ CHECK = sparse CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void $(CF) -CFLAGS_MODULE = -O2 -mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize -AFLAGS_MODULE = -LDFLAGS_MODULE = -CFLAGS_KERNEL = -O2 -mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize +MODFLAGS = -DMODULE -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a8 -marm -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops +CFLAGS_MODULE = $(MODFLAGS) +AFLAGS_MODULE = $(MODFLAGS) +LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds +CFLAGS_KERNEL = -O2 -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a8 -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage @@ -369,9 +370,9 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -Werror-implicit-function-declaration \ -Wno-format-security \ -fno-delete-null-pointer-checks -mno-unaligned-access \ - -mtune=cortex-a9 -march=armv7-a -mfpu=neon \ + -mtune=cortex-a8 -march=armv7-a -mfpu=neon \ -fpredictive-commoning -fgcse-after-reload -ftree-vectorize \ - -fipa-cp-clone -fsingle-precision-constant -pipe \ + -fipa-cp-clone -fsingle-precision-constant \ -funswitch-loops -floop-interchange \ -floop-strip-mine -floop-block diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 4d81b7fbb0a..9a208d1a217 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -58,7 +58,7 @@ comma = , # macro, but instead defines a whole series of macros which 
makes # testing for a specific architecture or later rather impossible. #arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv5te -Wa$(comma)-march=armv7-a) -arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv7-a -Wa$(comma)-march=armv7-a) +arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a8 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv7-a -Wa$(comma)-march=armv7-a) arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) # Only override the compiler option if ARMv6. The ARMv6K extensions are # always available in ARMv7 diff --git a/kernel/Makefile b/kernel/Makefile index 9b45c84662f..7595272d146 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -116,7 +116,7 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o # I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k # to get a correct value for the wait-channel (WCHAN in ps). --davidm #CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer -CFLAGS_sched.o := -O2 -fomit-frame-pointer -mtune=cortex-a9 -march=armv7-a -ftree-vectorize +CFLAGS_sched.o := -O2 -fomit-frame-pointer -mtune=cortex-a8 -march=armv7-a -ftree-vectorize #endif $(obj)/configs.o: $(obj)/config_data.h From 58e0d1bb0e372b058e8c93b1d8af9accd58ccd48 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 26 Nov 2012 18:26:11 -0500 Subject: [PATCH 158/678] lib: lzo: decompress_unlzo.c: fix to allow lzo kernel compression with lzo1x_decompress_safe.c --- lib/decompress_unlzo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index 5a7a2adf4c4..26f89ad0330 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -31,7 +31,7 @@ */ #ifdef STATIC -#include "lzo/lzo1x_decompress.c" +#include "lzo/lzo1x_decompress_safe.c" #else #include #endif From fd0383700a3c5caf8782c07112c7ab8213ba76fe Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 26 Nov 2012 18:46:55 -0500 Subject: [PATCH 159/678] Revert "Revert " arm: tegra: usb: phy: fix hotplug function"" This reverts commit 325166496a39079c580693186fc508f4afdc8798. 
--- arch/arm/mach-tegra/usb_phy.c | 6 +++--- drivers/usb/host/ehci-tegra.c | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/usb_phy.c b/arch/arm/mach-tegra/usb_phy.c index e40e2415c5a..6e84e3d8279 100755 --- a/arch/arm/mach-tegra/usb_phy.c +++ b/arch/arm/mach-tegra/usb_phy.c @@ -865,7 +865,7 @@ static void utmi_phy_clk_disable(struct tegra_usb_phy *phy) val |= HOSTPC1_DEVLC_PHCD; writel(val, base + HOSTPC1_DEVLC); #endif - if (phy->instance == 2) { + if (phy->hotplug) { val = readl(base + USB_SUSP_CTRL); val |= USB_PHY_CLK_VALID_INT_ENB; writel(val, base + USB_SUSP_CTRL); @@ -1482,7 +1482,7 @@ static int utmi_phy_power_off(struct tegra_usb_phy *phy, bool is_dpd) writel(val, base + UTMIP_BAT_CHRG_CFG0); } - if (phy->instance != 2) { + if (!phy->hotplug) { val = readl(base + UTMIP_XCVR_CFG0); val |= (UTMIP_FORCE_PD_POWERDOWN | UTMIP_FORCE_PD2_POWERDOWN | UTMIP_FORCE_PDZI_POWERDOWN); @@ -1512,7 +1512,7 @@ static int utmi_phy_power_off(struct tegra_usb_phy *phy, bool is_dpd) utmi_phy_clk_disable(phy); - utmip_pad_power_off(phy, true); + utmip_pad_power_off(phy, is_dpd); return 0; } diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index cad3bbb2942..76f40688f82 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -208,7 +208,6 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) val &= ~TEGRA_USB_PHY_CLK_VALID_INT_ENB | TEGRA_USB_PHY_CLK_VALID_INT_STS; writel(val , (hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET)); - val = readl(&hw->status); if (!(val & STS_PCD)) { spin_unlock(&ehci->lock); @@ -218,6 +217,12 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) val &= ~(TEGRA_USB_PORTSC1_WKCN | PORT_RWC_BITS); writel(val , (hcd->regs + TEGRA_USB_PORTSC1_OFFSET)); } + else if (tegra->bus_suspended && + tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) { + printk("%s: no device connected before suspend\n", __func__); + spin_unlock(&ehci->lock); + return 0; + } spin_unlock(&ehci->lock); } @@ -1366,6 +1371,7 @@ static int tegra_ehci_remove(struct platform_device *pdev) usb_remove_hcd(hcd); usb_put_hcd(hcd); tegra_usb_phy_power_off(tegra->phy, true); + tegra_ehci_disable_phy_interrupt(hcd); tegra_usb_phy_close(tegra->phy); iounmap(hcd->regs); From 10d0bd6bfce4f7924f6a407bc17febe012b4012d Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 26 Nov 2012 20:32:19 -0500 Subject: [PATCH 160/678] mach-tegra: add 700Mhz LP overclock option --- arch/arm/mach-tegra/Kconfig | 2 ++ arch/arm/mach-tegra/tegra3_clocks.c | 37 ++++++++++++++++++++++++++--- arch/arm/mach-tegra/tegra3_dvfs.c | 3 +++ 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index f82c2ac9f43..44ecaa48bce 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -329,6 +329,8 @@ choice bool "550 MHz" config LP_OC_620 bool "620 MHz" + config LP_OC_700 + bool "700 MHz" endchoice diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 902c2088fdb..df9af77ba97 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -3905,7 +3905,7 @@ static struct clk tegra_clk_cclk_lp = { .inputs = mux_cclk_lp, .reg = 0x370, .ops = &tegra_super_ops, - .max_rate = 620000000, + .max_rate = 720000000, }; static struct clk tegra_clk_sclk = { @@ -3933,7 +3933,7 @@ static struct clk tegra_clk_virtual_cpu_lp = { .name = "cpu_lp", .parent = &tegra_clk_cclk_lp, .ops = &tegra_cpu_ops, - .max_rate = 
620000000, + .max_rate = 720000000, .u.cpu = { .main = &tegra_pll_x, .backup = &tegra_pll_p, @@ -4644,15 +4644,45 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { { 3, 340000 }, { 4, 475000 }, #ifdef CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_700 + { 5, 550000 }, + { 5, 700000 }, + { 7, 900000 }, + { 8, 1000000 }, + { 9, 1100000 }, + {10, 1200000 }, + {11, 1300000 }, + {12, 1400000 }, + {13, 1500000 }, + {14, 1600000 }, + {15, CPUFREQ_TABLE_END }, +#endif #ifdef CONFIG_LP_OC_620 { 5, 620000 }, + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1100000 }, + { 9, 1200000 }, + {10, 1300000 }, + {11, 1400000 }, + {12, 1500000 }, + {13, 1600000 }, + {14, CPUFREQ_TABLE_END }, #endif #ifdef CONFIG_LP_OC_550 { 5, 550000 }, + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1100000 }, + { 9, 1200000 }, + {10, 1300000 }, + {11, 1400000 }, + {12, 1500000 }, + {13, 1600000 }, + {14, CPUFREQ_TABLE_END }, #endif #else { 5, 620000 }, -#endif { 6, 860000 }, { 7, 1000000 }, { 8, 1100000 }, @@ -4662,6 +4692,7 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { {12, 1500000 }, {13, 1600000 }, {14, CPUFREQ_TABLE_END }, +#endif }; static struct cpufreq_frequency_table freq_table_1p7GHz[] = { diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 70d78d3a4c5..044a611145e 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -225,6 +225,9 @@ static struct dvfs core_dvfs_table[] = { /* Clock limits for internal blocks, PLLs */ CORE_DVFS("cpu_lp", 0, 1, KHZ, 1, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), #ifdef CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_700 + CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 342000, 475000, 550000, 700000, 700000, 700000, 700000, 700000), +#endif #ifdef CONFIG_LP_OC_620 CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 475000, 620000, 620000, 620000, 620000, 620000), #endif From 017c7094cde326a6264f1e3c5a9003247bdf6c34 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 26 Nov 2012 20:33:04 -0500 Subject: [PATCH 161/678] cpufreq: interactive: make intial input boost freq depend on lp overclock config --- drivers/cpufreq/cpufreq_interactive.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 6a0490c1884..da22e89ca0c 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -83,7 +83,19 @@ static struct cpufreq_interactive_core_lock core_lock; static unsigned int hispeed_freq = 1100000; /* CPU will be boosted to this freq - default 1000Mhz - when an input event is detected */ +#ifdef CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_700 +static unsigned int input_boost_freq = 700000; +#endif +#ifdef CONFIG_LP_OC_620 static unsigned int input_boost_freq = 620000; +#endif +#ifdef CONFIG_LP_OC_550 +static unsigned int input_boost_freq = 550000; +#endif +#else +static unsigned int input_boost_freq = 475000; +#endif /* Go to hispeed_freq when CPU load at or above this value. */ #define DEFAULT_GO_HISPEED_LOAD 80 From 94c31dfe2a104075f9ec825febbf06e151fb081c Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 26 Nov 2012 20:34:37 -0500 Subject: [PATCH 162/678] Revert " arm: tegra: usb: phy: fix hotplug function" This reverts commit e34b8d1408d1bb875c298b182d34657c4473f577. 
--- arch/arm/mach-tegra/usb_phy.c | 6 +++--- drivers/usb/host/ehci-tegra.c | 8 +------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/arm/mach-tegra/usb_phy.c b/arch/arm/mach-tegra/usb_phy.c index 6e84e3d8279..e40e2415c5a 100755 --- a/arch/arm/mach-tegra/usb_phy.c +++ b/arch/arm/mach-tegra/usb_phy.c @@ -865,7 +865,7 @@ static void utmi_phy_clk_disable(struct tegra_usb_phy *phy) val |= HOSTPC1_DEVLC_PHCD; writel(val, base + HOSTPC1_DEVLC); #endif - if (phy->hotplug) { + if (phy->instance == 2) { val = readl(base + USB_SUSP_CTRL); val |= USB_PHY_CLK_VALID_INT_ENB; writel(val, base + USB_SUSP_CTRL); @@ -1482,7 +1482,7 @@ static int utmi_phy_power_off(struct tegra_usb_phy *phy, bool is_dpd) writel(val, base + UTMIP_BAT_CHRG_CFG0); } - if (!phy->hotplug) { + if (phy->instance != 2) { val = readl(base + UTMIP_XCVR_CFG0); val |= (UTMIP_FORCE_PD_POWERDOWN | UTMIP_FORCE_PD2_POWERDOWN | UTMIP_FORCE_PDZI_POWERDOWN); @@ -1512,7 +1512,7 @@ static int utmi_phy_power_off(struct tegra_usb_phy *phy, bool is_dpd) utmi_phy_clk_disable(phy); - utmip_pad_power_off(phy, is_dpd); + utmip_pad_power_off(phy, true); return 0; } diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 76f40688f82..cad3bbb2942 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -208,6 +208,7 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) val &= ~TEGRA_USB_PHY_CLK_VALID_INT_ENB | TEGRA_USB_PHY_CLK_VALID_INT_STS; writel(val , (hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET)); + val = readl(&hw->status); if (!(val & STS_PCD)) { spin_unlock(&ehci->lock); @@ -217,12 +218,6 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) val &= ~(TEGRA_USB_PORTSC1_WKCN | PORT_RWC_BITS); writel(val , (hcd->regs + TEGRA_USB_PORTSC1_OFFSET)); } - else if (tegra->bus_suspended && - tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) { - printk("%s: no device connected before suspend\n", __func__); - spin_unlock(&ehci->lock); - return 0; - } spin_unlock(&ehci->lock); } @@ -1371,7 +1366,6 @@ static int tegra_ehci_remove(struct platform_device *pdev) usb_remove_hcd(hcd); usb_put_hcd(hcd); tegra_usb_phy_power_off(tegra->phy, true); - tegra_ehci_disable_phy_interrupt(hcd); tegra_usb_phy_close(tegra->phy); iounmap(hcd->regs); From 15169ccf4af9f465d925ade84a9291f54779c4cd Mon Sep 17 00:00:00 2001 From: Dennis Rassmann Date: Wed, 3 Oct 2012 18:37:18 +0200 Subject: [PATCH 163/678] drivers: cpufreq: ondemand: add touchpoke & 2phase. 
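This patch adds two mechanisms to the ondemand governor: a touch "poke" (an input handler schedules work that raises the policy to a per-online-CPU-count frequency from Touch_poke_attr[] and temporarily switches to ui_sampling_rate for ui_counter samples) and a two-phase ramp, in which sustained high load is required before the governor requests policy->max. The following is a simplified stand-alone model of the two-phase decision, not the driver code itself; pick_target() and the 1100000 kHz demo value are illustrative only (the patch defaults two_phase_freq to 0, i.e. disabled, until it is set via sysfs or set_two_phase_freq()).

/* Stand-alone model of the two-phase ramp added by this patch.
 * Simplified: no locking, no per-CPU state; frequencies are in kHz. */
#include <stdio.h>

#define TWO_PHASE_FREQ 1100000U /* demo value; the patch defaults this to 0 */
#define MAX_FREQ       1600000U

static unsigned int counter; /* consecutive high-load samples, capped at 5 */
static unsigned int phase;   /* 0 = idle phase, 1 = busy phase */

static unsigned int pick_target(int load_high)
{
	if (load_high) {
		if (counter < 5 && ++counter > 2)
			phase = 1;      /* sustained load: enter busy phase */
		return phase ? MAX_FREQ : TWO_PHASE_FREQ;
	}
	if (counter > 0 && --counter == 0)
		phase = 0;              /* load gone: back to idle phase */
	return 0;                       /* fall through to normal scaling */
}

int main(void)
{
	int samples[] = { 1, 1, 1, 1, 0, 0, 0, 0, 1 };
	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("sample %u -> %u kHz\n", i, pick_target(samples[i]));
	return 0;
}

Each high-load sample advances the counter; only after roughly three consecutive samples does the governor enter the busy phase and request policy->max, and the counter must drain back to zero before it drops to the idle phase again.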
Signed-off-by: Dennis Rassmann Conflicts: drivers/cpufreq/cpufreq_ondemand.c --- drivers/cpufreq/cpufreq_ondemand.c | 396 ++++++++++++++++++++++++++++- 1 file changed, 386 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index c72b0170499..1eaf938ae21 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -22,22 +22,30 @@ #include #include #include +#include +#include +#include +#include +#include /* * dbs is used in this file as a shortform for demandbased switching * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (3) -#define DEF_FREQUENCY_UP_THRESHOLD (95) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) #define MICRO_FREQUENCY_UP_THRESHOLD (95) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) -#define MICRO_FREQUENCY_DEF_SAMPLE_RATE (15000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) +#define DEF_SAMPLING_RATE (50000) +#define DEF_IO_IS_BUSY (1) +#define DEF_UI_DYNAMIC_SAMPLING_RATE (30000) +#define DEF_UI_COUNTER (5) /* * The polling frequency of this governor depends on the capability of @@ -52,6 +60,7 @@ #define MIN_SAMPLING_RATE_RATIO (2) static unsigned int min_sampling_rate; +static unsigned int def_sampling_rate; #define LATENCY_MULTIPLIER (1000) #define MIN_LATENCY_MULTIPLIER (100) @@ -95,9 +104,11 @@ struct cpu_dbs_info_s { */ struct mutex timer_mutex; }; + static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); static unsigned int dbs_enable; /* number of CPUs using this policy */ +static unsigned int g_ui_counter = 0; /* * dbs_mutex protects dbs_enable in governor start/stop. 
@@ -112,12 +123,25 @@ static struct dbs_tuners { unsigned int sampling_down_factor; unsigned int powersave_bias; unsigned int io_is_busy; +#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE + unsigned int two_phase_freq; +#endif + unsigned int touch_poke; + unsigned int origin_sampling_rate; + unsigned int ui_sampling_rate; + unsigned int ui_counter; } dbs_tuners_ins = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, .ignore_nice = 0, .powersave_bias = 0, +#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE + .two_phase_freq = 0, +#endif + .touch_poke = 1, + .ui_sampling_rate = DEF_UI_DYNAMIC_SAMPLING_RATE, + .ui_counter = DEF_UI_COUNTER, }; static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, @@ -254,9 +278,15 @@ show_one(sampling_rate, sampling_rate); show_one(io_is_busy, io_is_busy); show_one(up_threshold, up_threshold); show_one(sampling_down_factor, sampling_down_factor); +show_one(down_differential, down_differential); show_one(ignore_nice_load, ignore_nice); show_one(powersave_bias, powersave_bias); - +#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE +show_one(two_phase_freq, two_phase_freq); +#endif +show_one(touch_poke, touch_poke); +show_one(ui_sampling_rate, ui_sampling_rate); +show_one(ui_counter, ui_counter); static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, const char *buf, size_t count) { @@ -266,6 +296,59 @@ static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, if (ret != 1) return -EINVAL; dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + dbs_tuners_ins.origin_sampling_rate = dbs_tuners_ins.sampling_rate; + return count; +} +#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE +static ssize_t store_two_phase_freq(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.two_phase_freq = input; + + return count; +} +#endif + +static unsigned int Touch_poke_attr[4] = {1300000, 880000, 0, 0}; +static unsigned int Touch_poke_boost_duration_ms = 0; +static unsigned long Touch_poke_boost_till_jiffies = 0; + +static ssize_t store_touch_poke(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + int ret; + ret = sscanf(buf, "%u,%u,%u,%u,%u", &Touch_poke_attr[0], &Touch_poke_attr[1], + &Touch_poke_attr[2], &Touch_poke_attr[3], &Touch_poke_boost_duration_ms); + if (ret < 4) + return -EINVAL; + + if (ret != 5) + Touch_poke_boost_duration_ms = 0; + + if(Touch_poke_attr[0] == 0) + dbs_tuners_ins.touch_poke = 0; + else + dbs_tuners_ins.touch_poke = 1; + + return count; +} +static ssize_t store_ui_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.ui_sampling_rate = max(input, min_sampling_rate); + return count; } @@ -297,6 +380,21 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, return count; } +static ssize_t store_down_differential(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if(ret != 1 || input > DEF_FREQUENCY_DOWN_DIFFERENTIAL || + input < MICRO_FREQUENCY_DOWN_DIFFERENTIAL) { + return -EINVAL; + } + dbs_tuners_ins.down_differential = input; + return count; +} + static ssize_t 
store_sampling_down_factor(struct kobject *a, struct attribute *b, const char *buf, size_t count) { @@ -368,21 +466,49 @@ static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, return count; } +static ssize_t store_ui_counter(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if(ret != 1) + return -EINVAL; + + dbs_tuners_ins.ui_counter = input; + return count; +} + define_one_global_rw(sampling_rate); define_one_global_rw(io_is_busy); define_one_global_rw(up_threshold); +define_one_global_rw(down_differential); define_one_global_rw(sampling_down_factor); define_one_global_rw(ignore_nice_load); define_one_global_rw(powersave_bias); +#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE +define_one_global_rw(two_phase_freq); +#endif +define_one_global_rw(touch_poke); +define_one_global_rw(ui_sampling_rate); +define_one_global_rw(ui_counter); static struct attribute *dbs_attributes[] = { &sampling_rate_min.attr, &sampling_rate.attr, &up_threshold.attr, + &down_differential.attr, &sampling_down_factor.attr, &ignore_nice_load.attr, &powersave_bias.attr, &io_is_busy.attr, +#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE + &two_phase_freq.attr, +#endif + &touch_poke.attr, + &ui_sampling_rate.attr, + &ui_counter.attr, NULL }; @@ -397,23 +523,43 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) { if (dbs_tuners_ins.powersave_bias) freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H); - else if (p->cur == p->max) - return; + //else if (p->cur == p->max) + // return; __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ? CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); } +#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE +int set_two_phase_freq(int cpufreq) +{ + dbs_tuners_ins.two_phase_freq = cpufreq; + return 0; +} +#endif static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) { unsigned int max_load_freq; + unsigned int debug_freq; + unsigned int debug_load; + unsigned int debug_iowait; struct cpufreq_policy *policy; unsigned int j; +#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE + static unsigned int phase = 0; + static unsigned int counter = 0; +#endif this_dbs_info->freq_lo = 0; policy = this_dbs_info->cur_policy; + /* + * keep freq for touch boost + */ + if (Touch_poke_boost_till_jiffies > jiffies) + return; + /* * Every sampling_rate, we check, if current idle time is less * than 20% (default), then we try to increase frequency @@ -490,20 +636,60 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) freq_avg = policy->cur; load_freq = load * freq_avg; - if (load_freq > max_load_freq) + if (load_freq > max_load_freq) { max_load_freq = load_freq; + debug_load = load; + debug_iowait = 100 * iowait_time / wall_time; + } + } + + if (g_ui_counter > 0){ + g_ui_counter--; + if(g_ui_counter == 0) + dbs_tuners_ins.sampling_rate = dbs_tuners_ins.origin_sampling_rate; } /* Check for frequency increase */ if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { /* If switching to max speed, apply sampling_down_factor */ +#ifndef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE if (policy->cur < policy->max) this_dbs_info->rate_mult = dbs_tuners_ins.sampling_down_factor; + debug_freq = policy->max; dbs_freq_increase(policy, policy->max); +#else + if (counter < 5) { + counter++; + if (counter > 2) { + /* change to busy phase */ + phase = 1; + } + } + if (dbs_tuners_ins.two_phase_freq != 0 && phase == 0) { + debug_freq = dbs_tuners_ins.two_phase_freq; + /* idle 
phase */ + dbs_freq_increase(policy, dbs_tuners_ins.two_phase_freq); + } else { + /* busy phase */ + if (policy->cur < policy->max) + this_dbs_info->rate_mult = + dbs_tuners_ins.sampling_down_factor; + debug_freq = policy->max; + dbs_freq_increase(policy, policy->max); + } +#endif return; } - +#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE + if (counter > 0) { + counter--; + if (counter == 0) { + /* change to idle phase */ + phase = 0; + } + } +#endif /* Check for frequency decrease */ /* if we cannot reduce the frequency anymore, break out early */ if (policy->cur == policy->min) @@ -529,11 +715,13 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) freq_next = policy->min; if (!dbs_tuners_ins.powersave_bias) { + debug_freq = freq_next; __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); } else { int freq = powersave_bias_target(policy, freq_next, CPUFREQ_RELATION_L); + debug_freq = freq; __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); } @@ -617,9 +805,175 @@ static int should_io_be_busy(void) boot_cpu_data.x86_model >= 15) return 1; #endif + return DEF_IO_IS_BUSY; +} + +#define AID_SYSTEM (1000) +static void dbs_chown(void) +{ + int ret; + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/ignore_nice_load", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown ignore_nice_load error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/io_is_busy", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown io_is_busy error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/powersave_bias", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown powersave_bias error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/sampling_down_factor", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown sampling_down_factor error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/sampling_rate", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown sampling_rate error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/two_phase_freq", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown two_phase_freq error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/up_threshold", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown up_threshold error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/down_differential", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown down_differential error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/touch_poke", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown touch_poke error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/ui_sampling_rate", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown ui_sampling_rate error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/ui_counter", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown ui_counter error: %d", ret); +} + +static void dbs_refresh_callback(struct work_struct *unused) +{ + struct cpufreq_policy *policy; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int nr_cpus; + unsigned int touch_poke_freq; + unsigned int cpu = smp_processor_id(); + + if (lock_policy_rwsem_write(cpu) < 0) + return; + + this_dbs_info = 
&per_cpu(od_cpu_dbs_info, cpu); + policy = this_dbs_info->cur_policy; + + g_ui_counter = dbs_tuners_ins.ui_counter; + if(dbs_tuners_ins.ui_counter > 0) + dbs_tuners_ins.sampling_rate = dbs_tuners_ins.ui_sampling_rate; + if (Touch_poke_boost_duration_ms) + Touch_poke_boost_till_jiffies = + jiffies + msecs_to_jiffies(Touch_poke_boost_duration_ms); + + /* We poke the frequency base on the online cpu number */ + nr_cpus = num_online_cpus(); + + touch_poke_freq = Touch_poke_attr[nr_cpus-1]; + + if(touch_poke_freq == 0 || policy->cur >= touch_poke_freq){ + unlock_policy_rwsem_write(cpu); + return; + } + + __cpufreq_driver_target(policy, touch_poke_freq, + CPUFREQ_RELATION_L); + this_dbs_info->prev_cpu_idle = get_cpu_idle_time(cpu, + &this_dbs_info->prev_cpu_wall); + + unlock_policy_rwsem_write(cpu); +} + +static DECLARE_WORK(dbs_refresh_work, dbs_refresh_callback); + +static void dbs_input_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + if (dbs_tuners_ins.touch_poke) + schedule_work(&dbs_refresh_work); +} + +static int input_dev_filter(const char* input_dev_name) +{ + int ret = 0; + if (strstr(input_dev_name, "touchscreen") || + strstr(input_dev_name, "-keypad") || + strstr(input_dev_name, "-nav") || + strstr(input_dev_name, "-oj")) { + } + else { + ret = 1; + } + return ret; +} + + +static int dbs_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) +{ + struct input_handle *handle; + int error; + + /* filter out those input_dev that we don't care */ + if (input_dev_filter(dev->name)) + return 0; + + handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = "cpufreq"; + + error = input_register_handle(handle); + if (error) + goto err2; + + error = input_open_device(handle); + if (error) + goto err1; + return 0; +err1: + input_unregister_handle(handle); +err2: + kfree(handle); + return error; +} + +static void dbs_input_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); } +static const struct input_device_id dbs_ids[] = { + { .driver_info = 1 }, + { }, +}; +static struct input_handler dbs_input_handler = { + .event = dbs_input_event, + .connect = dbs_input_connect, + .disconnect = dbs_input_disconnect, + .name = "cpufreq_ond", + .id_table = dbs_ids, +}; + static int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { @@ -667,15 +1021,26 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return rc; } + dbs_chown(); + /* policy latency is in nS. 
Convert it to uS first */ latency = policy->cpuinfo.transition_latency / 1000; if (latency == 0) latency = 1; /* Bring kernel and HW constraints together */ - min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; - dbs_tuners_ins.sampling_rate = MICRO_FREQUENCY_DEF_SAMPLE_RATE; + min_sampling_rate = max(min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + dbs_tuners_ins.sampling_rate = + max(min_sampling_rate, + latency * LATENCY_MULTIPLIER); + if (def_sampling_rate) + dbs_tuners_ins.sampling_rate = def_sampling_rate; + dbs_tuners_ins.origin_sampling_rate = dbs_tuners_ins.sampling_rate; dbs_tuners_ins.io_is_busy = should_io_be_busy(); } + if (!cpu) + rc = input_register_handler(&dbs_input_handler); + mutex_unlock(&dbs_mutex); mutex_init(&this_dbs_info->timer_mutex); @@ -688,6 +1053,10 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, mutex_lock(&dbs_mutex); mutex_destroy(&this_dbs_info->timer_mutex); dbs_enable--; + + if (!cpu) + input_unregister_handler(&dbs_input_handler); + if (!dbs_enable) sysfs_remove_group(cpufreq_global_kobject, &dbs_attr_group); @@ -716,6 +1085,7 @@ static int __init cpufreq_gov_dbs_init(void) idle_time = get_cpu_idle_time_us(cpu, &wall); put_cpu(); + if (idle_time != -1ULL) { /* Idle micro accounting is supported. Use finer thresholds */ dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; dbs_tuners_ins.down_differential = @@ -726,6 +1096,12 @@ static int __init cpufreq_gov_dbs_init(void) * timer might skip some samples if idle/sleeping as needed. */ min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + } else { + /* For correct statistics, we need 10 ticks for each measure */ + min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); + } + def_sampling_rate = DEF_SAMPLING_RATE; return cpufreq_register_governor(&cpufreq_gov_ondemand); } From 4258f10dead99de33a201aa1bddc0701729138c6 Mon Sep 17 00:00:00 2001 From: Dennis Rassmann Date: Wed, 3 Oct 2012 18:53:01 +0200 Subject: [PATCH 164/678] mach-tegra: pm: fix wrong awake cpufreq Signed-off-by: Dennis Rassmann --- arch/arm/mach-tegra/pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c index 7d2d1016843..7b883c9d683 100644 --- a/arch/arm/mach-tegra/pm.c +++ b/arch/arm/mach-tegra/pm.c @@ -176,7 +176,7 @@ struct suspend_context tegra_sctx; #define MC_SECURITY_SIZE 0x70 #define MC_SECURITY_CFG2 0x7c -#define AWAKE_CPU_FREQ_MIN 100000 +#define AWAKE_CPU_FREQ_MIN 102000 static struct pm_qos_request_list awake_cpu_freq_req; struct dvfs_rail *tegra_cpu_rail; From 09504def966cca9f0beb0b00f1d430252925f147 Mon Sep 17 00:00:00 2001 From: Dennis Rassmann Date: Wed, 3 Oct 2012 18:54:23 +0200 Subject: [PATCH 165/678] mach-tegra: pm: remove awake frequency lock Signed-off-by: Dennis Rassmann --- arch/arm/mach-tegra/pm.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c index 7b883c9d683..3c1064cf4c1 100644 --- a/arch/arm/mach-tegra/pm.c +++ b/arch/arm/mach-tegra/pm.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include @@ -176,9 +175,6 @@ struct suspend_context tegra_sctx; #define MC_SECURITY_SIZE 0x70 #define MC_SECURITY_CFG2 0x7c -#define AWAKE_CPU_FREQ_MIN 102000 -static struct pm_qos_request_list awake_cpu_freq_req; - struct dvfs_rail *tegra_cpu_rail; static struct dvfs_rail *tegra_core_rail; static struct clk *tegra_pclk; @@ -1024,8 +1020,6 @@ void __init tegra_init_suspend(struct tegra_suspend_platform_data *plat) tegra_cpu_rail = 
tegra_dvfs_get_rail_by_name("vdd_cpu"); tegra_core_rail = tegra_dvfs_get_rail_by_name("vdd_core"); - pm_qos_add_request(&awake_cpu_freq_req, PM_QOS_CPU_FREQ_MIN, - AWAKE_CPU_FREQ_MIN); tegra_pclk = clk_get_sys(NULL, "pclk"); BUG_ON(IS_ERR(tegra_pclk)); @@ -1267,14 +1261,12 @@ static void pm_early_suspend(struct early_suspend *h) { if (clk_wake) clk_disable(clk_wake); - pm_qos_update_request(&awake_cpu_freq_req, PM_QOS_DEFAULT_VALUE); } static void pm_late_resume(struct early_suspend *h) { if (clk_wake) clk_enable(clk_wake); - pm_qos_update_request(&awake_cpu_freq_req, (s32)AWAKE_CPU_FREQ_MIN); } static struct early_suspend pm_early_suspender = { From 753ab054eca9f14e6cb476e031981ebf28c53442 Mon Sep 17 00:00:00 2001 From: Dennis Rassmann Date: Tue, 18 Sep 2012 01:32:35 +0200 Subject: [PATCH 166/678] cpufreq: ondemand: limit 2phase freq to max lpcpu if only one cpu is online Signed-off-by: Dennis Rassmann --- drivers/cpufreq/cpufreq_ondemand.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 1eaf938ae21..2b750e4780b 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -668,8 +668,13 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) } if (dbs_tuners_ins.two_phase_freq != 0 && phase == 0) { debug_freq = dbs_tuners_ins.two_phase_freq; - /* idle phase */ - dbs_freq_increase(policy, dbs_tuners_ins.two_phase_freq); + /* idle phase + * limit the frequency to max lpcpu if only 1 cpu is online + * this should avoid fast "peak"-switching out of lpcpu */ + if (num_online_cpus() > 1) + dbs_freq_increase(policy, dbs_tuners_ins.two_phase_freq); + else + dbs_freq_increase(policy, 475000); } else { /* busy phase */ if (policy->cur < policy->max) From 444481aae2e180c57657928915ec88ca060fcfab Mon Sep 17 00:00:00 2001 From: Dennis Rassmann Date: Tue, 18 Sep 2012 01:57:29 +0200 Subject: [PATCH 167/678] cpufreq: ondemand: 2phase: check for lp instead of online cpus Signed-off-by: Dennis Rassmann Conflicts: drivers/cpufreq/cpufreq_ondemand.c --- drivers/cpufreq/cpufreq_ondemand.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 2b750e4780b..9138d6577e9 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -28,6 +28,8 @@ #include #include +#include "../../arch/arm/mach-tegra/pm.h" + /* * dbs is used in this file as a shortform for demandbased switching * It helps to keep variable names smaller, simpler @@ -671,7 +673,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* idle phase * limit the frequency to max lpcpu if only 1 cpu is online * this should avoid fast "peak"-switching out of lpcpu */ - if (num_online_cpus() > 1) + if (!is_lp_cluster()) dbs_freq_increase(policy, dbs_tuners_ins.two_phase_freq); else dbs_freq_increase(policy, 475000); From f4d53c27f414cc1157b378fa56658c63a0ea2436 Mon Sep 17 00:00:00 2001 From: Dennis Rassmann Date: Sat, 6 Oct 2012 16:51:38 +0200 Subject: [PATCH 168/678] cpufreq: ondemand: remove fixed idle frequency. 
Always set to lpcore max clock Signed-off-by: Dennis Rassmann Conflicts: drivers/cpufreq/cpufreq_ondemand.c --- drivers/cpufreq/cpufreq_ondemand.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 9138d6577e9..afa68a7c4c4 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -27,8 +27,11 @@ #include #include #include +#include +#include "../../arch/arm/mach-tegra/clock.h" #include "../../arch/arm/mach-tegra/pm.h" +#include "../../arch/arm/mach-tegra/tegra_pmqos.h" /* * dbs is used in this file as a shortform for demandbased switching @@ -72,6 +75,10 @@ static void do_dbs_timer(struct work_struct *work); static int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); +/* lpcpu variables */ +static struct clk *cpu_lp_clk; +static unsigned int idle_top_freq; + #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND static #endif @@ -676,7 +683,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) if (!is_lp_cluster()) dbs_freq_increase(policy, dbs_tuners_ins.two_phase_freq); else - dbs_freq_increase(policy, 475000); + dbs_freq_increase(policy, idle_top_freq); } else { /* busy phase */ if (policy->cur < policy->max) @@ -1090,6 +1097,9 @@ static int __init cpufreq_gov_dbs_init(void) u64 idle_time; int cpu = get_cpu(); + cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); + idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; + idle_time = get_cpu_idle_time_us(cpu, &wall); put_cpu(); if (idle_time != -1ULL) { From f9d46e1217ed3e4288fcff495889952f55375aec Mon Sep 17 00:00:00 2001 From: Dennis Rassmann Date: Wed, 3 Oct 2012 19:10:30 +0200 Subject: [PATCH 169/678] drivers: cpufreq: make unlock_policy_rwsem_write/read non static Signed-off-by: Dennis Rassmann --- drivers/cpufreq/cpufreq.c | 6 +++--- include/linux/cpufreq.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1738ad91c5a..09435a35965 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -69,7 +69,7 @@ static DEFINE_PER_CPU(int, cpufreq_policy_cpu); static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); #define lock_policy_rwsem(mode, cpu) \ -static int lock_policy_rwsem_##mode \ +int lock_policy_rwsem_##mode \ (int cpu) \ { \ int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \ @@ -87,14 +87,14 @@ lock_policy_rwsem(read, cpu); lock_policy_rwsem(write, cpu); -static void unlock_policy_rwsem_read(int cpu) +void unlock_policy_rwsem_read(int cpu) { int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); BUG_ON(policy_cpu == -1); up_read(&per_cpu(cpu_policy_rwsem, policy_cpu)); } -static void unlock_policy_rwsem_write(int cpu) +void unlock_policy_rwsem_write(int cpu) { int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); BUG_ON(policy_cpu == -1); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 45ff4905d1b..251a08aaac9 100755 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -203,6 +203,9 @@ extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy, int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); +int lock_policy_rwsem_write(int cpu); +void unlock_policy_rwsem_write(int cpu); + /********************************************************************* * CPUFREQ DRIVER INTERFACE * From 5beedd465aba7c2bb0be5a77df2d6f6d9df232f4 Mon Sep 17 00:00:00 2001 From: Metallice 
Date: Mon, 26 Nov 2012 21:56:17 -0500 Subject: [PATCH 170/678] Revert "mach-tegra: pm: remove awake frequency lock" This reverts commit f618bab40dbcf9ef1a43e4b3acf3423bb4a94c49. --- arch/arm/mach-tegra/pm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c index 3c1064cf4c1..7b883c9d683 100644 --- a/arch/arm/mach-tegra/pm.c +++ b/arch/arm/mach-tegra/pm.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -175,6 +176,9 @@ struct suspend_context tegra_sctx; #define MC_SECURITY_SIZE 0x70 #define MC_SECURITY_CFG2 0x7c +#define AWAKE_CPU_FREQ_MIN 102000 +static struct pm_qos_request_list awake_cpu_freq_req; + struct dvfs_rail *tegra_cpu_rail; static struct dvfs_rail *tegra_core_rail; static struct clk *tegra_pclk; @@ -1020,6 +1024,8 @@ void __init tegra_init_suspend(struct tegra_suspend_platform_data *plat) tegra_cpu_rail = tegra_dvfs_get_rail_by_name("vdd_cpu"); tegra_core_rail = tegra_dvfs_get_rail_by_name("vdd_core"); + pm_qos_add_request(&awake_cpu_freq_req, PM_QOS_CPU_FREQ_MIN, + AWAKE_CPU_FREQ_MIN); tegra_pclk = clk_get_sys(NULL, "pclk"); BUG_ON(IS_ERR(tegra_pclk)); @@ -1261,12 +1267,14 @@ static void pm_early_suspend(struct early_suspend *h) { if (clk_wake) clk_disable(clk_wake); + pm_qos_update_request(&awake_cpu_freq_req, PM_QOS_DEFAULT_VALUE); } static void pm_late_resume(struct early_suspend *h) { if (clk_wake) clk_enable(clk_wake); + pm_qos_update_request(&awake_cpu_freq_req, (s32)AWAKE_CPU_FREQ_MIN); } static struct early_suspend pm_early_suspender = { From 52e6dbc2e91d60d6cc84d6764bea5be7a5ffc446 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 26 Nov 2012 21:57:01 -0500 Subject: [PATCH 171/678] Revert "mach-tegra: pm: fix wrong awake cpufreq" This reverts commit 081df89342190dfe8a6161d7d918322ff2bc75c8. 
--- arch/arm/mach-tegra/pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c index 7b883c9d683..7d2d1016843 100644 --- a/arch/arm/mach-tegra/pm.c +++ b/arch/arm/mach-tegra/pm.c @@ -176,7 +176,7 @@ struct suspend_context tegra_sctx; #define MC_SECURITY_SIZE 0x70 #define MC_SECURITY_CFG2 0x7c -#define AWAKE_CPU_FREQ_MIN 102000 +#define AWAKE_CPU_FREQ_MIN 100000 static struct pm_qos_request_list awake_cpu_freq_req; struct dvfs_rail *tegra_cpu_rail; From 51edcaa45eda040b5471ba5f5978741846017b44 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 27 Nov 2012 00:32:05 -0500 Subject: [PATCH 172/678] cpufreq: ondemand: changes to touch boost if lp cluster is active go to lp max on touch --- drivers/cpufreq/cpufreq_ondemand.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index afa68a7c4c4..d0b1bf83459 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -38,8 +38,8 @@ * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) -#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (3) +#define DEF_FREQUENCY_UP_THRESHOLD (95) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) @@ -146,7 +146,7 @@ static struct dbs_tuners { .ignore_nice = 0, .powersave_bias = 0, #ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE - .two_phase_freq = 0, + .two_phase_freq = 1100000, #endif .touch_poke = 1, .ui_sampling_rate = DEF_UI_DYNAMIC_SAMPLING_RATE, @@ -324,7 +324,7 @@ static ssize_t store_two_phase_freq(struct kobject *a, struct attribute *b, } #endif -static unsigned int Touch_poke_attr[4] = {1300000, 880000, 0, 0}; +static unsigned int Touch_poke_attr[4] = {1100000, 860000, 0, 0}; static unsigned int Touch_poke_boost_duration_ms = 0; static unsigned long Touch_poke_boost_till_jiffies = 0; @@ -896,7 +896,10 @@ static void dbs_refresh_callback(struct work_struct *unused) /* We poke the frequency base on the online cpu number */ nr_cpus = num_online_cpus(); - touch_poke_freq = Touch_poke_attr[nr_cpus-1]; + if (!is_lp_cluster()) + touch_poke_freq = Touch_poke_attr[nr_cpus-1]; + else + touch_poke_freq = idle_top_freq; if(touch_poke_freq == 0 || policy->cur >= touch_poke_freq){ unlock_policy_rwsem_write(cpu); From 0b12e19460b07ddcff385074e91d63c8be228b63 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 27 Nov 2012 00:32:46 -0500 Subject: [PATCH 173/678] mach-tegra: freq table change back to 860Mhz --- arch/arm/mach-tegra/tegra3_clocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index df9af77ba97..32fce0ed171 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4647,7 +4647,7 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { #ifdef CONFIG_LP_OC_700 { 5, 550000 }, { 5, 700000 }, - { 7, 900000 }, + { 7, 860000 }, { 8, 1000000 }, { 9, 1100000 }, {10, 1200000 }, From 7938df6f11891fd4a85a5ac918a937f15f3cbc29 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 30 Mar 2012 11:35:36 +0300 Subject: [PATCH 174/678] cpuquiet: base files Change-Id: I611b72a2e63fffc788bc6c7594c738d5ad65e06f Signed-off-by: Peter De Schrijver Reviewed-on: http://git-master/r/105263 Reviewed-by: Automatic_Commit_Validation_User GVS: 
Gerrit_Virtual_Submit Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Diwakar Tundlam --- drivers/cpuquiet/cpuquiet.c | 32 ++++++++++++++++++++++++++++++++ drivers/cpuquiet/cpuquiet.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 drivers/cpuquiet/cpuquiet.c create mode 100644 drivers/cpuquiet/cpuquiet.h diff --git a/drivers/cpuquiet/cpuquiet.c b/drivers/cpuquiet/cpuquiet.c new file mode 100644 index 00000000000..d902af26c8d --- /dev/null +++ b/drivers/cpuquiet/cpuquiet.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#include +#include +#include +#include +#include "cpuquiet.h" + +DEFINE_MUTEX(cpuquiet_lock); + +static int __init cpuquiet_init(void) +{ + return cpuquiet_add_class_sysfs(&cpu_sysdev_class); +} + +core_initcall(cpuquiet_init); diff --git a/drivers/cpuquiet/cpuquiet.h b/drivers/cpuquiet/cpuquiet.h new file mode 100644 index 00000000000..fa61946ff11 --- /dev/null +++ b/drivers/cpuquiet/cpuquiet.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#ifndef __DRIVER_CPUQUIET_H +#define __DRIVER_CPUQUIET_H + +#include + +extern struct mutex cpuquiet_lock; +extern struct cpuquiet_governor *cpuquiet_curr_governor; +extern struct list_head cpuquiet_governors; +int cpuquiet_add_class_sysfs(struct sysdev_class *cls); +struct cpuquiet_governor *cpuquiet_find_governor(const char *str); +int cpuquiet_switch_governor(struct cpuquiet_governor *gov); +struct cpuquiet_governor *cpuquiet_get_first_governor(void); +struct cpuquiet_driver *cpuquiet_get_driver(void); +void cpuquiet_add_dev(struct sys_device *sys_dev, unsigned int cpu); +void cpuquiet_remove_dev(unsigned int cpu); +int cpuquiet_cpu_kobject_init(struct kobject *kobj, struct kobj_type *type, + char *name, int cpu); +#endif From bba2324ea30f2da833881a9fa309efa1ccc12103 Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Wed, 23 May 2012 11:30:47 -0700 Subject: [PATCH 175/678] cpuquiet: Kconfig Signed-off-by: Sai Charan Gurrappadi Change-Id: I930dedaa3bf7b2c64bc61f28c3461f125cca2f44 Reviewed-on: http://git-master/r/105264 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Peter De Schrijver Reviewed-by: Diwakar Tundlam --- drivers/cpuquiet/Kconfig | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 drivers/cpuquiet/Kconfig diff --git a/drivers/cpuquiet/Kconfig b/drivers/cpuquiet/Kconfig new file mode 100644 index 00000000000..844cd34a69b --- /dev/null +++ b/drivers/cpuquiet/Kconfig @@ -0,0 +1,11 @@ +menu "CPUQUIET Framework" + +config CPUQUIET_FRAMEWORK + bool "Cpuquiet framework" + default n + help + Cpuquiet implements pluggable policies for forcing cpu cores into a + quiescent state. Appropriate policies will save power without hurting + performance. + +endmenu From 594bf2de54f9d6702274a2264e15b31916ea653f Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 30 Mar 2012 11:38:47 +0300 Subject: [PATCH 176/678] cpuquiet: public interfaces for cpuquiet Change-Id: Ie391d6d11fad7b76b0bf5daff67ac46663651dc0 Signed-off-by: Peter De Schrijver Reviewed-on: http://git-master/r/105265 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Diwakar Tundlam --- include/linux/cpuquiet.h | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/cpuquiet.h diff --git a/include/linux/cpuquiet.h b/include/linux/cpuquiet.h new file mode 100644 index 00000000000..8459af7aad7 --- /dev/null +++ b/include/linux/cpuquiet.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#ifndef _LINUX_CPUONLINE_H +#define _LINUX_CPUONLINE_H + +#include +#include + +#define CPUQUIET_NAME_LEN 16 + +struct cpuquiet_governor { + char name[CPUQUIET_NAME_LEN]; + struct list_head governor_list; + int (*start) (void); + void (*stop) (void); + int (*store_active) (unsigned int cpu, bool active); + struct module *owner; +}; + +struct cpuquiet_driver { + char name[CPUQUIET_NAME_LEN]; + int (*quiesence_cpu) (unsigned int cpunumber); + int (*wake_cpu) (unsigned int cpunumber); +}; + +extern int cpuquiet_register_governor(struct cpuquiet_governor *gov); +extern void cpuquiet_unregister_governor(struct cpuquiet_governor *gov); +extern int cpuquiet_quiesence_cpu(unsigned int cpunumber); +extern int cpuquiet_wake_cpu(unsigned int cpunumber); +extern int cpuquiet_register_driver(struct cpuquiet_driver *drv); +extern void cpuquiet_unregister_driver(struct cpuquiet_driver *drv); +extern int cpuquiet_add_group(struct attribute_group *attrs); +extern void cpuquiet_remove_group(struct attribute_group *attrs); +int cpuquiet_kobject_init(struct kobject *kobj, struct kobj_type *type, + char *name); +extern unsigned int nr_cluster_ids; +#endif From c6da89bf42d398f4517c78c34f0b3b47b4b2e310 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 30 Mar 2012 11:40:44 +0300 Subject: [PATCH 177/678] cpuquiet: driver support Change-Id: I4f3f67d4459eeda519efdfd80e1283bef2d597e3 Signed-off-by: Peter De Schrijver Reviewed-on: http://git-master/r/105266 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Diwakar Tundlam --- drivers/cpuquiet/driver.c | 200 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 drivers/cpuquiet/driver.c diff --git a/drivers/cpuquiet/driver.c b/drivers/cpuquiet/driver.c new file mode 100644 index 00000000000..f9dcdf018f5 --- /dev/null +++ b/drivers/cpuquiet/driver.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "cpuquiet.h" + +struct cpuquiet_cpu_stat { + cputime64_t time_up_total; + u64 last_update; + unsigned int up_down_count; + struct kobject cpu_kobject; +}; + +struct cpu_attribute { + struct attribute attr; + enum { up_down_count, time_up_total } type; +}; + +static struct cpuquiet_driver *cpuquiet_curr_driver; +struct cpuquiet_cpu_stat *stats; + +#define CPU_ATTRIBUTE(_name) \ + static struct cpu_attribute _name ## _attr = { \ + .attr = {.name = __stringify(_name), .mode = 0444 }, \ + .type = _name, \ +} + +CPU_ATTRIBUTE(up_down_count); +CPU_ATTRIBUTE(time_up_total); + +static struct attribute *cpu_attributes[] = { + &up_down_count_attr.attr, + &time_up_total_attr.attr, + NULL, +}; + +static void stats_update(struct cpuquiet_cpu_stat *stat, bool up) +{ + u64 cur_jiffies = get_jiffies_64(); + bool was_up = stat->up_down_count & 0x1; + + if (was_up) + stat->time_up_total = cputime64_add(stat->time_up_total, + cputime64_sub(cur_jiffies, stat->last_update)); + + if (was_up != up) + stat->up_down_count++; + + stat->last_update = cur_jiffies; +} + +int cpuquiet_quiesence_cpu(unsigned int cpunumber) +{ + int err = -EPERM; + + if (cpuquiet_curr_driver && cpuquiet_curr_driver->quiesence_cpu) + err = cpuquiet_curr_driver->quiesence_cpu(cpunumber); + + stats_update(stats + cpunumber, 0); + + return err; +} +EXPORT_SYMBOL(cpuquiet_quiesence_cpu); + +int cpuquiet_wake_cpu(unsigned int cpunumber) +{ + int err = -EPERM; + + if (cpuquiet_curr_driver && cpuquiet_curr_driver->wake_cpu) + err = cpuquiet_curr_driver->wake_cpu(cpunumber); + + stats_update(stats + cpunumber, 1); + + return err; +} +EXPORT_SYMBOL(cpuquiet_wake_cpu); + +static ssize_t stats_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpu_attribute *cattr = + container_of(attr, struct cpu_attribute, attr); + struct cpuquiet_cpu_stat *stat = + container_of(kobj, struct cpuquiet_cpu_stat, cpu_kobject); + ssize_t len = 0; + bool was_up = stat->up_down_count & 0x1; + + stats_update(stat, was_up); + + switch (cattr->type) { + case up_down_count: + len = sprintf(buf, "%u\n", stat->up_down_count); + break; + case time_up_total: + len = sprintf(buf, "%llu\n", stat->time_up_total); + break; + } + + return len; +} + +static const struct sysfs_ops stats_sysfs_ops = { + .show = stats_sysfs_show, +}; + +static struct kobj_type ktype_cpu_stats = { + .sysfs_ops = &stats_sysfs_ops, + .default_attrs = cpu_attributes, +}; + +int cpuquiet_register_driver(struct cpuquiet_driver *drv) +{ + int err = -EBUSY; + unsigned int cpu; + struct sys_device *sys_dev; + u64 cur_jiffies; + + if (!drv) + return -EINVAL; + + stats = kzalloc(nr_cpu_ids * sizeof(*stats), GFP_KERNEL); + if (!stats) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + cur_jiffies = get_jiffies_64(); + stats[cpu].last_update = cur_jiffies; + if (cpu_online(cpu)) + stats[cpu].up_down_count = 1; + sys_dev = get_cpu_sysdev(cpu); + if (sys_dev) { + cpuquiet_add_dev(sys_dev, cpu); + cpuquiet_cpu_kobject_init(&stats[cpu].cpu_kobject, + &ktype_cpu_stats, "stats", cpu); + } + } + + mutex_lock(&cpuquiet_lock); + if (!cpuquiet_curr_driver) { + err = 0; + cpuquiet_curr_driver = drv; + cpuquiet_switch_governor(cpuquiet_get_first_governor()); + } + mutex_unlock(&cpuquiet_lock); + + return err; +} +EXPORT_SYMBOL(cpuquiet_register_driver); + +struct cpuquiet_driver *cpuquiet_get_driver(void) +{ + return cpuquiet_curr_driver; +} + +void cpuquiet_unregister_driver(struct cpuquiet_driver *drv) 
+{ + unsigned int cpu; + + if (drv != cpuquiet_curr_driver) { + WARN(1, "invalid cpuquiet_unregister_driver(%s)\n", + drv->name); + return; + } + + /* stop current governor first */ + cpuquiet_switch_governor(NULL); + + mutex_lock(&cpuquiet_lock); + cpuquiet_curr_driver = NULL; + + for_each_possible_cpu(cpu) { + kobject_put(&stats[cpu].cpu_kobject); + cpuquiet_remove_dev(cpu); + } + + mutex_unlock(&cpuquiet_lock); +} +EXPORT_SYMBOL(cpuquiet_unregister_driver); From 0f62302651d60fddc69929e861a92be90bfe15c5 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 30 Mar 2012 11:43:21 +0300 Subject: [PATCH 178/678] cpuquiet: sysfs interfaces Change-Id: Idb454f7380c48e2f4bab20e6ae51fef577b0f6c5 Signed-off-by: Peter De Schrijver Reviewed-on: http://git-master/r/105267 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Diwakar Tundlam --- drivers/cpuquiet/sysfs.c | 290 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 290 insertions(+) create mode 100644 drivers/cpuquiet/sysfs.c diff --git a/drivers/cpuquiet/sysfs.c b/drivers/cpuquiet/sysfs.c new file mode 100644 index 00000000000..1e1c14865b2 --- /dev/null +++ b/drivers/cpuquiet/sysfs.c @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#include +#include +#include +#include + +#include "cpuquiet.h" + +struct cpuquiet_dev { + unsigned int cpu; + struct kobject kobj; +}; + +struct cpuquiet_sysfs_attr { + struct attribute attr; + ssize_t (*show)(char *); + ssize_t (*store)(const char *, size_t count); +}; + +static struct kobject *cpuquiet_global_kobject; +struct cpuquiet_dev *cpuquiet_cpu_devices[CONFIG_NR_CPUS]; + +static ssize_t show_current_governor(char *buf) +{ + ssize_t ret; + + mutex_lock(&cpuquiet_lock); + + if (cpuquiet_curr_governor) + ret = sprintf(buf, "%s\n", cpuquiet_curr_governor->name); + else + ret = sprintf(buf, "none\n"); + + mutex_unlock(&cpuquiet_lock); + + return ret; + +} + +static ssize_t store_current_governor(const char *buf, size_t count) +{ + char name[CPUQUIET_NAME_LEN]; + struct cpuquiet_governor *gov; + int len = count, ret = -EINVAL; + + if (!len || len >= sizeof(name)) + return -EINVAL; + + memcpy(name, buf, count); + name[len] = '\0'; + if (name[len - 1] == '\n') + name[--len] = '\0'; + + mutex_lock(&cpuquiet_lock); + gov = cpuquiet_find_governor(name); + mutex_unlock(&cpuquiet_lock); + + if (gov) + ret = cpuquiet_switch_governor(gov); + + if (ret) + return ret; + else + return count; +} + +static ssize_t available_governors_show(char *buf) +{ + ssize_t ret = 0, len; + struct cpuquiet_governor *gov; + + mutex_lock(&cpuquiet_lock); + if (!list_empty(&cpuquiet_governors)) { + list_for_each_entry(gov, &cpuquiet_governors, governor_list) { + len = sprintf(buf, "%s ", gov->name); + buf += len; + ret += len; + } + buf--; + *buf = '\n'; + } else + ret = sprintf(buf, "none\n"); + + mutex_unlock(&cpuquiet_lock); + + return ret; +} + +struct cpuquiet_sysfs_attr attr_current_governor = __ATTR(current_governor, + 0644, show_current_governor, store_current_governor); +struct cpuquiet_sysfs_attr attr_governors = __ATTR_RO(available_governors); + + +static struct attribute *cpuquiet_default_attrs[] = { + &attr_current_governor.attr, + &attr_governors.attr, + NULL +}; + +static ssize_t cpuquiet_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpuquiet_sysfs_attr *cattr = + container_of(attr, struct cpuquiet_sysfs_attr, attr); + + return cattr->show(buf); +} + +static ssize_t cpuquiet_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct cpuquiet_sysfs_attr *cattr = + container_of(attr, struct cpuquiet_sysfs_attr, attr); + + if (cattr->store) + return cattr->store(buf, count); + + return -EINVAL; +} + +static const struct sysfs_ops cpuquiet_sysfs_ops = { + .show = cpuquiet_sysfs_show, + .store = cpuquiet_sysfs_store, +}; + +static struct kobj_type ktype_cpuquiet_sysfs = { + .sysfs_ops = &cpuquiet_sysfs_ops, + .default_attrs = cpuquiet_default_attrs, +}; + +int cpuquiet_add_group(struct attribute_group *attrs) +{ + return sysfs_create_group(cpuquiet_global_kobject, attrs); +} + +void cpuquiet_remove_group(struct attribute_group *attrs) +{ + sysfs_remove_group(cpuquiet_global_kobject, attrs); +} + +int cpuquiet_kobject_init(struct kobject *kobj, struct kobj_type *type, + char *name) +{ + int err; + + err = kobject_init_and_add(kobj, type, cpuquiet_global_kobject, name); + if (!err) + kobject_uevent(kobj, KOBJ_ADD); + + return err; +} + +int cpuquiet_cpu_kobject_init(struct kobject *kobj, struct kobj_type *type, + char *name, int cpu) +{ + int err; + + err = kobject_init_and_add(kobj, type, &cpuquiet_cpu_devices[cpu]->kobj, + name); + if (!err) + kobject_uevent(kobj, KOBJ_ADD); + + return err; +} + +int 
cpuquiet_add_class_sysfs(struct sysdev_class *cls) +{ + int err; + + cpuquiet_global_kobject = kzalloc(sizeof(*cpuquiet_global_kobject), + GFP_KERNEL); + if (!cpuquiet_global_kobject) + return -ENOMEM; + + err = kobject_init_and_add(cpuquiet_global_kobject, + &ktype_cpuquiet_sysfs, &cls->kset.kobj, "cpuquiet"); + if (!err) + kobject_uevent(cpuquiet_global_kobject, KOBJ_ADD); + + return err; +} + + +struct cpuquiet_attr { + struct attribute attr; + ssize_t (*show)(unsigned int, char *); + ssize_t (*store)(unsigned int, const char *, size_t count); +}; + + +static ssize_t cpuquiet_state_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpuquiet_attr *cattr = container_of(attr, + struct cpuquiet_attr, attr); + struct cpuquiet_dev *dev = container_of(kobj, + struct cpuquiet_dev, kobj); + + return cattr->show(dev->cpu, buf); +} + +static ssize_t cpuquiet_state_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct cpuquiet_attr *cattr = container_of(attr, + struct cpuquiet_attr, attr); + struct cpuquiet_dev *dev = container_of(kobj, + struct cpuquiet_dev, kobj); + + if (cattr->store) + return cattr->store(dev->cpu, buf, count); + + return -EINVAL; +} + +static ssize_t show_active(unsigned int cpu, char *buf) +{ + return sprintf(buf, "%u\n", cpu_online(cpu)); +} + +static ssize_t store_active(unsigned int cpu, const char *value, size_t count) +{ + unsigned int active; + int ret; + + if (!cpuquiet_curr_governor->store_active) + return -EINVAL; + + ret = sscanf(value, "%u", &active); + if (ret != 1) + return -EINVAL; + + cpuquiet_curr_governor->store_active(cpu, active); + + return count; +} + +struct cpuquiet_attr attr_active = __ATTR(active, 0644, show_active, + store_active); + +static struct attribute *cpuquiet_default_cpu_attrs[] = { + &attr_active.attr, + NULL +}; + +static const struct sysfs_ops cpuquiet_cpu_sysfs_ops = { + .show = cpuquiet_state_show, + .store = cpuquiet_state_store, +}; + +static struct kobj_type ktype_cpuquiet = { + .sysfs_ops = &cpuquiet_cpu_sysfs_ops, + .default_attrs = cpuquiet_default_cpu_attrs, +}; + +void cpuquiet_add_dev(struct sys_device *sys_dev, unsigned int cpu) +{ + struct cpuquiet_dev *dev; + int err; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + dev->cpu = cpu; + cpuquiet_cpu_devices[cpu] = dev; + err = kobject_init_and_add(&dev->kobj, &ktype_cpuquiet, + &sys_dev->kobj, "cpuquiet"); + if (!err) + kobject_uevent(&dev->kobj, KOBJ_ADD); +} + +void cpuquiet_remove_dev(unsigned int cpu) +{ + kobject_put(cpuquiet_cpu_devices[cpu]); +} From a8dc9f8df6c7d773078d2b9136b4d08e1ef122dc Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Wed, 23 May 2012 11:38:28 -0700 Subject: [PATCH 179/678] ARM: Config for cpuquiet framework Change-Id: I61b19497d88821f39cec8605f24028c7d7fda126 Signed-off-by: Sai Charan Gurrappadi Reviewed-on: http://git-master/r/105268 Reviewed-by: Simone Willett Tested-by: Simone Willett --- arch/arm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 86456c5787c..d489b8f83bd 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2102,6 +2102,8 @@ endif source "drivers/cpuidle/Kconfig" +source "drivers/cpuquiet/Kconfig" + endmenu menu "Floating point emulation" From 7b5963bfca5392de6c9af12a179fe16c5bef2aac Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 30 Mar 2012 11:42:17 +0300 Subject: [PATCH 180/678] cpuquiet: governor support Change-Id: I05b9dedc04bb3b2ddba6202a002c1e5514ec4777 Signed-off-by: Peter De 
Schrijver Reviewed-on: http://git-master/r/105269 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Tested-by: Sai Gurrappadi Reviewed-by: Sai Gurrappadi Reviewed-by: Diwakar Tundlam --- drivers/cpuquiet/governor.c | 101 ++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 drivers/cpuquiet/governor.c diff --git a/drivers/cpuquiet/governor.c b/drivers/cpuquiet/governor.c new file mode 100644 index 00000000000..1446b9ee506 --- /dev/null +++ b/drivers/cpuquiet/governor.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#include +#include +#include + +#include "cpuquiet.h" + +LIST_HEAD(cpuquiet_governors); +struct cpuquiet_governor *cpuquiet_curr_governor; + +struct cpuquiet_governor *cpuquiet_get_first_governor(void) +{ + if (!list_empty(&cpuquiet_governors)) + return list_entry(&cpuquiet_governors, struct cpuquiet_governor, + governor_list); + else + return NULL; +} + +struct cpuquiet_governor *cpuquiet_find_governor(const char *str) +{ + struct cpuquiet_governor *gov; + + list_for_each_entry(gov, &cpuquiet_governors, governor_list) + if (!strnicmp(str, gov->name, CPUQUIET_NAME_LEN)) + return gov; + + return NULL; +} + +int cpuquiet_switch_governor(struct cpuquiet_governor *gov) +{ + int err = 0; + + if (cpuquiet_curr_governor) { + if (cpuquiet_curr_governor->stop) + cpuquiet_curr_governor->stop(); + module_put(cpuquiet_curr_governor->owner); + } + + cpuquiet_curr_governor = gov; + + if (gov) { + if (!try_module_get(cpuquiet_curr_governor->owner)) + return -EINVAL; + if (gov->start) + err = gov->start(); + if (!err) + cpuquiet_curr_governor = gov; + } + + return err; +} + +int cpuquiet_register_governor(struct cpuquiet_governor *gov) +{ + int ret = -EEXIST; + + if (!gov) + return -EINVAL; + + mutex_lock(&cpuquiet_lock); + if (cpuquiet_find_governor(gov->name) == NULL) { + ret = 0; + list_add_tail(&gov->governor_list, &cpuquiet_governors); + if (!cpuquiet_curr_governor && cpuquiet_get_driver()) + cpuquiet_switch_governor(gov); + } + mutex_unlock(&cpuquiet_lock); + + return ret; +} + +void cpuquiet_unregister_governor(struct cpuquiet_governor *gov) +{ + if (!gov) + return; + + mutex_lock(&cpuquiet_lock); + if (cpuquiet_curr_governor == gov) + cpuquiet_switch_governor(NULL); + list_del(&gov->governor_list); + mutex_unlock(&cpuquiet_lock); +} From 45b4998658bf91ea61af9d23718a6f2538c0386f Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 30 Mar 2012 11:44:32 +0300 Subject: [PATCH 181/678] cpuquiet: userspace governor Change-Id: If9830d423b1751cbe9493eda0a85f88e7003173f Signed-off-by: Peter De Schrijver Reviewed-on: http://git-master/r/105270 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Diwakar Tundlam --- 
drivers/cpuquiet/governors/userspace.c | 56 ++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 drivers/cpuquiet/governors/userspace.c diff --git a/drivers/cpuquiet/governors/userspace.c b/drivers/cpuquiet/governors/userspace.c new file mode 100644 index 00000000000..470056c5e32 --- /dev/null +++ b/drivers/cpuquiet/governors/userspace.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#include +#include +#include +#include + +static DEFINE_MUTEX(userspace_mutex); + +static int governor_set(unsigned int cpu, bool active) +{ + mutex_lock(&userspace_mutex); + if (active) + cpuquiet_wake_cpu(cpu); + else + cpuquiet_quiesence_cpu(cpu); + mutex_unlock(&userspace_mutex); + + return 0; +} + +struct cpuquiet_governor userspace_governor = { + .name = "userspace", + .store_active = governor_set, + .owner = THIS_MODULE, +}; + +static int __init init_usermode(void) +{ + return cpuquiet_register_governor(&userspace_governor); +} + +static void __exit exit_usermode(void) +{ + cpuquiet_unregister_governor(&userspace_governor); +} + +MODULE_LICENSE("GPL"); +module_init(init_usermode); +module_exit(exit_usermode); From a53a61870d4ef0e6afc66aa899fd01e7aa7454f8 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 30 Mar 2012 11:45:48 +0300 Subject: [PATCH 182/678] cpuquiet: balanced governor This is a port of the existing governor logic in cpu-tegra3.c. Change-Id: Id79e6fc697dd0de85242fb2471bbed3d10101693 Signed-off-by: Peter De Schrijver Reviewed-on: http://git-master/r/105271 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Diwakar Tundlam --- drivers/cpuquiet/governors/balanced.c | 473 ++++++++++++++++++++++++++ 1 file changed, 473 insertions(+) create mode 100644 drivers/cpuquiet/governors/balanced.c diff --git a/drivers/cpuquiet/governors/balanced.c b/drivers/cpuquiet/governors/balanced.c new file mode 100644 index 00000000000..813a32e671d --- /dev/null +++ b/drivers/cpuquiet/governors/balanced.c @@ -0,0 +1,473 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CPUNAMELEN 8 + +typedef enum { + CPU_SPEED_BALANCED, + CPU_SPEED_BIASED, + CPU_SPEED_SKEWED, +} CPU_SPEED_BALANCE; + +typedef enum { + IDLE, + DOWN, + UP, +} BALANCED_STATE; + +struct idle_info { + u64 idle_last; + u64 last_timestamp; + u64 idle_current; + u64 timestamp; +}; + +static DEFINE_PER_CPU(struct idle_info, idleinfo); +static DEFINE_PER_CPU(unsigned int, cpu_load); + +static struct timer_list load_timer; +static bool load_timer_active; +struct balanced_attribute { + struct attribute attr; + ssize_t (*show)(struct balanced_attribute *attr, char *buf); + ssize_t (*store)(struct balanced_attribute *attr, const char *buf, + size_t count); + unsigned long *param; +}; + +#define BALANCED_ATTRIBUTE(_name, _mode) \ + static struct balanced_attribute _name ## _attr = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = show_attribute, \ + .store = store_attribute, \ + .param = &_name, \ +} + +/* configurable parameters */ +static unsigned long balance_level = 75; +static unsigned long idle_bottom_freq; +static unsigned long idle_top_freq; +static unsigned long up_delay; +static unsigned long down_delay; + +static struct workqueue_struct *balanced_wq; +static struct delayed_work balanced_work; +static BALANCED_STATE balanced_state; +static struct kobject *balanced_kobject; + +static void calculate_load_timer(unsigned long data) +{ + int i; + u64 idle_time, elapsed_time; + + if (!load_timer_active) + return; + + for_each_online_cpu(i) { + struct idle_info *iinfo = &per_cpu(idleinfo, i); + unsigned int *load = &per_cpu(cpu_load, i); + + iinfo->idle_last = iinfo->idle_current; + iinfo->last_timestamp = iinfo->timestamp; + iinfo->idle_current = + get_cpu_idle_time_us(i, &iinfo->timestamp); + elapsed_time = iinfo->timestamp - iinfo->last_timestamp; + + idle_time = iinfo->idle_current - iinfo->idle_last; + idle_time *= 100; + do_div(idle_time, elapsed_time); + *load = 100 - idle_time; + } + mod_timer(&load_timer, jiffies + msecs_to_jiffies(100)); +} + +static void start_load_timer(void) +{ + int i; + + if (load_timer_active) + return; + + load_timer_active = true; + + for_each_online_cpu(i) { + struct idle_info *iinfo = &per_cpu(idleinfo, i); + + iinfo->idle_current = + get_cpu_idle_time_us(i, &iinfo->timestamp); + } + mod_timer(&load_timer, jiffies + msecs_to_jiffies(100)); +} + +static void stop_load_timer(void) +{ + if (!load_timer_active) + return; + + load_timer_active = false; + del_timer(&load_timer); +} + +static unsigned int get_slowest_cpu_n(void) +{ + unsigned int cpu = nr_cpu_ids; + unsigned long minload = ULONG_MAX; + int i; + + for_each_online_cpu(i) { + unsigned int *load = &per_cpu(cpu_load, i); + + if ((i > 0) && (minload > *load)) { + cpu = i; + minload = *load; + } + } + + return cpu; +} + +static unsigned int cpu_highest_speed(void) +{ + unsigned int maxload = 0; + int i; + + for_each_online_cpu(i) { + unsigned int *load = &per_cpu(cpu_load, i); + + maxload = max(maxload, *load); + } + + return maxload; +} + +static unsigned int count_slow_cpus(unsigned int limit) +{ + unsigned int cnt = 0; + int i; + + for_each_online_cpu(i) { + unsigned int *load = &per_cpu(cpu_load, i); + + if (*load <= limit) + cnt++; + } + + return cnt; +} + +static CPU_SPEED_BALANCE balanced_speed_balance(void) +{ + unsigned long highest_speed = cpu_highest_speed(); + unsigned long balanced_speed = highest_speed * balance_level / 100; + unsigned 
long skewed_speed = balanced_speed / 2; + unsigned int nr_cpus = num_online_cpus(); + unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4; + + /* balanced: freq targets for all CPUs are above 50% of highest speed + biased: freq target for at least one CPU is below 50% threshold + skewed: freq targets for at least 2 CPUs are below 25% threshold */ + if (count_slow_cpus(skewed_speed) >= 2 || nr_cpus > max_cpus) + return CPU_SPEED_SKEWED; + + if (count_slow_cpus(balanced_speed) >= 1 || nr_cpus == max_cpus) + return CPU_SPEED_BIASED; + + return CPU_SPEED_BALANCED; +} + +static void balanced_work_func(struct work_struct *work) +{ + bool up = false; + unsigned int cpu = nr_cpu_ids; + CPU_SPEED_BALANCE balance; + + switch (balanced_state) { + case IDLE: + break; + case DOWN: + cpu = get_slowest_cpu_n(); + if (cpu < nr_cpu_ids) { + up = false; + queue_delayed_work(balanced_wq, + &balanced_work, down_delay); + } else + stop_load_timer(); + break; + case UP: + balance = balanced_speed_balance(); + switch (balance) { + + /* cpu speed is up and balanced - one more on-line */ + case CPU_SPEED_BALANCED: + cpu = cpumask_next_zero(0, cpu_online_mask); + if (cpu < nr_cpu_ids) + up = true; + break; + /* cpu speed is up, but skewed - remove one core */ + case CPU_SPEED_SKEWED: + cpu = get_slowest_cpu_n(); + if (cpu < nr_cpu_ids) + up = false; + break; + /* cpu speed is up, but under-utilized - do nothing */ + case CPU_SPEED_BIASED: + default: + break; + } + queue_delayed_work( + balanced_wq, &balanced_work, up_delay); + break; + default: + pr_err("%s: invalid cpuquiet balanced governor state %d\n", + __func__, balanced_state); + } + + if (cpu < nr_cpu_ids) { + if (up) + cpuquiet_wake_cpu(cpu); + else + cpuquiet_quiesence_cpu(cpu); + } +} + +static int balanced_cpufreq_transition(struct notifier_block *nb, + unsigned long state, void *data) +{ + struct cpufreq_freqs *freqs = data; + unsigned long cpu_freq; + + if (state == CPUFREQ_POSTCHANGE || state == CPUFREQ_RESUMECHANGE) { + cpu_freq = freqs->new; + + switch (balanced_state) { + case IDLE: + if (cpu_freq > idle_top_freq) { + balanced_state = UP; + queue_delayed_work( + balanced_wq, &balanced_work, up_delay); + start_load_timer(); + } else if (cpu_freq <= idle_bottom_freq) { + balanced_state = DOWN; + queue_delayed_work( + balanced_wq, &balanced_work, + down_delay); + start_load_timer(); + } + break; + case DOWN: + if (cpu_freq > idle_top_freq) { + balanced_state = UP; + queue_delayed_work( + balanced_wq, &balanced_work, up_delay); + start_load_timer(); + } + break; + case UP: + if (cpu_freq <= idle_bottom_freq) { + balanced_state = DOWN; + queue_delayed_work(balanced_wq, + &balanced_work, down_delay); + start_load_timer(); + } + break; + default: + pr_err("%s: invalid tegra hotplug state %d\n", + __func__, balanced_state); + } + } + + return NOTIFY_OK; +} + +static struct notifier_block balanced_cpufreq_nb = { + .notifier_call = balanced_cpufreq_transition, +}; + +static ssize_t show_attribute(struct balanced_attribute *battr, char *buf) +{ + return sprintf(buf, "%lu\n", *(battr->param)); +} + +static ssize_t store_attribute(struct balanced_attribute *battr, + const char *buf, size_t count) +{ + int err; + unsigned long val; + + err = strict_strtoul(buf, 0, &val); + if (err < 0) + return err; + + *(battr->param) = val; + + return count; +} + +static ssize_t balanced_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct balanced_attribute *battr = + container_of(attr, struct 
balanced_attribute, attr); + + if (battr->store) + return battr->store(battr, buf, count); + + return -EINVAL; +} + +static ssize_t balanced_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct balanced_attribute *battr = + container_of(attr, struct balanced_attribute, attr); + + return battr->show(battr, buf); +} + +BALANCED_ATTRIBUTE(balance_level, 0644); +BALANCED_ATTRIBUTE(idle_bottom_freq, 0644); +BALANCED_ATTRIBUTE(idle_top_freq, 0644); +BALANCED_ATTRIBUTE(up_delay, 0644); +BALANCED_ATTRIBUTE(down_delay, 0644); + +static struct attribute *balanced_attributes[] = { + &balance_level_attr.attr, + &idle_bottom_freq_attr.attr, + &idle_top_freq_attr.attr, + &up_delay_attr.attr, + &down_delay_attr.attr, + NULL, +}; + +static const struct sysfs_ops balanced_sysfs_ops = { + .show = balanced_sysfs_show, + .store = balanced_sysfs_store, +}; + +static struct kobj_type ktype_balanced = { + .sysfs_ops = &balanced_sysfs_ops, + .default_attrs = balanced_attributes, +}; + +static int balanced_sysfs(void) +{ + int err; + + balanced_kobject = kzalloc(sizeof(*balanced_kobject), + GFP_KERNEL); + + if (!balanced_kobject) + return -ENOMEM; + + err = cpuquiet_kobject_init(balanced_kobject, &ktype_balanced, + "balanced"); + + if (err) + kfree(balanced_kobject); + + return err; +} + +static void balanced_stop(void) +{ + + /* + first unregister the notifiers. This ensures the governor state + can't be modified by a cpufreq transition + */ + cpufreq_unregister_notifier(&balanced_cpufreq_nb, + CPUFREQ_TRANSITION_NOTIFIER); + + /* now we can force the governor to be idle */ + balanced_state = IDLE; + cancel_delayed_work_sync(&balanced_work); + destroy_workqueue(balanced_wq); + del_timer(&load_timer); + + kobject_put(balanced_kobject); +} + +static int balanced_start(void) +{ + int err, count; + struct cpufreq_frequency_table *table; + + err = balanced_sysfs(); + if (err) + return err; + + balanced_wq = alloc_workqueue("cpuquiet-balanced", + WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1); + if (!balanced_wq) + return -ENOMEM; + + INIT_DELAYED_WORK(&balanced_work, balanced_work_func); + + up_delay = msecs_to_jiffies(1000); + down_delay = msecs_to_jiffies(2000); + + table = cpufreq_frequency_get_table(0); + for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++) + ; + + idle_top_freq = table[(count / 2) - 1].frequency; + idle_bottom_freq = table[(count / 2) - 2].frequency; + + cpufreq_register_notifier(&balanced_cpufreq_nb, + CPUFREQ_TRANSITION_NOTIFIER); + + init_timer(&load_timer); + load_timer.function = calculate_load_timer; + + return 0; +} + +struct cpuquiet_governor balanced_governor = { + .name = "balanced", + .start = balanced_start, + .stop = balanced_stop, + .owner = THIS_MODULE, +}; + +static int __init init_balanced(void) +{ + return cpuquiet_register_governor(&balanced_governor); +} + +static void __exit exit_balanced(void) +{ + cpuquiet_unregister_governor(&balanced_governor); +} + +MODULE_LICENSE("GPL"); +module_init(init_balanced); +module_exit(exit_balanced); + From d8c66ca4e2995c57cad0fbe8512af3d2ec3464d5 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 30 Mar 2012 11:46:09 +0300 Subject: [PATCH 183/678] cpuquiet: Makefile for governors Change-Id: I33018bb5db39f2881a3defc55758681cfb1d6284 Signed-off-by: Peter De Schrijver Reviewed-on: http://git-master/r/105272 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Diwakar Tundlam --- 
drivers/cpuquiet/governors/Makefile | 1 + 1 file changed, 1 insertion(+) create mode 100644 drivers/cpuquiet/governors/Makefile diff --git a/drivers/cpuquiet/governors/Makefile b/drivers/cpuquiet/governors/Makefile new file mode 100644 index 00000000000..c7080312708 --- /dev/null +++ b/drivers/cpuquiet/governors/Makefile @@ -0,0 +1 @@ +obj-y += userspace.o balanced.o From fabac48825508f1d1226a9f63044a659787d5365 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 30 Mar 2012 11:18:38 +0300 Subject: [PATCH 184/678] cpuquiet: Makefile for cpuquiet framework Change-Id: Ia071b03c6073c514b99457e35ebbd65ef32a6906 Signed-off-by: Peter De Schrijver Reviewed-on: http://git-master/r/105273 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Diwakar Tundlam --- drivers/cpuquiet/Makefile | 1 + 1 file changed, 1 insertion(+) create mode 100644 drivers/cpuquiet/Makefile diff --git a/drivers/cpuquiet/Makefile b/drivers/cpuquiet/Makefile new file mode 100644 index 00000000000..0502d4f3301 --- /dev/null +++ b/drivers/cpuquiet/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_CPUQUIET_FRAMEWORK) += cpuquiet.o driver.o sysfs.o governor.o governors/ From 000dd143b2a9c3920afee4087279cf88f9efd52d Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 30 Mar 2012 12:56:08 +0300 Subject: [PATCH 185/678] ARM: tegra: cpuquiet driver for Tegra3 Change-Id: Id7427bab50c6e285efe76afa234435bc984fc011 Signed-off-by: Sai Charan Gurrappadi Reviewed-on: http://git-master/r/105274 Reviewed-by: Rohan Somvanshi Tested-by: Rohan Somvanshi --- arch/arm/mach-tegra/Makefile | 4 + arch/arm/mach-tegra/cpuquiet.c | 344 +++++++++++++++++++++++++++++++++ 2 files changed, 348 insertions(+) create mode 100644 arch/arm/mach-tegra/cpuquiet.c diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile index 9fceb2d3511..9ea2aefe87a 100755 --- a/arch/arm/mach-tegra/Makefile +++ b/arch/arm/mach-tegra/Makefile @@ -79,8 +79,12 @@ obj-y += reset.o obj-$(CONFIG_TEGRA_SYSTEM_DMA) += dma.o obj-$(CONFIG_CPU_FREQ) += cpu-tegra.o ifeq ($(CONFIG_TEGRA_AUTO_HOTPLUG),y) +ifeq ($(CONFIG_CPUQUIET_FRAMEWORK),y) +obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += cpuquiet.o +else obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += cpu-tegra3.o endif +endif obj-$(CONFIG_TEGRA_PCI) += pcie.o obj-$(CONFIG_USB_SUPPORT) += usb_phy.o ifeq ($(CONFIG_CPU_IDLE),y) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c new file mode 100644 index 00000000000..2b5826d9740 --- /dev/null +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -0,0 +1,344 @@ +/* + * arch/arm/mach-tegra/cpuquiet.c + * + * Cpuquiet driver for Tegra3 CPUs + * + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pm.h" +#include "cpu-tegra.h" +#include "clock.h" + +#define INITIAL_STATE TEGRA_CPQ_IDLE +#define UP_DELAY_MS 70 +#define DOWN_DELAY_MS 2000 + +static struct mutex *tegra3_cpu_lock; +static struct workqueue_struct *cpuquiet_wq; +static struct delayed_work cpuquiet_work; +static struct work_struct minmax_work; + +static bool no_lp; +module_param(no_lp, bool, 0644); + +static unsigned long up_delay; +module_param(up_delay, ulong, 0644); +static unsigned long down_delay; +module_param(down_delay, ulong, 0644); + +static int mp_overhead = 10; +module_param(mp_overhead, int, 0644); +static unsigned int idle_top_freq; +module_param(idle_top_freq, uint, 0644); +static unsigned int idle_bottom_freq; +module_param(idle_bottom_freq, uint, 0644); + +static struct clk *cpu_clk; +static struct clk *cpu_g_clk; +static struct clk *cpu_lp_clk; + +static struct cpumask cr_online_requests; + +enum { + TEGRA_CPQ_DISABLED = 0, + TEGRA_CPQ_IDLE, + TEGRA_CPQ_SWITCH_TO_LP, + TEGRA_CPQ_SWITCH_TO_G, +}; + +static int cpq_state; + +static int update_core_config(unsigned int cpunumber, bool up) +{ + int ret = -EINVAL; + unsigned int nr_cpus = num_online_cpus(); + int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4; + int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); + + if (cpq_state == TEGRA_CPQ_DISABLED || cpunumber >= nr_cpu_ids) + return ret; + + if (up) { + if(is_lp_cluster()) { + cpumask_set_cpu(cpunumber, &cr_online_requests); + ret = -EBUSY; + } else { + if (tegra_cpu_edp_favor_up(nr_cpus, mp_overhead) && + nr_cpus < max_cpus) + ret = cpu_up(cpunumber); + } + } else { + if (is_lp_cluster()) { + ret = -EBUSY; + } else { + if (nr_cpus > min_cpus) + ret = cpu_down(cpunumber); + } + } + + return ret; +} + +static int tegra_quiesence_cpu(unsigned int cpunumber) +{ + return update_core_config(cpunumber, false); +} + +static int tegra_wake_cpu(unsigned int cpunumber) +{ + return update_core_config(cpunumber, true); +} + +static struct cpuquiet_driver tegra_cpuquiet_driver = { + .name = "tegra", + .quiesence_cpu = tegra_quiesence_cpu, + .wake_cpu = tegra_wake_cpu, +}; + +static void apply_core_config(void) +{ + unsigned int cpu; + + if (is_lp_cluster() || cpq_state == TEGRA_CPQ_DISABLED) + return; + + for_each_cpu_mask(cpu, cr_online_requests) { + if (cpu < nr_cpu_ids && !cpu_online(cpu)) + if (!tegra_wake_cpu(cpu)) + cpumask_clear_cpu(cpu, &cr_online_requests); + } +} + +static void tegra_cpuquiet_work_func(struct work_struct *work) +{ + bool update_cr_config = false; + + mutex_lock(tegra3_cpu_lock); + + switch(cpq_state) { + case TEGRA_CPQ_DISABLED: + case TEGRA_CPQ_IDLE: + break; + case TEGRA_CPQ_SWITCH_TO_G: + if (is_lp_cluster()) { + if(!clk_set_parent(cpu_clk, cpu_g_clk)) { + /*catch-up with governor target speed */ + tegra_cpu_set_speed_cap(NULL); + /* process pending core requests*/ + update_cr_config = true; + } + } + break; + case TEGRA_CPQ_SWITCH_TO_LP: + if (!is_lp_cluster() && !no_lp && + num_online_cpus() == 1) { + if (!clk_set_parent(cpu_clk, cpu_lp_clk)) { + /*catch-up with governor target speed*/ + tegra_cpu_set_speed_cap(NULL); + } + } + break; + default: + pr_err("%s: invalid tegra hotplug state %d\n", + __func__, cpq_state); + } + + mutex_unlock(tegra3_cpu_lock); + + if (update_cr_config) + apply_core_config(); +} + +static void min_max_constraints_workfunc(struct work_struct *work) +{ + int count = -1; + bool up = false; + 
unsigned int cpu; + + int nr_cpus = num_online_cpus(); + int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4; + int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); + + if (is_lp_cluster()) + return; + + if (nr_cpus < min_cpus) { + up = true; + count = min_cpus - nr_cpus; + } else if (nr_cpus > max_cpus && max_cpus >= min_cpus) { + count = nr_cpus - max_cpus; + } + + for (;count > 0; count--) { + if (up) { + cpu = cpumask_next_zero(0, cpu_online_mask); + if (cpu < nr_cpu_ids) + cpu_up(cpu); + else + break; + } else { + cpu = cpumask_next(0, cpu_online_mask); + if (cpu < nr_cpu_ids) + cpu_down(cpu); + else + break; + } + } +} + +static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) +{ + mutex_lock(tegra3_cpu_lock); + + if ((n >= 2) && is_lp_cluster()) { + /* make sure cpu rate is within g-mode range before switching */ + unsigned int speed = max( + tegra_getspeed(0), clk_get_min_rate(cpu_g_clk) / 1000); + tegra_update_cpu_speed(speed); + + clk_set_parent(cpu_clk, cpu_g_clk); + } + + tegra_cpu_set_speed_cap(NULL); + mutex_unlock(tegra3_cpu_lock); + + schedule_work(&minmax_work); + + return NOTIFY_OK; +} + +static int max_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) +{ + if (n < num_online_cpus()) + schedule_work(&minmax_work); + + return NOTIFY_OK; +} + +void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) +{ + if (!is_g_cluster_present()) + return; + + if (cpq_state == TEGRA_CPQ_DISABLED) + return; + + if (suspend) { + cpq_state = TEGRA_CPQ_IDLE; + + /* Switch to G-mode if suspend rate is high enough */ + if (is_lp_cluster() && (cpu_freq >= idle_bottom_freq)) { + clk_set_parent(cpu_clk, cpu_g_clk); + } + return; + } + + if (is_lp_cluster() && pm_qos_request(PM_QOS_MIN_ONLINE_CPUS) >= 2) { + if (cpq_state != TEGRA_CPQ_SWITCH_TO_G) { + /* Force switch */ + cpq_state = TEGRA_CPQ_SWITCH_TO_G; + queue_delayed_work( + cpuquiet_wq, &cpuquiet_work, up_delay); + } + return; + } + + if (is_lp_cluster() && (cpu_freq >= idle_top_freq || no_lp)) { + cpq_state = TEGRA_CPQ_SWITCH_TO_G; + queue_delayed_work(cpuquiet_wq, &cpuquiet_work, up_delay); + } else if (!is_lp_cluster() && !no_lp && + cpu_freq <= idle_bottom_freq) { + cpq_state = TEGRA_CPQ_SWITCH_TO_LP; + queue_delayed_work(cpuquiet_wq, &cpuquiet_work, down_delay); + } else { + cpq_state = TEGRA_CPQ_IDLE; + } +} + +static struct notifier_block min_cpus_notifier = { + .notifier_call = min_cpus_notify, +}; + +static struct notifier_block max_cpus_notifier = { + .notifier_call = max_cpus_notify, +}; + +int tegra_auto_hotplug_init(struct mutex *cpu_lock) +{ + /* + * Not bound to the issuer CPU (=> high-priority), has rescue worker + * task, single-threaded, freezable. 
+ */ + cpuquiet_wq = alloc_workqueue( + "cpuquiet", WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1); + + if (!cpuquiet_wq) + return -ENOMEM; + + INIT_DELAYED_WORK(&cpuquiet_work, tegra_cpuquiet_work_func); + INIT_WORK(&minmax_work, min_max_constraints_workfunc); + + cpu_clk = clk_get_sys(NULL, "cpu"); + cpu_g_clk = clk_get_sys(NULL, "cpu_g"); + cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); + + if (IS_ERR(cpu_clk) || IS_ERR(cpu_g_clk) || IS_ERR(cpu_lp_clk)) + return -ENOENT; + + idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; + idle_bottom_freq = clk_get_min_rate(cpu_g_clk) / 1000; + + up_delay = msecs_to_jiffies(UP_DELAY_MS); + down_delay = msecs_to_jiffies(DOWN_DELAY_MS); + cpumask_clear(&cr_online_requests); + tegra3_cpu_lock = cpu_lock; + + cpq_state = INITIAL_STATE; + + pr_info("Tegra cpuquiet initialized: %s\n", + (cpq_state == TEGRA_CPQ_DISABLED) ? "disabled" : "enabled"); + + if (pm_qos_add_notifier(PM_QOS_MIN_ONLINE_CPUS, &min_cpus_notifier)) + pr_err("%s: Failed to register min cpus PM QoS notifier\n", + __func__); + if (pm_qos_add_notifier(PM_QOS_MAX_ONLINE_CPUS, &max_cpus_notifier)) + pr_err("%s: Failed to register max cpus PM QoS notifier\n", + __func__); + + return cpuquiet_register_driver(&tegra_cpuquiet_driver); +} + +void tegra_auto_hotplug_exit(void) +{ + destroy_workqueue(cpuquiet_wq); + cpuquiet_unregister_driver(&tegra_cpuquiet_driver); +} From 865bc0ce4f821d400a7b811504f915274422507f Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Fri, 30 Mar 2012 12:58:13 +0300 Subject: [PATCH 186/678] cpuquiet: Added cpuquiet to driver Makefile. Change-Id: I41a722eabdca139f443964b8c6440a8bf768bd31 Signed-off-by: Peter De Schrijver Reviewed-on: http://git-master/r/105275 Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Diwakar Tundlam --- drivers/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/Makefile b/drivers/Makefile index 6357e71dcc0..24e48fc3526 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -5,6 +5,7 @@ # Rewritten to use lists instead of if-statements. 
# +obj-$(CONFIG_CPUQUIET_FRAMEWORK)+= cpuquiet/ obj-y += gpio/ obj-$(CONFIG_PCI) += pci/ obj-$(CONFIG_PARISC) += parisc/ From 6ceda95dcbb7a018deaa5d0af3e1997fa1164a95 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Mon, 11 Jun 2012 18:41:27 +0300 Subject: [PATCH 187/678] drivers: cpuquiet: fix error message Signed-off-by: Peter De Schrijver Change-Id: If04c699e002542bd8ce4b37b2367d7ec496c284e Reviewed-on: http://git-master/r/107959 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Peter De Schrijver Tested-by: Peter De Schrijver Reviewed-by: Matthew Longnecker Reviewed-by: Juha Tukkinen --- drivers/cpuquiet/governors/balanced.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpuquiet/governors/balanced.c b/drivers/cpuquiet/governors/balanced.c index 813a32e671d..270bbf5d1f2 100644 --- a/drivers/cpuquiet/governors/balanced.c +++ b/drivers/cpuquiet/governors/balanced.c @@ -299,8 +299,8 @@ static int balanced_cpufreq_transition(struct notifier_block *nb, } break; default: - pr_err("%s: invalid tegra hotplug state %d\n", - __func__, balanced_state); + pr_err("%s: invalid cpuquiet balanced governor " + "state %d\n", __func__, balanced_state); } } From 917047011e62b1eeea78344100a35bd15e48c3a1 Mon Sep 17 00:00:00 2001 From: Diwakar Tundlam Date: Mon, 7 May 2012 15:12:25 -0700 Subject: [PATCH 188/678] scheduler: compute time-average nr_running per run-queue Compute the time-average number of running tasks per run-queue for a trailing window of a fixed time period. The delta added to or subtracted from the average value is weighted by the amount of time spent at each nr_running value relative to the total measurement period. Change-Id: I076e24ff4ed65bed3b8dd8d2b279a503318071ff Signed-off-by: Diwakar Tundlam (cherry picked from commit 3a12d7499cee352e8a46eaf700259ba3c733f0e3) Reviewed-on: http://git-master/r/111635 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Peter Boonstoppel Reviewed-by: Yu-Huan Hsu --- include/linux/sched.h | 1 + kernel/sched.c | 41 +++++++++++++++++++++++++++++++++++++++++ kernel/sched_debug.c | 3 +++ 3 files changed, 45 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index 11af9aaf064..a661d2cddb7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -139,6 +139,7 @@ extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_iowait(void); +extern unsigned long avg_nr_running(void); extern unsigned long nr_iowait_cpu(int cpu); extern unsigned long this_cpu_load(void); diff --git a/kernel/sched.c b/kernel/sched.c index f8d81ce1961..8ccdb3266fd 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -472,6 +472,10 @@ struct rq { #endif int skip_clock_update; + /* time-based average load */ + u64 nr_last_stamp; + unsigned int ave_nr_running; + /* capture load from *all* tasks on this cpu: */ struct load_weight load; unsigned long nr_load_updates; @@ -1759,13 +1763,40 @@ static const struct sched_class rt_sched_class; #include "sched_stats.h" +/* 27 ~= 134217728ns = 134.2ms + * 26 ~= 67108864ns = 67.1ms + * 25 ~= 33554432ns = 33.5ms + * 24 ~= 16777216ns = 16.8ms + */ +#define NR_AVE_PERIOD_EXP 27 +#define NR_AVE_SCALE(x) ((x) << FSHIFT) +#define NR_AVE_PERIOD (1 << NR_AVE_PERIOD_EXP) +#define NR_AVE_DIV_PERIOD(x) ((x) >> NR_AVE_PERIOD_EXP) + +static inline void do_avg_nr_running(struct rq *rq) +{ + s64 
nr, deltax; + + deltax = rq->clock_task - rq->nr_last_stamp; + rq->nr_last_stamp = rq->clock_task; + nr = NR_AVE_SCALE(rq->nr_running); + + if (deltax > NR_AVE_PERIOD) + rq->ave_nr_running = nr; + else + rq->ave_nr_running += + NR_AVE_DIV_PERIOD(deltax * (nr - rq->ave_nr_running)); +} + static void inc_nr_running(struct rq *rq) { + do_avg_nr_running(rq); rq->nr_running++; } static void dec_nr_running(struct rq *rq) { + do_avg_nr_running(rq); rq->nr_running--; } @@ -3258,6 +3289,16 @@ unsigned long nr_iowait(void) return sum; } +unsigned long avg_nr_running(void) +{ + unsigned long i, sum = 0; + + for_each_online_cpu(i) + sum += cpu_rq(i)->ave_nr_running; + + return sum; +} + unsigned long nr_iowait_cpu(int cpu) { struct rq *this = cpu_rq(cpu); diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index a6710a112b4..6371af0e461 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -264,6 +264,9 @@ static void print_cpu(struct seq_file *m, int cpu) SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) P(nr_running); + SEQ_printf(m, " .%-30s: %d.%03d \n", "ave_nr_running", + rq->ave_nr_running / FIXED_1, + ((rq->ave_nr_running % FIXED_1) * 1000) / FIXED_1); SEQ_printf(m, " .%-30s: %lu\n", "load", rq->load.weight); P(nr_switches); From 9fe14f2018259569c3bc18afa0059514243a0ac5 Mon Sep 17 00:00:00 2001 From: Alex Frid Date: Wed, 16 May 2012 14:27:13 -0700 Subject: [PATCH 189/678] proc: enhance time-average nr_running stats Add time-average nr_running to loadavg printout Bug 958978 Change-Id: I5c6904efb52a86f4964eb66c1576fc91f60f5b1d Signed-off-by: Alex Frid (cherry picked from commit 86f3642cc44a69d1e4798719bd9182cd6923f526) Reviewed-on: http://git-master/r/111636 Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Peter Boonstoppel Reviewed-by: Yu-Huan Hsu --- fs/proc/loadavg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index 1afa4dd4cae..8d95888b22c 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -13,15 +13,17 @@ static int loadavg_proc_show(struct seq_file *m, void *v) { unsigned long avnrun[3]; + unsigned long time_avnrun = avg_nr_running(); get_avenrun(avnrun, FIXED_1/200, 0); - seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n", + seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d %lu.%02lu\n", LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]), LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), nr_running(), nr_threads, - task_active_pid_ns(current)->last_pid); + task_active_pid_ns(current)->last_pid, + LOAD_INT(time_avnrun), LOAD_FRAC(time_avnrun)); return 0; } From 9c5fdd99fc4e07851fd337e9c9b8b5e90e93654f Mon Sep 17 00:00:00 2001 From: Alex Frid Date: Fri, 18 May 2012 12:18:38 -0700 Subject: [PATCH 190/678] scheduler: Re-compute time-average nr_running on read Re-compute time-average nr_running when it is read. This would prevent reading stalled average value if there were no run-queue changes for a long time. New average value is returned to the reader, but not stored to avoid concurrent writes. Light-weight sequential counter synchronization is used to assure data consistency for re-computing average. 
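
For illustration only, not part of the patch: the quantity being re-computed here is an exponentially time-weighted average of nr_running over a 2^27 ns (~134 ms) window, kept in FSHIFT fixed point (FSHIFT is 11 in the kernel's load-average code, so 1.0 runnable task is stored as 2048). A minimal userspace sketch of the update rule used by do_avg_nr_running(), using a divide where the kernel shifts so that negative adjustments stay well defined:

    #include <stdio.h>
    #include <stdint.h>

    #define FSHIFT            11                    /* fixed-point fraction bits, as in the kernel */
    #define NR_AVE_PERIOD_EXP 27                    /* averaging window: 2^27 ns ~= 134.2 ms */
    #define NR_AVE_PERIOD     (1LL << NR_AVE_PERIOD_EXP)

    /* Blend the current nr_running into the average, weighted by the time
     * (in ns) the run-queue spent at that value since the last update. */
    static int64_t ave_step(int64_t ave, unsigned int nr_running, int64_t delta_ns)
    {
        int64_t nr = (int64_t)nr_running << FSHIFT;

        if (delta_ns > NR_AVE_PERIOD)
            return nr;                              /* a whole window passed: snap to the current value */
        return ave + delta_ns * (nr - ave) / NR_AVE_PERIOD;
    }

    int main(void)
    {
        int64_t ave = 1 << FSHIFT;                  /* start at 1.0 runnable task */

        ave = ave_step(ave, 3, 10 * 1000 * 1000LL); /* 10 ms at 3 runnable tasks */
        ave = ave_step(ave, 1, 20 * 1000 * 1000LL); /* 20 ms back at 1 task */
        printf("ave_nr_running ~= %.3f\n", (double)ave / (1 << FSHIFT));
        return 0;
    }

Ten milliseconds at three tasks followed by twenty at one leaves the average near 1.13, which shows how much more slowly the windowed average moves than the raw nr_running count.
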
Change-Id: I8e4ea1b28ea00b3ddaf6ef7cdcd27866f87d360b Signed-off-by: Alex Frid (cherry picked from commit 527a759d9b40bf57958eb002edd2bb82014dab99) Reviewed-on: http://git-master/r/111637 Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Peter Boonstoppel Reviewed-by: Yu-Huan Hsu --- kernel/sched.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 8ccdb3266fd..8423df5f305 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -475,6 +475,7 @@ struct rq { /* time-based average load */ u64 nr_last_stamp; unsigned int ave_nr_running; + seqcount_t ave_seqcnt; /* capture load from *all* tasks on this cpu: */ struct load_weight load; @@ -1773,31 +1774,39 @@ static const struct sched_class rt_sched_class; #define NR_AVE_PERIOD (1 << NR_AVE_PERIOD_EXP) #define NR_AVE_DIV_PERIOD(x) ((x) >> NR_AVE_PERIOD_EXP) -static inline void do_avg_nr_running(struct rq *rq) +static inline unsigned int do_avg_nr_running(struct rq *rq) { s64 nr, deltax; + unsigned int ave_nr_running = rq->ave_nr_running; deltax = rq->clock_task - rq->nr_last_stamp; - rq->nr_last_stamp = rq->clock_task; nr = NR_AVE_SCALE(rq->nr_running); if (deltax > NR_AVE_PERIOD) - rq->ave_nr_running = nr; + ave_nr_running = nr; else - rq->ave_nr_running += - NR_AVE_DIV_PERIOD(deltax * (nr - rq->ave_nr_running)); + ave_nr_running += + NR_AVE_DIV_PERIOD(deltax * (nr - ave_nr_running)); + + return ave_nr_running; } static void inc_nr_running(struct rq *rq) { - do_avg_nr_running(rq); + write_seqcount_begin(&rq->ave_seqcnt); + rq->ave_nr_running = do_avg_nr_running(rq); + rq->nr_last_stamp = rq->clock_task; rq->nr_running++; + write_seqcount_end(&rq->ave_seqcnt); } static void dec_nr_running(struct rq *rq) { - do_avg_nr_running(rq); + write_seqcount_begin(&rq->ave_seqcnt); + rq->ave_nr_running = do_avg_nr_running(rq); + rq->nr_last_stamp = rq->clock_task; rq->nr_running--; + write_seqcount_end(&rq->ave_seqcnt); } static void set_load_weight(struct task_struct *p) @@ -3292,9 +3301,26 @@ unsigned long nr_iowait(void) unsigned long avg_nr_running(void) { unsigned long i, sum = 0; + unsigned int seqcnt, ave_nr_running; - for_each_online_cpu(i) - sum += cpu_rq(i)->ave_nr_running; + for_each_online_cpu(i) { + struct rq *q = cpu_rq(i); + + /* + * Update average to avoid reading stalled value if there were + * no run-queue changes for a long time. On the other hand if + * the changes are happening right now, just read current value + * directly. + */ + seqcnt = read_seqcount_begin(&q->ave_seqcnt); + ave_nr_running = do_avg_nr_running(q); + if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) { + read_seqcount_begin(&q->ave_seqcnt); + ave_nr_running = q->ave_nr_running; + } + + sum += ave_nr_running; + } return sum; } From dfcdad6d088b7aa961e7d7ade14bf16b401f6e5f Mon Sep 17 00:00:00 2001 From: Wen Yi Date: Tue, 12 Jun 2012 12:01:11 -0700 Subject: [PATCH 191/678] ARM: tegra: power: Use runnable threads average for hotplug Sample scheduler runnable threads average in auto-hotplug work function and use it to determine the auto-hotplug target for number of on-line cores. Use cpu up delay as sampling period, and enforce down delay by checking last cpu configuration change time stamp. 
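
For illustration only, not part of the patch: the thresholds introduced below are in NR_FSHIFT = 2 fixed point, so 5, 9 and 13 mean 1.25, 2.25 and 3.25 runnable threads, and the hysteresis of 2 is 0.5 thread; avg_nr_running() is scaled by FSHIFT = 11, hence the shift by FSHIFT - NR_FSHIFT = 9 in the comparison. A small userspace sketch of the table walk, under those assumptions:

    #include <stdio.h>
    #include <limits.h>

    #define FSHIFT    11                            /* scale of avg_nr_running() */
    #define NR_FSHIFT 2                             /* scale of the threshold table */
    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static unsigned int nr_run_thresholds[] = { 5, 9, 13, UINT_MAX };
    static unsigned int nr_run_hysteresis = 2;      /* 0.5 thread */
    static unsigned int nr_run_last = 1;

    /* Map an FSHIFT-scaled runnable-thread average to an on-line CPU target. */
    static unsigned int nr_run_target(unsigned int avg_nr_run)
    {
        unsigned int nr_run;

        for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) {
            unsigned int threshold = nr_run_thresholds[nr_run - 1];

            if (nr_run_last <= nr_run)              /* sticky: harder to move off the current level */
                threshold += nr_run_hysteresis;
            if (avg_nr_run <= (threshold << (FSHIFT - NR_FSHIFT)))
                break;
        }
        nr_run_last = nr_run;
        return nr_run;
    }

    int main(void)
    {
        /* ~2.2 runnable threads on average: 2.2 * 2048 ~= 4505 */
        printf("target on-line CPUs: %u\n", nr_run_target(4505));
        return 0;
    }

With nr_run_last at 1 the first threshold is effectively 1.75 threads, so a 2.2-thread average maps to a two-core target; the 0.5-thread hysteresis keeps the target from flapping when the average sits near a boundary.
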
Bug 958978 Change-Id: I4280a11d39914687e6ffaa6f38df594d10aedaa9 Signed-off-by: Alex Frid (cherry picked from commit 507e2ef5e4f09b23de2e924003dba259d3c8bc3c) Reviewed-on: http://git-master/r/111638 Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Peter Boonstoppel Reviewed-by: Yu-Huan Hsu --- arch/arm/mach-tegra/cpu-tegra3.c | 47 ++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index a020f5ba9bb..c311e97c1ce 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -73,6 +73,8 @@ static struct clk *cpu_clk; static struct clk *cpu_g_clk; static struct clk *cpu_lp_clk; +static unsigned long last_change_time; + static struct { cputime64_t time_up_total; u64 last_update; @@ -186,6 +188,14 @@ enum { TEGRA_CPU_SPEED_SKEWED, }; +#define NR_FSHIFT 2 +static unsigned int nr_run_thresholds[] = { +/* 1, 2, 3, 4 - on-line cpus target */ + 5, 9, 13, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ +}; +static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ +static unsigned int nr_run_last; + static noinline int tegra_cpu_speed_balance(void) { unsigned long highest_speed = tegra_cpu_highest_speed(); @@ -194,17 +204,36 @@ static noinline int tegra_cpu_speed_balance(void) unsigned int nr_cpus = num_online_cpus(); unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4; unsigned int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); + unsigned int avg_nr_run = avg_nr_running(); + unsigned int nr_run; + + /* Evaluate: + * - distribution of freq targets for already on-lined CPUs + * - average number of runnable threads + * - effective MIPS available within EDP frequency limits, + * and return: + * TEGRA_CPU_SPEED_BALANCED to bring one more CPU core on-line + * TEGRA_CPU_SPEED_BIASED to keep CPU core composition unchanged + * TEGRA_CPU_SPEED_SKEWED to remove CPU core off-line + */ + for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) { + unsigned int nr_threshold = nr_run_thresholds[nr_run - 1]; + if (nr_run_last <= nr_run) + nr_threshold += nr_run_hysteresis; + if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT))) + break; + } + nr_run_last = nr_run; - /* balanced: freq targets for all CPUs are above 50% of highest speed - biased: freq target for at least one CPU is below 50% threshold - skewed: freq targets for at least 2 CPUs are below 25% threshold */ if (((tegra_count_slow_cpus(skewed_speed) >= 2) || + (nr_run < nr_cpus) || tegra_cpu_edp_favor_down(nr_cpus, mp_overhead) || (highest_speed <= idle_bottom_freq) || (nr_cpus > max_cpus)) && (nr_cpus > min_cpus)) return TEGRA_CPU_SPEED_SKEWED; if (((tegra_count_slow_cpus(balanced_speed) >= 1) || + (nr_run <= nr_cpus) || (!tegra_cpu_edp_favor_up(nr_cpus, mp_overhead)) || (highest_speed <= idle_bottom_freq) || (nr_cpus == max_cpus)) && (nr_cpus >= min_cpus)) @@ -222,7 +251,6 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) bool up = false; unsigned int cpu = nr_cpu_ids; unsigned long now = jiffies; - static unsigned long last_change_time; mutex_lock(tegra3_cpu_lock); @@ -235,7 +263,8 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) if (cpu < nr_cpu_ids) { up = false; } else if (!is_lp_cluster() && !no_lp && - !pm_qos_request(PM_QOS_MIN_ONLINE_CPUS)) { + !pm_qos_request(PM_QOS_MIN_ONLINE_CPUS) && + ((now - last_change_time) >= down_delay)) { if(!clk_set_parent(cpu_clk, 
cpu_lp_clk)) { hp_stats_update(CONFIG_NR_CPUS, true); hp_stats_update(0, false); @@ -245,11 +274,12 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) } } queue_delayed_work( - hotplug_wq, &hotplug_work, down_delay); + hotplug_wq, &hotplug_work, up2gn_delay); break; case TEGRA_HP_UP: if (is_lp_cluster() && !no_lp) { if(!clk_set_parent(cpu_clk, cpu_g_clk)) { + last_change_time = now; hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); /* catch-up with governor target speed */ @@ -316,6 +346,7 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) tegra_update_cpu_speed(speed); if (!clk_set_parent(cpu_clk, cpu_g_clk)) { + last_change_time = jiffies; hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); } @@ -381,7 +412,7 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) } else if (cpu_freq <= bottom_freq) { hp_state = TEGRA_HP_DOWN; queue_delayed_work( - hotplug_wq, &hotplug_work, down_delay); + hotplug_wq, &hotplug_work, up_delay); } break; case TEGRA_HP_DOWN: @@ -397,7 +428,7 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) if (cpu_freq <= bottom_freq) { hp_state = TEGRA_HP_DOWN; queue_delayed_work( - hotplug_wq, &hotplug_work, down_delay); + hotplug_wq, &hotplug_work, up_delay); } else if (cpu_freq <= top_freq) { hp_state = TEGRA_HP_IDLE; } From 0157daa40cd1448fc3bcdc59412520c4d9acd052 Mon Sep 17 00:00:00 2001 From: Xiao Bo Zhao Date: Thu, 7 Jun 2012 17:20:18 -0700 Subject: [PATCH 192/678] arm: tegra: hotplug: tune runnable thread params. Tuned the runnable threads threshold from 5/9/13 to 5/9/10 in order to improve performance Bug 958978 Change-Id: I77abcd0077845517f2b5f7487c547f8a5157c2c7 Signed-off-by: Wen Yi (cherry picked from commit 25a97f57661353fbb5ee40faed296befbf635178) Reviewed-on: http://git-master/r/111639 Reviewed-by: Sai Gurrappadi Tested-by: Sai Gurrappadi Reviewed-by: Peter Boonstoppel Reviewed-by: Yu-Huan Hsu --- arch/arm/mach-tegra/cpu-tegra3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index c311e97c1ce..c5a8680ee10 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -191,7 +191,7 @@ enum { #define NR_FSHIFT 2 static unsigned int nr_run_thresholds[] = { /* 1, 2, 3, 4 - on-line cpus target */ - 5, 9, 13, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ + 5, 9, 10, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ }; static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ static unsigned int nr_run_last; From dfad2f7ddafe80b6a2138c02f30d567d95f7d9fe Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Mon, 18 Jun 2012 18:22:33 -0700 Subject: [PATCH 193/678] cpuquiet: Updated balanced governor to use the runnable threads patch The balanced governor now looks at the average number of runnable threads when bringing cores online and offline. The balanced governor parameters have also been updated to reflect a similar patch for autohotplug. 
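
A worked example of the load side of the decision, for illustration only and not part of the patch: with balance_level lowered to 60 below, if the busiest on-line CPU's load sample is 80 then balanced_speed = 80 * 60 / 100 = 48 and skewed_speed = 48 / 2 = 24. A single core below 48 is enough to report CPU_SPEED_BIASED (do not bring another core up), while two cores below 24, or a runnable-thread average lower than the number of on-line cores, reports CPU_SPEED_SKEWED, making the slowest core a candidate for being taken off-line.
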
Change-Id: I8dac26659ba43d95a68830c6cc268591a7f03f80 Signed-off-by: Sai Charan Gurrappadi Reviewed-on: http://git-master/r/111282 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Peter De Schrijver Reviewed-by: Peter Boonstoppel GVS: Gerrit_Virtual_Submit Reviewed-by: Aleksandr Frid Reviewed-by: Yu-Huan Hsu --- drivers/cpuquiet/governors/balanced.c | 53 ++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/drivers/cpuquiet/governors/balanced.c b/drivers/cpuquiet/governors/balanced.c index 270bbf5d1f2..968344e220d 100644 --- a/drivers/cpuquiet/governors/balanced.c +++ b/drivers/cpuquiet/governors/balanced.c @@ -72,12 +72,13 @@ struct balanced_attribute { } /* configurable parameters */ -static unsigned long balance_level = 75; +static unsigned long balance_level = 60; static unsigned long idle_bottom_freq; static unsigned long idle_top_freq; static unsigned long up_delay; static unsigned long down_delay; - +static unsigned long last_change_time; +static unsigned long load_sample_rate = 20; // msec static struct workqueue_struct *balanced_wq; static struct delayed_work balanced_work; static BALANCED_STATE balanced_state; @@ -106,7 +107,7 @@ static void calculate_load_timer(unsigned long data) do_div(idle_time, elapsed_time); *load = 100 - idle_time; } - mod_timer(&load_timer, jiffies + msecs_to_jiffies(100)); + mod_timer(&load_timer, jiffies + msecs_to_jiffies(load_sample_rate)); } static void start_load_timer(void) @@ -183,6 +184,14 @@ static unsigned int count_slow_cpus(unsigned int limit) return cnt; } +#define NR_FSHIFT 2 +static unsigned int nr_run_thresholds[] = { +/* 1, 2, 3, 4 - on-line cpus target */ + 5, 9, 10, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ +}; +static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ +static unsigned int nr_run_last; + static CPU_SPEED_BALANCE balanced_speed_balance(void) { unsigned long highest_speed = cpu_highest_speed(); @@ -190,14 +199,27 @@ static CPU_SPEED_BALANCE balanced_speed_balance(void) unsigned long skewed_speed = balanced_speed / 2; unsigned int nr_cpus = num_online_cpus(); unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? 
: 4; + unsigned int avg_nr_run = avg_nr_running(); + unsigned int nr_run; /* balanced: freq targets for all CPUs are above 50% of highest speed biased: freq target for at least one CPU is below 50% threshold skewed: freq targets for at least 2 CPUs are below 25% threshold */ - if (count_slow_cpus(skewed_speed) >= 2 || nr_cpus > max_cpus) + for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) { + unsigned int nr_threshold = nr_run_thresholds[nr_run - 1]; + if (nr_run_last <= nr_run) + nr_threshold += nr_run_hysteresis; + if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT))) + break; + } + nr_run_last = nr_run; + + if (count_slow_cpus(skewed_speed) >= 2 || nr_cpus > max_cpus || + nr_run < nr_cpus) return CPU_SPEED_SKEWED; - if (count_slow_cpus(balanced_speed) >= 1 || nr_cpus == max_cpus) + if (count_slow_cpus(balanced_speed) >= 1 || nr_cpus == max_cpus || + nr_run <= nr_cpus) return CPU_SPEED_BIASED; return CPU_SPEED_BALANCED; @@ -207,6 +229,8 @@ static void balanced_work_func(struct work_struct *work) { bool up = false; unsigned int cpu = nr_cpu_ids; + unsigned long now = jiffies; + CPU_SPEED_BALANCE balance; switch (balanced_state) { @@ -217,7 +241,7 @@ static void balanced_work_func(struct work_struct *work) if (cpu < nr_cpu_ids) { up = false; queue_delayed_work(balanced_wq, - &balanced_work, down_delay); + &balanced_work, up_delay); } else stop_load_timer(); break; @@ -250,7 +274,11 @@ static void balanced_work_func(struct work_struct *work) __func__, balanced_state); } + if (!up && ((now - last_change_time) < down_delay)) + cpu = nr_cpu_ids; + if (cpu < nr_cpu_ids) { + last_change_time = now; if (up) cpuquiet_wake_cpu(cpu); else @@ -294,7 +322,7 @@ static int balanced_cpufreq_transition(struct notifier_block *nb, if (cpu_freq <= idle_bottom_freq) { balanced_state = DOWN; queue_delayed_work(balanced_wq, - &balanced_work, down_delay); + &balanced_work, up_delay); start_load_timer(); } break; @@ -357,6 +385,7 @@ BALANCED_ATTRIBUTE(idle_bottom_freq, 0644); BALANCED_ATTRIBUTE(idle_top_freq, 0644); BALANCED_ATTRIBUTE(up_delay, 0644); BALANCED_ATTRIBUTE(down_delay, 0644); +BALANCED_ATTRIBUTE(load_sample_rate, 0644); static struct attribute *balanced_attributes[] = { &balance_level_attr.attr, @@ -419,6 +448,7 @@ static int balanced_start(void) { int err, count; struct cpufreq_frequency_table *table; + struct cpufreq_freqs initial_freq; err = balanced_sysfs(); if (err) @@ -431,8 +461,8 @@ static int balanced_start(void) INIT_DELAYED_WORK(&balanced_work, balanced_work_func); - up_delay = msecs_to_jiffies(1000); - down_delay = msecs_to_jiffies(2000); + up_delay = msecs_to_jiffies(100); + down_delay = msecs_to_jiffies(500); table = cpufreq_frequency_get_table(0); for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++) @@ -447,6 +477,11 @@ static int balanced_start(void) init_timer(&load_timer); load_timer.function = calculate_load_timer; + /*FIXME: Kick start the state machine by faking a freq notification*/ + initial_freq.new = cpufreq_get(0); + if (initial_freq.new != 0) + balanced_cpufreq_transition(NULL, CPUFREQ_RESUMECHANGE, + &initial_freq); return 0; } From b39458c042f3f3a0a53ad587d6a11e482b110949 Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Tue, 19 Jun 2012 10:34:14 -0700 Subject: [PATCH 194/678] ARM: cpuquiet: Go to and stay on G-CPU if min_cpus 1 is requested Only return to LP-CPU if minimum number of cpus requested by the PM QoS interface is at the default 0 level. 
Change-Id: I1384c7fc62b86e385e10b4baaf81eb600f31da6e Signed-off-by: Sai Charan Gurrappadi Reviewed-on: http://git-master/r/109831 Reviewed-by: Simone Willett Tested-by: Simone Willett --- arch/arm/mach-tegra/cpuquiet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index 2b5826d9740..7ef217eb687 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -160,7 +160,8 @@ static void tegra_cpuquiet_work_func(struct work_struct *work) break; case TEGRA_CPQ_SWITCH_TO_LP: if (!is_lp_cluster() && !no_lp && - num_online_cpus() == 1) { + !pm_qos_request(PM_QOS_MIN_ONLINE_CPUS) + && num_online_cpus() == 1) { if (!clk_set_parent(cpu_clk, cpu_lp_clk)) { /*catch-up with governor target speed*/ tegra_cpu_set_speed_cap(NULL); @@ -219,7 +220,7 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) { mutex_lock(tegra3_cpu_lock); - if ((n >= 2) && is_lp_cluster()) { + if ((n >= 1) && is_lp_cluster()) { /* make sure cpu rate is within g-mode range before switching */ unsigned int speed = max( tegra_getspeed(0), clk_get_min_rate(cpu_g_clk) / 1000); From b9254ee2dcff7e477471ab0bad4fcf9bc3b29efc Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Mon, 11 Jun 2012 20:43:59 +0300 Subject: [PATCH 195/678] ARM: tegra: add sysfs support for tegra cpuquiet driver Change-Id: I215c5de8e98d139a93113978e1e27adb5a6b252c Signed-off-by: Sai Charan Gurrappadi Reviewed-on: http://git-master/r/111283 Reviewed-by: Simone Willett Tested-by: Simone Willett --- arch/arm/mach-tegra/cpuquiet.c | 119 ++++++++++++++++++++--- drivers/cpuquiet/Makefile | 2 +- drivers/cpuquiet/cpuquiet_attribute.c | 133 ++++++++++++++++++++++++++ drivers/cpuquiet/governors/balanced.c | 84 ++++------------ include/linux/cpuquiet.h | 52 ++++++++++ 5 files changed, 311 insertions(+), 79 deletions(-) create mode 100644 drivers/cpuquiet/cpuquiet_attribute.c diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index 7ef217eb687..f0163e1f3fc 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -20,7 +20,6 @@ */ #include -#include #include #include #include @@ -47,20 +46,15 @@ static struct workqueue_struct *cpuquiet_wq; static struct delayed_work cpuquiet_work; static struct work_struct minmax_work; -static bool no_lp; -module_param(no_lp, bool, 0644); +static struct kobject *tegra_auto_sysfs_kobject; +static bool no_lp; +static bool enable; static unsigned long up_delay; -module_param(up_delay, ulong, 0644); static unsigned long down_delay; -module_param(down_delay, ulong, 0644); - static int mp_overhead = 10; -module_param(mp_overhead, int, 0644); static unsigned int idle_top_freq; -module_param(idle_top_freq, uint, 0644); static unsigned int idle_bottom_freq; -module_param(idle_bottom_freq, uint, 0644); static struct clk *cpu_clk; static struct clk *cpu_g_clk; @@ -222,8 +216,8 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) if ((n >= 1) && is_lp_cluster()) { /* make sure cpu rate is within g-mode range before switching */ - unsigned int speed = max( - tegra_getspeed(0), clk_get_min_rate(cpu_g_clk) / 1000); + unsigned long speed = max((unsigned long)tegra_getspeed(0), + clk_get_min_rate(cpu_g_clk) / 1000); tegra_update_cpu_speed(speed); clk_set_parent(cpu_clk, cpu_g_clk); @@ -293,8 +287,94 @@ static struct notifier_block max_cpus_notifier = { .notifier_call = max_cpus_notify, }; +static void delay_callback(struct 
cpuquiet_attribute *attr) +{ + unsigned long val; + + if (attr) { + val = (*((unsigned long *)(attr->param))); + (*((unsigned long *)(attr->param))) = msecs_to_jiffies(val); + } +} + +static void enable_callback(struct cpuquiet_attribute *attr) +{ + mutex_lock(tegra3_cpu_lock); + + if (!enable && cpq_state != TEGRA_CPQ_DISABLED) { + cpq_state = TEGRA_CPQ_DISABLED; + mutex_unlock(tegra3_cpu_lock); + cancel_delayed_work_sync(&cpuquiet_work); + pr_info("Tegra cpuquiet clusterswitch disabled\n"); + mutex_lock(tegra3_cpu_lock); + } else if (enable && cpq_state == TEGRA_CPQ_DISABLED) { + cpq_state = TEGRA_CPQ_IDLE; + pr_info("Tegra cpuquiet clusterswitch enabled\n"); + tegra_cpu_set_speed_cap(NULL); + } + + mutex_unlock(tegra3_cpu_lock); +} + +CPQ_BASIC_ATTRIBUTE(no_lp, 0644, bool); +CPQ_BASIC_ATTRIBUTE(idle_top_freq, 0644, uint); +CPQ_BASIC_ATTRIBUTE(idle_bottom_freq, 0644, uint); +CPQ_BASIC_ATTRIBUTE(mp_overhead, 0644, int); +CPQ_ATTRIBUTE(up_delay, 0644, ulong, delay_callback); +CPQ_ATTRIBUTE(down_delay, 0644, ulong, delay_callback); +CPQ_ATTRIBUTE(enable, 0644, bool, enable_callback); + +static struct attribute *tegra_auto_attributes[] = { + &no_lp_attr.attr, + &up_delay_attr.attr, + &down_delay_attr.attr, + &idle_top_freq_attr.attr, + &idle_bottom_freq_attr.attr, + &mp_overhead_attr.attr, + &enable_attr.attr, + NULL, +}; + +static const struct sysfs_ops tegra_auto_sysfs_ops = { + .show = cpuquiet_auto_sysfs_show, + .store = cpuquiet_auto_sysfs_store, +}; + +static struct kobj_type ktype_sysfs = { + .sysfs_ops = &tegra_auto_sysfs_ops, + .default_attrs = tegra_auto_attributes, +}; + +static int tegra_auto_sysfs(void) +{ + int err; + + tegra_auto_sysfs_kobject = kzalloc(sizeof(*tegra_auto_sysfs_kobject), + GFP_KERNEL); + + if (!tegra_auto_sysfs_kobject) + return -ENOMEM; + + err = cpuquiet_kobject_init(tegra_auto_sysfs_kobject, &ktype_sysfs, + "tegra_cpuquiet"); + + if (err) + kfree(tegra_auto_sysfs_kobject); + + return err; +} + int tegra_auto_hotplug_init(struct mutex *cpu_lock) { + int err; + + cpu_clk = clk_get_sys(NULL, "cpu"); + cpu_g_clk = clk_get_sys(NULL, "cpu_g"); + cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); + + if (IS_ERR(cpu_clk) || IS_ERR(cpu_g_clk) || IS_ERR(cpu_lp_clk)) + return -ENOENT; + /* * Not bound to the issuer CPU (=> high-priority), has rescue worker * task, single-threaded, freezable. @@ -324,6 +404,8 @@ int tegra_auto_hotplug_init(struct mutex *cpu_lock) tegra3_cpu_lock = cpu_lock; cpq_state = INITIAL_STATE; + enable = cpq_state == TEGRA_CPQ_DISABLED ? false : true; + pr_info("Tegra cpuquiet initialized: %s\n", (cpq_state == TEGRA_CPQ_DISABLED) ? 
"disabled" : "enabled"); @@ -335,11 +417,24 @@ int tegra_auto_hotplug_init(struct mutex *cpu_lock) pr_err("%s: Failed to register max cpus PM QoS notifier\n", __func__); - return cpuquiet_register_driver(&tegra_cpuquiet_driver); + err = cpuquiet_register_driver(&tegra_cpuquiet_driver); + if (err) { + destroy_workqueue(cpuquiet_wq); + return err; + } + + err = tegra_auto_sysfs(); + if (err) { + cpuquiet_unregister_driver(&tegra_cpuquiet_driver); + destroy_workqueue(cpuquiet_wq); + } + + return err; } void tegra_auto_hotplug_exit(void) { destroy_workqueue(cpuquiet_wq); cpuquiet_unregister_driver(&tegra_cpuquiet_driver); + kobject_put(tegra_auto_sysfs_kobject); } diff --git a/drivers/cpuquiet/Makefile b/drivers/cpuquiet/Makefile index 0502d4f3301..e438defaacd 100644 --- a/drivers/cpuquiet/Makefile +++ b/drivers/cpuquiet/Makefile @@ -1 +1 @@ -obj-$(CONFIG_CPUQUIET_FRAMEWORK) += cpuquiet.o driver.o sysfs.o governor.o governors/ +obj-$(CONFIG_CPUQUIET_FRAMEWORK) += cpuquiet.o driver.o sysfs.o cpuquiet_attribute.o governor.o governors/ diff --git a/drivers/cpuquiet/cpuquiet_attribute.c b/drivers/cpuquiet/cpuquiet_attribute.c new file mode 100644 index 00000000000..9f1aa430149 --- /dev/null +++ b/drivers/cpuquiet/cpuquiet_attribute.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include + +ssize_t show_int_attribute(struct cpuquiet_attribute *cattr, char *buf) +{ + return sprintf(buf, "%d\n", *((int *)cattr->param)); +} + +ssize_t store_int_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count) +{ + int err, val; + + err = kstrtoint(buf, 0, &val); + if (err < 0) + return err; + + *((int *)(cattr->param)) = val; + + if (cattr->store_callback) + cattr->store_callback(cattr); + + return count; +} + +ssize_t show_bool_attribute(struct cpuquiet_attribute *cattr, char *buf) +{ + return sprintf(buf, "%d\n", *((bool *)cattr->param)); +} + +ssize_t store_bool_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count) +{ + int err, val; + + err = kstrtoint(buf, 0, &val); + if (err < 0) + return err; + + if (val < 0 || val > 1) + return -EINVAL; + + *((bool *)(cattr->param)) = val; + + if (cattr->store_callback) + cattr->store_callback(cattr); + + return count; +} + +ssize_t show_uint_attribute(struct cpuquiet_attribute *cattr, char *buf) +{ + return sprintf(buf, "%u\n", *((unsigned int *)cattr->param)); +} + +ssize_t store_uint_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count) +{ + int err; + unsigned int val; + + err = kstrtouint(buf, 0, &val); + if (err < 0) + return err; + + *((unsigned int *)(cattr->param)) = val; + + if (cattr->store_callback) + cattr->store_callback(cattr); + + return count; +} + +ssize_t store_ulong_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count) +{ + int err; + unsigned long val; + + err = kstrtoul(buf, 0, &val); + if (err < 0) + return err; + + *((unsigned long *)(cattr->param)) = val; + + if (cattr->store_callback) + cattr->store_callback(cattr); + + return count; +} + +ssize_t show_ulong_attribute(struct cpuquiet_attribute *cattr, + char *buf) +{ + return sprintf(buf, "%lu\n", *((unsigned long *)cattr->param)); +} + +ssize_t cpuquiet_auto_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct cpuquiet_attribute *cattr = + container_of(attr, struct cpuquiet_attribute, attr); + + if (cattr->store) + return cattr->store(cattr, buf, count); + + return -EINVAL; +} + +ssize_t cpuquiet_auto_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct cpuquiet_attribute *cattr = + container_of(attr, struct cpuquiet_attribute, attr); + + return cattr->show(cattr, buf); +} diff --git a/drivers/cpuquiet/governors/balanced.c b/drivers/cpuquiet/governors/balanced.c index 968344e220d..da98362e5f2 100644 --- a/drivers/cpuquiet/governors/balanced.c +++ b/drivers/cpuquiet/governors/balanced.c @@ -55,30 +55,15 @@ static DEFINE_PER_CPU(unsigned int, cpu_load); static struct timer_list load_timer; static bool load_timer_active; -struct balanced_attribute { - struct attribute attr; - ssize_t (*show)(struct balanced_attribute *attr, char *buf); - ssize_t (*store)(struct balanced_attribute *attr, const char *buf, - size_t count); - unsigned long *param; -}; - -#define BALANCED_ATTRIBUTE(_name, _mode) \ - static struct balanced_attribute _name ## _attr = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = show_attribute, \ - .store = store_attribute, \ - .param = &_name, \ -} /* configurable parameters */ -static unsigned long balance_level = 60; -static unsigned long idle_bottom_freq; -static unsigned long idle_top_freq; +static unsigned int balance_level = 60; +static unsigned int idle_bottom_freq; +static unsigned int idle_top_freq; static unsigned long up_delay; static 
unsigned long down_delay; static unsigned long last_change_time; -static unsigned long load_sample_rate = 20; // msec +static unsigned int load_sample_rate = 20; /* msec */ static struct workqueue_struct *balanced_wq; static struct delayed_work balanced_work; static BALANCED_STATE balanced_state; @@ -339,53 +324,22 @@ static struct notifier_block balanced_cpufreq_nb = { .notifier_call = balanced_cpufreq_transition, }; -static ssize_t show_attribute(struct balanced_attribute *battr, char *buf) -{ - return sprintf(buf, "%lu\n", *(battr->param)); -} - -static ssize_t store_attribute(struct balanced_attribute *battr, - const char *buf, size_t count) +static void delay_callback(struct cpuquiet_attribute *attr) { - int err; unsigned long val; - err = strict_strtoul(buf, 0, &val); - if (err < 0) - return err; - - *(battr->param) = val; - - return count; -} - -static ssize_t balanced_sysfs_store(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - struct balanced_attribute *battr = - container_of(attr, struct balanced_attribute, attr); - - if (battr->store) - return battr->store(battr, buf, count); - - return -EINVAL; -} - -static ssize_t balanced_sysfs_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct balanced_attribute *battr = - container_of(attr, struct balanced_attribute, attr); - - return battr->show(battr, buf); + if (attr) { + val = (*((unsigned long *)(attr->param))); + (*((unsigned long *)(attr->param))) = msecs_to_jiffies(val); + } } -BALANCED_ATTRIBUTE(balance_level, 0644); -BALANCED_ATTRIBUTE(idle_bottom_freq, 0644); -BALANCED_ATTRIBUTE(idle_top_freq, 0644); -BALANCED_ATTRIBUTE(up_delay, 0644); -BALANCED_ATTRIBUTE(down_delay, 0644); -BALANCED_ATTRIBUTE(load_sample_rate, 0644); +CPQ_BASIC_ATTRIBUTE(balance_level, 0644, uint); +CPQ_BASIC_ATTRIBUTE(idle_bottom_freq, 0644, uint); +CPQ_BASIC_ATTRIBUTE(idle_top_freq, 0644, uint); +CPQ_BASIC_ATTRIBUTE(load_sample_rate, 0644, uint); +CPQ_ATTRIBUTE(up_delay, 0644, ulong, delay_callback); +CPQ_ATTRIBUTE(down_delay, 0644, ulong, delay_callback); static struct attribute *balanced_attributes[] = { &balance_level_attr.attr, @@ -397,8 +351,8 @@ static struct attribute *balanced_attributes[] = { }; static const struct sysfs_ops balanced_sysfs_ops = { - .show = balanced_sysfs_show, - .store = balanced_sysfs_store, + .show = cpuquiet_auto_sysfs_show, + .store = cpuquiet_auto_sysfs_store, }; static struct kobj_type ktype_balanced = { @@ -427,7 +381,6 @@ static int balanced_sysfs(void) static void balanced_stop(void) { - /* first unregister the notifiers. 
This ensures the governor state can't be modified by a cpufreq transition @@ -465,8 +418,7 @@ static int balanced_start(void) down_delay = msecs_to_jiffies(500); table = cpufreq_frequency_get_table(0); - for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++) - ; + for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++); idle_top_freq = table[(count / 2) - 1].frequency; idle_bottom_freq = table[(count / 2) - 2].frequency; diff --git a/include/linux/cpuquiet.h b/include/linux/cpuquiet.h index 8459af7aad7..fe5a0372773 100644 --- a/include/linux/cpuquiet.h +++ b/include/linux/cpuquiet.h @@ -50,4 +50,56 @@ extern void cpuquiet_remove_group(struct attribute_group *attrs); int cpuquiet_kobject_init(struct kobject *kobj, struct kobj_type *type, char *name); extern unsigned int nr_cluster_ids; + +/* Sysfs support */ +struct cpuquiet_attribute { + struct attribute attr; + ssize_t (*show)(struct cpuquiet_attribute *attr, char *buf); + ssize_t (*store)(struct cpuquiet_attribute *attr, const char *buf, + size_t count); + /* Optional. Called after store is called */ + void (*store_callback)(struct cpuquiet_attribute *attr); + void *param; +}; + +#define CPQ_ATTRIBUTE(_name, _mode, _type, _callback) \ + static struct cpuquiet_attribute _name ## _attr = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = show_ ## _type ## _attribute, \ + .store = store_ ## _type ## _attribute, \ + .store_callback = _callback, \ + .param = &_name, \ +} + +#define CPQ_BASIC_ATTRIBUTE(_name, _mode, _type) \ + CPQ_ATTRIBUTE(_name, _mode, _type, NULL) + +#define CPQ_ATTRIBUTE_CUSTOM(_name, _mode, _show, _store) \ + static struct cpuquiet_attribute _name ## _attr = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store \ + .store_callback = NULL, \ + .param = &_name, \ +} + + +extern ssize_t show_int_attribute(struct cpuquiet_attribute *cattr, char *buf); +extern ssize_t store_int_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count); +extern ssize_t show_bool_attribute(struct cpuquiet_attribute *cattr, char *buf); +extern ssize_t store_bool_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count); +extern ssize_t store_uint_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count); +extern ssize_t show_uint_attribute(struct cpuquiet_attribute *cattr, char *buf); +extern ssize_t store_ulong_attribute(struct cpuquiet_attribute *cattr, + const char *buf, size_t count); +extern ssize_t show_ulong_attribute(struct cpuquiet_attribute *cattr, + char *buf); +extern ssize_t cpuquiet_auto_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf); +extern ssize_t cpuquiet_auto_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, + size_t count); #endif From dda14b0f5f19841994f6b2b6f8dda847785d747d Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Fri, 22 Jun 2012 15:42:59 -0700 Subject: [PATCH 196/678] cpuquiet: Properly return the first registered governor Change-Id: I8a21a15d5cf8e4c91a166810387b5d8f1dd7cdb6 Signed-off-by: Sai Charan Gurrappadi Reviewed-on: http://git-master/r/111284 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Yu-Huan Hsu --- drivers/cpuquiet/governor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpuquiet/governor.c b/drivers/cpuquiet/governor.c index 1446b9ee506..7895fccc7f4 100644 --- a/drivers/cpuquiet/governor.c +++ b/drivers/cpuquiet/governor.c 
@@ -28,7 +28,8 @@ struct cpuquiet_governor *cpuquiet_curr_governor; struct cpuquiet_governor *cpuquiet_get_first_governor(void) { if (!list_empty(&cpuquiet_governors)) - return list_entry(&cpuquiet_governors, struct cpuquiet_governor, + return list_entry(cpuquiet_governors.next, + struct cpuquiet_governor, governor_list); else return NULL; From 192891cc2191ee8cbc826d0a1cf834060efea394 Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Thu, 14 Jun 2012 14:17:14 -0700 Subject: [PATCH 197/678] cpuquiet: Update stats only on successful operations Change-Id: I0584fba7458b3a860f9ab3751a8eb5f0345864ad Signed-off-by: Sai Charan Gurrappadi Reviewed-on: http://git-master/r/111285 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Yu-Huan Hsu --- drivers/cpuquiet/driver.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpuquiet/driver.c b/drivers/cpuquiet/driver.c index f9dcdf018f5..d9dbea76994 100644 --- a/drivers/cpuquiet/driver.c +++ b/drivers/cpuquiet/driver.c @@ -78,7 +78,8 @@ int cpuquiet_quiesence_cpu(unsigned int cpunumber) if (cpuquiet_curr_driver && cpuquiet_curr_driver->quiesence_cpu) err = cpuquiet_curr_driver->quiesence_cpu(cpunumber); - stats_update(stats + cpunumber, 0); + if (!err) + stats_update(stats + cpunumber, 0); return err; } @@ -91,7 +92,8 @@ int cpuquiet_wake_cpu(unsigned int cpunumber) if (cpuquiet_curr_driver && cpuquiet_curr_driver->wake_cpu) err = cpuquiet_curr_driver->wake_cpu(cpunumber); - stats_update(stats + cpunumber, 1); + if (!err) + stats_update(stats + cpunumber, 1); return err; } From 769973b158032a8f9491fd1f1b475c1070046b6f Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Thu, 14 Jun 2012 18:23:17 -0700 Subject: [PATCH 198/678] cpuquiet: Fix compiler warning Change-Id: I03c11c295b40ebd500a715974f7fdca560d9a43a Signed-off-by: Sai Charan Gurrappadi Reviewed-on: http://git-master/r/111286 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Yu-Huan Hsu --- drivers/cpuquiet/sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpuquiet/sysfs.c b/drivers/cpuquiet/sysfs.c index 1e1c14865b2..0d63eee37dc 100644 --- a/drivers/cpuquiet/sysfs.c +++ b/drivers/cpuquiet/sysfs.c @@ -286,5 +286,6 @@ void cpuquiet_add_dev(struct sys_device *sys_dev, unsigned int cpu) void cpuquiet_remove_dev(unsigned int cpu) { - kobject_put(cpuquiet_cpu_devices[cpu]); + if (cpu < CONFIG_NR_CPUS && cpuquiet_cpu_devices[cpu]) + kobject_put(&cpuquiet_cpu_devices[cpu]->kobj); } From 1f96bf98e37a96dad4fdaf40d3dcd1690da91ed6 Mon Sep 17 00:00:00 2001 From: Peter Boonstoppel Date: Thu, 17 May 2012 15:15:43 -0700 Subject: [PATCH 199/678] sched: unthrottle rt runqueues in __disable_runtime() migrate_tasks() uses _pick_next_task_rt() to get tasks from the real-time runqueues to be migrated. When rt_rq is throttled _pick_next_task_rt() won't return anything, in which case migrate_tasks() can't move all threads over and gets stuck in an infinite loop. Instead unthrottle rt runqueues before migrating tasks. 
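For context, _pick_next_task_rt() bails out as soon as the rt runqueue is flagged as throttled, roughly like this (a paraphrased sketch of the 3.x code path, not an exact copy):

    static struct task_struct *_pick_next_task_rt(struct rq *rq)
    {
            struct rt_rq *rt_rq = &rq->rt;

            if (!rt_rq->rt_nr_running)
                    return NULL;

            if (rt_rq_throttled(rt_rq))
                    /* throttled: report no runnable rt task even though tasks are queued */
                    return NULL;

            /* ... normal rt entity selection ... */
    }

With rt_throttled cleared in __disable_runtime(), the queued rt tasks become visible to the picker again, so migrate_tasks() can drain them when the CPU goes offline.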
Bug 976709 Change-Id: Ie3696702abc560fe8ffa7d2fb5dc5d54d532cc0d Signed-off-by: Peter Boonstoppel (cherry picked from commit 4d18ba5765c206bf9f37634f532d97dabd507a58) Reviewed-on: http://git-master/r/103417 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Aleksandr Frid Reviewed-by: Yu-Huan Hsu --- kernel/sched_rt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index af1177858be..046f429be33 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -497,6 +497,7 @@ static void __disable_runtime(struct rq *rq) * runtime - in which case borrowing doesn't make sense. */ rt_rq->rt_runtime = RUNTIME_INF; + rt_rq->rt_throttled = 0; raw_spin_unlock(&rt_rq->rt_runtime_lock); raw_spin_unlock(&rt_b->rt_runtime_lock); } From 120a84d51deb4a41d3c44685abd68fd5abffee3b Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Thu, 5 Jul 2012 10:47:23 -0700 Subject: [PATCH 200/678] cpuquiet: Account for the corner case frequency when setting state Now consider frequency greater than or equal to idle_top_freq as UP Change-Id: I1332d46d1e42a00b3b31897b158eaf4ccfbaf8f5 Signed-off-by: Sai Charan Gurrappadi Reviewed-on: http://git-master/r/113678 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Aleksandr Frid Reviewed-by: Peter De Schrijver Reviewed-by: Peter Boonstoppel Reviewed-by: Yu-Huan Hsu GVS: Gerrit_Virtual_Submit --- drivers/cpuquiet/governors/balanced.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpuquiet/governors/balanced.c b/drivers/cpuquiet/governors/balanced.c index da98362e5f2..f0d2e03ae22 100644 --- a/drivers/cpuquiet/governors/balanced.c +++ b/drivers/cpuquiet/governors/balanced.c @@ -282,7 +282,7 @@ static int balanced_cpufreq_transition(struct notifier_block *nb, switch (balanced_state) { case IDLE: - if (cpu_freq > idle_top_freq) { + if (cpu_freq >= idle_top_freq) { balanced_state = UP; queue_delayed_work( balanced_wq, &balanced_work, up_delay); @@ -296,7 +296,7 @@ static int balanced_cpufreq_transition(struct notifier_block *nb, } break; case DOWN: - if (cpu_freq > idle_top_freq) { + if (cpu_freq >= idle_top_freq) { balanced_state = UP; queue_delayed_work( balanced_wq, &balanced_work, up_delay); @@ -347,6 +347,7 @@ static struct attribute *balanced_attributes[] = { &idle_top_freq_attr.attr, &up_delay_attr.attr, &down_delay_attr.attr, + &load_sample_rate_attr.attr, NULL, }; From 385beda31a1bded402c0f7819b0a30bfceec1f36 Mon Sep 17 00:00:00 2001 From: faux123 Date: Tue, 4 Sep 2012 07:49:52 -0700 Subject: [PATCH 201/678] kernel/sys: cpuquiet compatibility fixup Conflicts: kernel/sys.c --- kernel/sys.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/sys.c b/kernel/sys.c index 4e2306e1a04..d21bdc4fb26 100755 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -125,7 +125,10 @@ EXPORT_SYMBOL(cad_pid); void (*pm_power_off_prepare)(void); - extern void disable_auto_hotplug(void); +#ifndef CONFIG_CPUQUIET_FRAMEWORK +extern void disable_auto_hotplug(void); +#endif + /* * Returns true if current's euid is same as p's uid or euid, * or has CAP_SYS_NICE to p's user_ns. 
@@ -367,7 +370,9 @@ EXPORT_SYMBOL(unregister_reboot_notifier); */ void kernel_restart(char *cmd) { +#ifndef CONFIG_CPUQUIET_FRAMEWORK disable_auto_hotplug(); +#endif kernel_restart_prepare(cmd); if (!cmd) printk(KERN_EMERG "Restarting system.\n"); @@ -418,7 +423,9 @@ void kernel_power_off(void) kernel_restart(cmd); } +#ifndef CONFIG_CPUQUIET_FRAMEWORK disable_auto_hotplug(); +#endif kernel_shutdown_prepare(SYSTEM_POWER_OFF); if (pm_power_off_prepare) pm_power_off_prepare(); From 7e0d9d50c7606ab03be54b96ff475dd773fe81c8 Mon Sep 17 00:00:00 2001 From: faux123 Date: Sun, 9 Sep 2012 10:47:31 -0700 Subject: [PATCH 202/678] board/cpu-tegra3: make runnable thread as a compilation option Conflicts: arch/arm/mach-tegra/Kconfig --- arch/arm/mach-tegra/Kconfig | 7 +++++++ arch/arm/mach-tegra/cpu-tegra3.c | 33 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 44ecaa48bce..ea9f2222f49 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -474,6 +474,13 @@ config TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND help Also will restore to original cpu frequency governor when device is resumed +config TEGRA_RUNNABLE_THREAD + bool "Tegra3 runnable thread hotplug" + depends on ARCH_TEGRA_3x_SOC + default n + help + Tegra3 Runnable Thread calculations for CPU hotplug + config TEGRA_VARIANT_INFO bool "Tegra3 variant info" depends on ARCH_TEGRA_3x_SOC diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index c5a8680ee10..4d8a5219879 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -73,7 +73,9 @@ static struct clk *cpu_clk; static struct clk *cpu_g_clk; static struct clk *cpu_lp_clk; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD static unsigned long last_change_time; +#endif static struct { cputime64_t time_up_total; @@ -188,6 +190,7 @@ enum { TEGRA_CPU_SPEED_SKEWED, }; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD #define NR_FSHIFT 2 static unsigned int nr_run_thresholds[] = { /* 1, 2, 3, 4 - on-line cpus target */ @@ -195,6 +198,7 @@ static unsigned int nr_run_thresholds[] = { }; static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ static unsigned int nr_run_last; +#endif static noinline int tegra_cpu_speed_balance(void) { @@ -204,6 +208,7 @@ static noinline int tegra_cpu_speed_balance(void) unsigned int nr_cpus = num_online_cpus(); unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? 
: 4; unsigned int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD unsigned int avg_nr_run = avg_nr_running(); unsigned int nr_run; @@ -224,16 +229,21 @@ static noinline int tegra_cpu_speed_balance(void) break; } nr_run_last = nr_run; +#endif if (((tegra_count_slow_cpus(skewed_speed) >= 2) || +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD (nr_run < nr_cpus) || +#endif tegra_cpu_edp_favor_down(nr_cpus, mp_overhead) || (highest_speed <= idle_bottom_freq) || (nr_cpus > max_cpus)) && (nr_cpus > min_cpus)) return TEGRA_CPU_SPEED_SKEWED; if (((tegra_count_slow_cpus(balanced_speed) >= 1) || +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD (nr_run <= nr_cpus) || +#endif (!tegra_cpu_edp_favor_up(nr_cpus, mp_overhead)) || (highest_speed <= idle_bottom_freq) || (nr_cpus == max_cpus)) && (nr_cpus >= min_cpus)) @@ -251,6 +261,9 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) bool up = false; unsigned int cpu = nr_cpu_ids; unsigned long now = jiffies; +#ifndef CONFIG_TEGRA_RUNNABLE_THREAD + static unsigned long last_change_time; +#endif mutex_lock(tegra3_cpu_lock); @@ -263,8 +276,12 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) if (cpu < nr_cpu_ids) { up = false; } else if (!is_lp_cluster() && !no_lp && +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD !pm_qos_request(PM_QOS_MIN_ONLINE_CPUS) && ((now - last_change_time) >= down_delay)) { +#else + !pm_qos_request(PM_QOS_MIN_ONLINE_CPUS)) { +#endif if(!clk_set_parent(cpu_clk, cpu_lp_clk)) { hp_stats_update(CONFIG_NR_CPUS, true); hp_stats_update(0, false); @@ -274,12 +291,18 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) } } queue_delayed_work( +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD hotplug_wq, &hotplug_work, up2gn_delay); +#else + hotplug_wq, &hotplug_work, down_delay); +#endif break; case TEGRA_HP_UP: if (is_lp_cluster() && !no_lp) { if(!clk_set_parent(cpu_clk, cpu_g_clk)) { +#ifndef CONFIG_TEGRA_RUNNABLE_THREAD last_change_time = now; +#endif hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); /* catch-up with governor target speed */ @@ -346,7 +369,9 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) tegra_update_cpu_speed(speed); if (!clk_set_parent(cpu_clk, cpu_g_clk)) { +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD last_change_time = jiffies; +#endif hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); } @@ -412,7 +437,11 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) } else if (cpu_freq <= bottom_freq) { hp_state = TEGRA_HP_DOWN; queue_delayed_work( +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD hotplug_wq, &hotplug_work, up_delay); +#else + hotplug_wq, &hotplug_work, down_delay); +#endif } break; case TEGRA_HP_DOWN: @@ -428,7 +457,11 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) if (cpu_freq <= bottom_freq) { hp_state = TEGRA_HP_DOWN; queue_delayed_work( +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD hotplug_wq, &hotplug_work, up_delay); +#else + hotplug_wq, &hotplug_work, down_delay); +#endif } else if (cpu_freq <= top_freq) { hp_state = TEGRA_HP_IDLE; } From b60b5f541215ece1bb6917fb48e6b67e9fc12e9b Mon Sep 17 00:00:00 2001 From: faux123 Date: Sun, 9 Sep 2012 10:59:41 -0700 Subject: [PATCH 203/678] cpuquiet/governors/balanced: make runnable thread as a compilation option --- drivers/cpuquiet/governors/balanced.c | 46 ++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/cpuquiet/governors/balanced.c b/drivers/cpuquiet/governors/balanced.c index 
f0d2e03ae22..b0f88f9a042 100644 --- a/drivers/cpuquiet/governors/balanced.c +++ b/drivers/cpuquiet/governors/balanced.c @@ -62,8 +62,10 @@ static unsigned int idle_bottom_freq; static unsigned int idle_top_freq; static unsigned long up_delay; static unsigned long down_delay; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD static unsigned long last_change_time; static unsigned int load_sample_rate = 20; /* msec */ +#endif static struct workqueue_struct *balanced_wq; static struct delayed_work balanced_work; static BALANCED_STATE balanced_state; @@ -92,7 +94,11 @@ static void calculate_load_timer(unsigned long data) do_div(idle_time, elapsed_time); *load = 100 - idle_time; } +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD mod_timer(&load_timer, jiffies + msecs_to_jiffies(load_sample_rate)); +#else + mod_timer(&load_timer, jiffies + msecs_to_jiffies(100)); +#endif } static void start_load_timer(void) @@ -168,7 +174,7 @@ static unsigned int count_slow_cpus(unsigned int limit) return cnt; } - +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD #define NR_FSHIFT 2 static unsigned int nr_run_thresholds[] = { /* 1, 2, 3, 4 - on-line cpus target */ @@ -176,6 +182,7 @@ static unsigned int nr_run_thresholds[] = { }; static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ static unsigned int nr_run_last; +#endif static CPU_SPEED_BALANCE balanced_speed_balance(void) { @@ -184,12 +191,15 @@ static CPU_SPEED_BALANCE balanced_speed_balance(void) unsigned long skewed_speed = balanced_speed / 2; unsigned int nr_cpus = num_online_cpus(); unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD unsigned int avg_nr_run = avg_nr_running(); unsigned int nr_run; +#endif /* balanced: freq targets for all CPUs are above 50% of highest speed biased: freq target for at least one CPU is below 50% threshold skewed: freq targets for at least 2 CPUs are below 25% threshold */ +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) { unsigned int nr_threshold = nr_run_thresholds[nr_run - 1]; if (nr_run_last <= nr_run) @@ -201,10 +211,17 @@ static CPU_SPEED_BALANCE balanced_speed_balance(void) if (count_slow_cpus(skewed_speed) >= 2 || nr_cpus > max_cpus || nr_run < nr_cpus) +#else + if (count_slow_cpus(skewed_speed) >= 2 || nr_cpus > max_cpus) +#endif return CPU_SPEED_SKEWED; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD if (count_slow_cpus(balanced_speed) >= 1 || nr_cpus == max_cpus || nr_run <= nr_cpus) +#else + if (count_slow_cpus(balanced_speed) >= 1 || nr_cpus == max_cpus) +#endif return CPU_SPEED_BIASED; return CPU_SPEED_BALANCED; @@ -214,7 +231,9 @@ static void balanced_work_func(struct work_struct *work) { bool up = false; unsigned int cpu = nr_cpu_ids; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD unsigned long now = jiffies; +#endif CPU_SPEED_BALANCE balance; @@ -226,7 +245,11 @@ static void balanced_work_func(struct work_struct *work) if (cpu < nr_cpu_ids) { up = false; queue_delayed_work(balanced_wq, +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD &balanced_work, up_delay); +#else + &balanced_work, down_delay); +#endif } else stop_load_timer(); break; @@ -259,11 +282,15 @@ static void balanced_work_func(struct work_struct *work) __func__, balanced_state); } +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD if (!up && ((now - last_change_time) < down_delay)) cpu = nr_cpu_ids; +#endif if (cpu < nr_cpu_ids) { +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD last_change_time = now; +#endif if (up) cpuquiet_wake_cpu(cpu); else @@ -307,7 +334,11 @@ static int balanced_cpufreq_transition(struct 
notifier_block *nb, if (cpu_freq <= idle_bottom_freq) { balanced_state = DOWN; queue_delayed_work(balanced_wq, +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD &balanced_work, up_delay); +#else + &balanced_work, down_delay); +#endif start_load_timer(); } break; @@ -337,7 +368,9 @@ static void delay_callback(struct cpuquiet_attribute *attr) CPQ_BASIC_ATTRIBUTE(balance_level, 0644, uint); CPQ_BASIC_ATTRIBUTE(idle_bottom_freq, 0644, uint); CPQ_BASIC_ATTRIBUTE(idle_top_freq, 0644, uint); +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD CPQ_BASIC_ATTRIBUTE(load_sample_rate, 0644, uint); +#endif CPQ_ATTRIBUTE(up_delay, 0644, ulong, delay_callback); CPQ_ATTRIBUTE(down_delay, 0644, ulong, delay_callback); @@ -347,7 +380,9 @@ static struct attribute *balanced_attributes[] = { &idle_top_freq_attr.attr, &up_delay_attr.attr, &down_delay_attr.attr, +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD &load_sample_rate_attr.attr, +#endif NULL, }; @@ -402,7 +437,9 @@ static int balanced_start(void) { int err, count; struct cpufreq_frequency_table *table; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD struct cpufreq_freqs initial_freq; +#endif err = balanced_sysfs(); if (err) @@ -415,8 +452,13 @@ static int balanced_start(void) INIT_DELAYED_WORK(&balanced_work, balanced_work_func); +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD up_delay = msecs_to_jiffies(100); down_delay = msecs_to_jiffies(500); +#else + up_delay = msecs_to_jiffies(1000); + down_delay = msecs_to_jiffies(2000); +#endif table = cpufreq_frequency_get_table(0); for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++); @@ -430,11 +472,13 @@ static int balanced_start(void) init_timer(&load_timer); load_timer.function = calculate_load_timer; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD /*FIXME: Kick start the state machine by faking a freq notification*/ initial_freq.new = cpufreq_get(0); if (initial_freq.new != 0) balanced_cpufreq_transition(NULL, CPUFREQ_RESUMECHANGE, &initial_freq); +#endif return 0; } From 11fe7b4baff440cbe0b604f671a3d5acef04a7a6 Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Tue, 10 Jul 2012 17:33:58 -0700 Subject: [PATCH 204/678] ARM: tegra: cpuquiet: Notify the cpuquiet governor when the driver is busy Added generic busy/free notifiers that the driver can invoke to let the governor know that it cannot process further core online/offline requests (invoked in our case whenever we switch to the LP cluster). 
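A governor opts in by filling two new optional callbacks in struct cpuquiet_governor; a minimal sketch (hypothetical governor, only illustrating the interface added below):

    static void example_device_busy(void)
    {
            /* driver can no longer honour wake/quiesence requests: pause sampling */
    }

    static void example_device_free(void)
    {
            /* driver can act on requests again: restart the sampling loop */
    }

    static struct cpuquiet_governor example_governor = {
            .name                     = "example",
            .device_busy_notification = example_device_busy,
            .device_free_notification = example_device_free,
            /* .start, .stop, etc. as before */
    };

Governors that leave the callbacks NULL are unaffected: cpuquiet_device_busy() and cpuquiet_device_free() only call into the current governor when the notification hook is set.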
Change-Id: I5e3f7f28f38806a7f87050e8d0c8d2f2cf7521aa Signed-off-by: Sai Charan Gurrappadi --- arch/arm/mach-tegra/cpuquiet.c | 20 +++++++++++++++++--- drivers/cpuquiet/governor.c | 14 ++++++++++++++ include/linux/cpuquiet.h | 4 ++++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index f0163e1f3fc..50c7cb56140 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -134,7 +134,7 @@ static void apply_core_config(void) static void tegra_cpuquiet_work_func(struct work_struct *work) { - bool update_cr_config = false; + bool state_changed = false; mutex_lock(tegra3_cpu_lock); @@ -148,7 +148,7 @@ static void tegra_cpuquiet_work_func(struct work_struct *work) /*catch-up with governor target speed */ tegra_cpu_set_speed_cap(NULL); /* process pending core requests*/ - update_cr_config = true; + state_changed = true; } } break; @@ -159,6 +159,7 @@ static void tegra_cpuquiet_work_func(struct work_struct *work) if (!clk_set_parent(cpu_clk, cpu_lp_clk)) { /*catch-up with governor target speed*/ tegra_cpu_set_speed_cap(NULL); + state_changed = true; } } break; @@ -169,8 +170,12 @@ static void tegra_cpuquiet_work_func(struct work_struct *work) mutex_unlock(tegra3_cpu_lock); - if (update_cr_config) + if (state_changed && cpq_state == TEGRA_CPQ_SWITCH_TO_LP) { + cpuquiet_device_busy(); + } else if (state_changed && cpq_state == TEGRA_CPQ_SWITCH_TO_G) { apply_core_config(); + cpuquiet_device_free(); + } } static void min_max_constraints_workfunc(struct work_struct *work) @@ -212,6 +217,8 @@ static void min_max_constraints_workfunc(struct work_struct *work) static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) { + bool g_cluster = false; + mutex_lock(tegra3_cpu_lock); if ((n >= 1) && is_lp_cluster()) { @@ -221,6 +228,7 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) tegra_update_cpu_speed(speed); clk_set_parent(cpu_clk, cpu_g_clk); + g_cluster = true; } tegra_cpu_set_speed_cap(NULL); @@ -228,6 +236,9 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) schedule_work(&minmax_work); + if (g_cluster) + cpuquiet_device_free(); + return NOTIFY_OK; } @@ -253,6 +264,7 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) /* Switch to G-mode if suspend rate is high enough */ if (is_lp_cluster() && (cpu_freq >= idle_bottom_freq)) { clk_set_parent(cpu_clk, cpu_g_clk); + cpuquiet_device_free(); } return; } @@ -306,11 +318,13 @@ static void enable_callback(struct cpuquiet_attribute *attr) mutex_unlock(tegra3_cpu_lock); cancel_delayed_work_sync(&cpuquiet_work); pr_info("Tegra cpuquiet clusterswitch disabled\n"); + cpuquiet_device_busy(); mutex_lock(tegra3_cpu_lock); } else if (enable && cpq_state == TEGRA_CPQ_DISABLED) { cpq_state = TEGRA_CPQ_IDLE; pr_info("Tegra cpuquiet clusterswitch enabled\n"); tegra_cpu_set_speed_cap(NULL); + cpuquiet_device_free(); } mutex_unlock(tegra3_cpu_lock); diff --git a/drivers/cpuquiet/governor.c b/drivers/cpuquiet/governor.c index 7895fccc7f4..176ba3bd705 100644 --- a/drivers/cpuquiet/governor.c +++ b/drivers/cpuquiet/governor.c @@ -100,3 +100,17 @@ void cpuquiet_unregister_governor(struct cpuquiet_governor *gov) list_del(&gov->governor_list); mutex_unlock(&cpuquiet_lock); } + +void cpuquiet_device_busy(void) +{ + if (cpuquiet_curr_governor && + cpuquiet_curr_governor->device_busy_notification) + cpuquiet_curr_governor->device_busy_notification(); +} + +void 
cpuquiet_device_free(void) +{ + if (cpuquiet_curr_governor && + cpuquiet_curr_governor->device_free_notification) + cpuquiet_curr_governor->device_free_notification(); +} diff --git a/include/linux/cpuquiet.h b/include/linux/cpuquiet.h index fe5a0372773..5558c015bb5 100644 --- a/include/linux/cpuquiet.h +++ b/include/linux/cpuquiet.h @@ -30,6 +30,8 @@ struct cpuquiet_governor { int (*start) (void); void (*stop) (void); int (*store_active) (unsigned int cpu, bool active); + void (*device_free_notification) (void); + void (*device_busy_notification) (void); struct module *owner; }; @@ -47,6 +49,8 @@ extern int cpuquiet_register_driver(struct cpuquiet_driver *drv); extern void cpuquiet_unregister_driver(struct cpuquiet_driver *drv); extern int cpuquiet_add_group(struct attribute_group *attrs); extern void cpuquiet_remove_group(struct attribute_group *attrs); +extern void cpuquiet_device_busy(void); +extern void cpuquiet_device_free(void); int cpuquiet_kobject_init(struct kobject *kobj, struct kobj_type *type, char *name); extern unsigned int nr_cluster_ids; From 02bde37d7d280541e1a94c7b9fbb1d4a4cfdf9de Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Wed, 25 Jul 2012 11:16:08 -0700 Subject: [PATCH 205/678] ARM: tegra: cpuquiet: Fix cpuquiet notifiers The notifiers now properly fire on every cluster switch Change-Id: I381301cf62f25b49532326cc7759696c7f6797b7 Signed-off-by: Sai Charan Gurrappadi --- arch/arm/mach-tegra/cpuquiet.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index 50c7cb56140..8006c3ea043 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -134,7 +134,7 @@ static void apply_core_config(void) static void tegra_cpuquiet_work_func(struct work_struct *work) { - bool state_changed = false; + int device_busy = -1; mutex_lock(tegra3_cpu_lock); @@ -148,7 +148,7 @@ static void tegra_cpuquiet_work_func(struct work_struct *work) /*catch-up with governor target speed */ tegra_cpu_set_speed_cap(NULL); /* process pending core requests*/ - state_changed = true; + device_busy = 0; } } break; @@ -159,7 +159,7 @@ static void tegra_cpuquiet_work_func(struct work_struct *work) if (!clk_set_parent(cpu_clk, cpu_lp_clk)) { /*catch-up with governor target speed*/ tegra_cpu_set_speed_cap(NULL); - state_changed = true; + device_busy = 1; } } break; @@ -170,9 +170,9 @@ static void tegra_cpuquiet_work_func(struct work_struct *work) mutex_unlock(tegra3_cpu_lock); - if (state_changed && cpq_state == TEGRA_CPQ_SWITCH_TO_LP) { + if (device_busy == 1) { cpuquiet_device_busy(); - } else if (state_changed && cpq_state == TEGRA_CPQ_SWITCH_TO_G) { + } else if (!device_busy) { apply_core_config(); cpuquiet_device_free(); } From a6a0535d4af2ca3f810f7f2a5e9eb3c8b3c4eccd Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Wed, 25 Jul 2012 08:37:14 -0700 Subject: [PATCH 206/678] cpuquiet: Runnable threads governor [perf] The runnable threads governor only looks at the average number of runnables in the system to make a decision when bringing cores offline/online. 
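The on-line CPU target is picked from per-count thresholds expressed in 1/8-thread fixed point (NR_FSHIFT = 8) and compared against avg_nr_running(). A worked example, assuming the scheduler's FSHIFT of 11 (1.0 runnable thread == 2048 in avg_nr_running() units): the first default threshold of 9 corresponds to 9/8 = 1.125 threads, i.e. 9 << (11 - 3) = 2304; with the 1/4-thread hysteresis added (NR_FSHIFT / nr_run_hysteresis = 2) it becomes 11 << 8 = 2816, about 1.375 threads, so the average load must stay above roughly 1.375 runnable threads before the governor targets a second core.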
First pass; tweaks thresholds and delays to reduce decision latency to about ~50-70ms per core (from ~100-150ms per core) Change-Id: Idd3b268a74a8f56ad3fc0e5c7f388174d1b6611f Signed-off-by: Sai Charan Gurrappadi --- drivers/cpuquiet/governors/Makefile | 2 +- drivers/cpuquiet/governors/runnable_threads.c | 247 ++++++++++++++++++ include/linux/sched.h | 1 + kernel/sched.c | 12 + 4 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 drivers/cpuquiet/governors/runnable_threads.c diff --git a/drivers/cpuquiet/governors/Makefile b/drivers/cpuquiet/governors/Makefile index c7080312708..e199d73008f 100644 --- a/drivers/cpuquiet/governors/Makefile +++ b/drivers/cpuquiet/governors/Makefile @@ -1 +1 @@ -obj-y += userspace.o balanced.o +obj-y += userspace.o balanced.o runnable_threads.o diff --git a/drivers/cpuquiet/governors/runnable_threads.c b/drivers/cpuquiet/governors/runnable_threads.c new file mode 100644 index 00000000000..108f6126da7 --- /dev/null +++ b/drivers/cpuquiet/governors/runnable_threads.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef enum { + DISABLED, + IDLE, + DOWN, + UP, +} RUNNABLES_STATE; + +static struct delayed_work runnables_work; +static struct kobject *runnables_kobject; + +/* configurable parameters */ +static unsigned int sample_rate = 20; /* msec */ + +static RUNNABLES_STATE runnables_state; +static struct workqueue_struct *runnables_wq; + +#define NR_FSHIFT_EXP 3 +#define NR_FSHIFT (1 << NR_FSHIFT_EXP) +/* avg run threads * 8 (e.g., 11 = 1.375 threads) */ +static unsigned int default_thresholds[] = { + 9, 17, 25, UINT_MAX +}; + +static unsigned int nr_run_last; +static unsigned int nr_run_hysteresis = 4; /* 1 / 4 thread */ +static unsigned int default_threshold_level = 4; /* 1 / 4 thread */ +static unsigned int nr_run_thresholds[NR_CPUS]; + +DEFINE_MUTEX(runnables_work_lock); + +static void update_runnables_state(void) +{ + unsigned int nr_cpus = num_online_cpus(); + int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? 
: 4; + int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS); + unsigned int avg_nr_run = avg_nr_running(); + unsigned int nr_run; + + if (runnables_state == DISABLED) + return; + + for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) { + unsigned int nr_threshold = nr_run_thresholds[nr_run - 1]; + if (nr_run_last <= nr_run) + nr_threshold += NR_FSHIFT / nr_run_hysteresis; + if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT_EXP))) + break; + } + nr_run_last = nr_run; + + if ((nr_cpus > max_cpus || nr_run < nr_cpus) && nr_cpus >= min_cpus) { + runnables_state = DOWN; + } else if (nr_cpus < min_cpus || nr_run > nr_cpus) { + runnables_state = UP; + } else { + runnables_state = IDLE; + } +} + +static unsigned int get_lightest_loaded_cpu_n(void) +{ + unsigned long min_avg_runnables = ULONG_MAX; + unsigned int cpu = nr_cpu_ids; + int i; + + for_each_online_cpu(i) { + unsigned int nr_runnables = get_avg_nr_running(i); + + if (i > 0 && min_avg_runnables > nr_runnables) { + cpu = i; + min_avg_runnables = nr_runnables; + } + } + + return cpu; +} + +static void runnables_work_func(struct work_struct *work) +{ + bool up = false; + bool sample = false; + unsigned int cpu = nr_cpu_ids; + + mutex_lock(&runnables_work_lock); + + update_runnables_state(); + + switch (runnables_state) { + case DISABLED: + break; + case IDLE: + sample = true; + break; + case UP: + cpu = cpumask_next_zero(0, cpu_online_mask); + up = true; + sample = true + break; + case DOWN: + cpu = get_lightest_loaded_cpu_n(); + sample = true; + break; + default: + pr_err("%s: invalid cpuquiet runnable governor state %d\n", + __func__, runnables_state); + break; + } + + if (sample) + queue_delayed_work(runnables_wq, &runnables_work, + msecs_to_jiffies(sample_rate)); + + if (cpu < nr_cpu_ids) { + if (up) + cpuquiet_wake_cpu(cpu); + else + cpuquiet_quiesence_cpu(cpu); + } + + mutex_unlock(&runnables_work_lock); +} + +CPQ_BASIC_ATTRIBUTE(sample_rate, 0644, uint); +CPQ_BASIC_ATTRIBUTE(nr_run_hysteresis, 0644, uint); + +static struct attribute *runnables_attributes[] = { + &sample_rate_attr.attr, + &nr_run_hysteresis_attr.attr, + NULL, +}; + +static const struct sysfs_ops runnables_sysfs_ops = { + .show = cpuquiet_auto_sysfs_show, + .store = cpuquiet_auto_sysfs_store, +}; + +static struct kobj_type ktype_runnables = { + .sysfs_ops = &runnables_sysfs_ops, + .default_attrs = runnables_attributes, +}; + +static int runnables_sysfs(void) +{ + int err; + + runnables_kobject = kzalloc(sizeof(*runnables_kobject), + GFP_KERNEL); + + if (!runnables_kobject) + return -ENOMEM; + + err = cpuquiet_kobject_init(runnables_kobject, &ktype_runnables, + "runnable_threads"); + + if (err) + kfree(runnables_kobject); + + return err; +} + +static void runnables_stop(void) +{ + runnables_state = DISABLED; + cancel_delayed_work_sync(&runnables_work); + destroy_workqueue(runnables_wq); + kobject_put(runnables_kobject); +} + +static int runnables_start(void) +{ + int err, i; + + err = runnables_sysfs(); + if (err) + return err; + + runnables_wq = alloc_workqueue("cpuquiet-runnables", + WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1); + if (!runnables_wq) + return -ENOMEM; + + INIT_DELAYED_WORK(&runnables_work, runnables_work_func); + + for(i = 0; i < ARRAY_SIZE(nr_run_thresholds); ++i) { + if (i < ARRAY_SIZE(default_thresholds)) + nr_run_thresholds[i] = default_thresholds[i]; + else if (i == (ARRAY_SIZE(nr_run_thresholds) - 1)) + nr_run_thresholds[i] = UINT_MAX; + else + nr_run_thresholds[i] = i + 1 + + NR_FSHIFT / default_threshold_level; + } + + 
runnables_state = IDLE; + runnables_work_func(NULL); + + return 0; +} + +struct cpuquiet_governor runnables_governor = { + .name = "runnable", + .start = runnables_start, + .stop = runnables_stop, + .owner = THIS_MODULE, +}; + +static int __init init_runnables(void) +{ + return cpuquiet_register_governor(&runnables_governor); +} + +static void __exit exit_runnables(void) +{ + cpuquiet_unregister_governor(&runnables_governor); +} + +MODULE_LICENSE("GPL"); +module_init(init_runnables); +module_exit(exit_runnables); diff --git a/include/linux/sched.h b/include/linux/sched.h index a661d2cddb7..f85dc843da7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -139,6 +139,7 @@ extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_iowait(void); +extern unsigned long get_avg_nr_running(unsigned int cpu); extern unsigned long avg_nr_running(void); extern unsigned long nr_iowait_cpu(int cpu); extern unsigned long this_cpu_load(void); diff --git a/kernel/sched.c b/kernel/sched.c index 8423df5f305..7fe017c88b7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3325,6 +3325,18 @@ unsigned long avg_nr_running(void) return sum; } +unsigned long get_avg_nr_running(unsigned int cpu) +{ + struct rq *q; + + if (cpu >= nr_cpu_ids) + return 0; + + q = cpu_rq(cpu); + + return q->ave_nr_running; +} + unsigned long nr_iowait_cpu(int cpu) { struct rq *this = cpu_rq(cpu); From 42fd6d3525642b58eeda2a58122184f8d9deec9f Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Fri, 24 Aug 2012 18:09:57 -0700 Subject: [PATCH 207/678] cpuquiet: Add notifiers for the runnables governor The governor stops sampling whenever the device is busy and starts its sampling loop when the device is free. 
Change-Id: I6b4dae94a60f8091d34400028a8d1f58faa51380 Signed-off-by: Sai Charan Gurrappadi --- arch/arm/mach-tegra/cpuquiet.c | 23 +++++++++++++------ drivers/cpuquiet/governors/runnable_threads.c | 18 +++++++++++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index 8006c3ea043..2e269cb8656 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -311,23 +311,32 @@ static void delay_callback(struct cpuquiet_attribute *attr) static void enable_callback(struct cpuquiet_attribute *attr) { + int disabled = -1; + mutex_lock(tegra3_cpu_lock); if (!enable && cpq_state != TEGRA_CPQ_DISABLED) { + disabled = 1; cpq_state = TEGRA_CPQ_DISABLED; - mutex_unlock(tegra3_cpu_lock); - cancel_delayed_work_sync(&cpuquiet_work); - pr_info("Tegra cpuquiet clusterswitch disabled\n"); - cpuquiet_device_busy(); - mutex_lock(tegra3_cpu_lock); } else if (enable && cpq_state == TEGRA_CPQ_DISABLED) { + disabled = 0; cpq_state = TEGRA_CPQ_IDLE; - pr_info("Tegra cpuquiet clusterswitch enabled\n"); tegra_cpu_set_speed_cap(NULL); - cpuquiet_device_free(); } mutex_unlock(tegra3_cpu_lock); + + if (disabled == -1) + return; + + if (disabled == 1) { + cancel_delayed_work_sync(&cpuquiet_work); + pr_info("Tegra cpuquiet clusterswitch disabled\n"); + cpuquiet_device_busy(); + } else if (!disabled) { + pr_info("Tegra cpuquiet clusterswitch enabled\n"); + cpuquiet_device_free(); + } } CPQ_BASIC_ATTRIBUTE(no_lp, 0644, bool); diff --git a/drivers/cpuquiet/governors/runnable_threads.c b/drivers/cpuquiet/governors/runnable_threads.c index 108f6126da7..7d98a727cc0 100644 --- a/drivers/cpuquiet/governors/runnable_threads.c +++ b/drivers/cpuquiet/governors/runnable_threads.c @@ -186,6 +186,22 @@ static int runnables_sysfs(void) return err; } +static void runnables_device_busy(void) +{ + if (runnables_state != DISABLED) { + runnables_state = DISABLED; + cancel_delayed_work_sync(&runnables_work); + } +} + +static void runnables_device_free(void) +{ + if (runnables_state == DISABLED) { + runnables_state = IDLE; + runnables_work_func(NULL); + } +} + static void runnables_stop(void) { runnables_state = DISABLED; @@ -228,6 +244,8 @@ static int runnables_start(void) struct cpuquiet_governor runnables_governor = { .name = "runnable", .start = runnables_start, + .device_free_notification = runnables_device_free, + .device_busy_notification = runnables_device_busy, .stop = runnables_stop, .owner = THIS_MODULE, }; From b0275439630ebb58f6b87f38192069c7f1d4de66 Mon Sep 17 00:00:00 2001 From: faux123 Date: Wed, 12 Sep 2012 20:34:44 -0500 Subject: [PATCH 208/678] drivers/cpuquiet/governors/runnable_threads: fix typo derp --- drivers/cpuquiet/governors/runnable_threads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpuquiet/governors/runnable_threads.c b/drivers/cpuquiet/governors/runnable_threads.c index 7d98a727cc0..fe1df969420 100644 --- a/drivers/cpuquiet/governors/runnable_threads.c +++ b/drivers/cpuquiet/governors/runnable_threads.c @@ -122,7 +122,7 @@ static void runnables_work_func(struct work_struct *work) case UP: cpu = cpumask_next_zero(0, cpu_online_mask); up = true; - sample = true + sample = true; break; case DOWN: cpu = get_lightest_loaded_cpu_n(); From 18b9b98ff478a91892947f444bc15b89f3c13cf6 Mon Sep 17 00:00:00 2001 From: Sai Charan Gurrappadi Date: Thu, 5 Jul 2012 10:59:01 -0700 Subject: [PATCH 209/678] ARM: tegra: Remove duplicate clock inits Change-Id: I80c384d1aa4b1e45a4542acbde6b904f4a014aff 
Signed-off-by: Sai Charan Gurrappadi --- arch/arm/mach-tegra/cpuquiet.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index 2e269cb8656..4f227293cfe 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -411,13 +411,6 @@ int tegra_auto_hotplug_init(struct mutex *cpu_lock) INIT_DELAYED_WORK(&cpuquiet_work, tegra_cpuquiet_work_func); INIT_WORK(&minmax_work, min_max_constraints_workfunc); - cpu_clk = clk_get_sys(NULL, "cpu"); - cpu_g_clk = clk_get_sys(NULL, "cpu_g"); - cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); - - if (IS_ERR(cpu_clk) || IS_ERR(cpu_g_clk) || IS_ERR(cpu_lp_clk)) - return -ENOENT; - idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; idle_bottom_freq = clk_get_min_rate(cpu_g_clk) / 1000; From 352441b16b4aaac71144fa8e139ecd866e0ddd2e Mon Sep 17 00:00:00 2001 From: faux123 Date: Sun, 23 Sep 2012 08:36:46 -0700 Subject: [PATCH 210/678] drivers/cpuquiet/governors: make balanced as default governor --- drivers/cpuquiet/governors/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpuquiet/governors/Makefile b/drivers/cpuquiet/governors/Makefile index e199d73008f..94c78991f14 100644 --- a/drivers/cpuquiet/governors/Makefile +++ b/drivers/cpuquiet/governors/Makefile @@ -1 +1 @@ -obj-y += userspace.o balanced.o runnable_threads.o +obj-y += balanced.o userspace.o runnable_threads.o From 3ccc6bd61bc328d92ac6e63358c664167810b7ae Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 27 Nov 2012 11:20:13 -0500 Subject: [PATCH 211/678] mach-tegra: cpuquiet.c: decrease down delay to 500ms --- arch/arm/mach-tegra/cpuquiet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index 4f227293cfe..4e176c70429 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -39,7 +39,7 @@ #define INITIAL_STATE TEGRA_CPQ_IDLE #define UP_DELAY_MS 70 -#define DOWN_DELAY_MS 2000 +#define DOWN_DELAY_MS 500 static struct mutex *tegra3_cpu_lock; static struct workqueue_struct *cpuquiet_wq; From 65e762a80e89543269609022d3974e59f84c1090 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 27 Nov 2012 18:09:54 -0500 Subject: [PATCH 212/678] tegra3_clocks.c: use same frequency table for 700Mhz LP --- arch/arm/mach-tegra/tegra3_clocks.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 32fce0ed171..581d90dd906 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4645,17 +4645,16 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { { 4, 475000 }, #ifdef CONFIG_LP_OVERCLOCK #ifdef CONFIG_LP_OC_700 - { 5, 550000 }, { 5, 700000 }, - { 7, 860000 }, - { 8, 1000000 }, - { 9, 1100000 }, - {10, 1200000 }, - {11, 1300000 }, - {12, 1400000 }, - {13, 1500000 }, - {14, 1600000 }, - {15, CPUFREQ_TABLE_END }, + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1100000 }, + { 9, 1200000 }, + {10, 1300000 }, + {11, 1400000 }, + {12, 1500000 }, + {13, 1600000 }, + {14, CPUFREQ_TABLE_END }, #endif #ifdef CONFIG_LP_OC_620 { 5, 620000 }, From 9a3a513120c53e5f31e9324a7b493a1598bbdee4 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 27 Nov 2012 18:10:18 -0500 Subject: [PATCH 213/678] cpufreq: ondemand.c: remove unneeded and missing include --- drivers/cpufreq/cpufreq_ondemand.c | 1 - 1 file changed, 1 deletion(-) diff --git 
a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index d0b1bf83459..c3bb3f16290 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -31,7 +31,6 @@ #include "../../arch/arm/mach-tegra/clock.h" #include "../../arch/arm/mach-tegra/pm.h" -#include "../../arch/arm/mach-tegra/tegra_pmqos.h" /* * dbs is used in this file as a shortform for demandbased switching From 0953c337e54f5dbc466ba4321e9cb4fdc1fb0e2f Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 29 Nov 2012 11:22:29 -0500 Subject: [PATCH 214/678] mach-tegra.c: Add 666MHz LP and 600/666/700Mhz GPU overclocks cleaned up tegra dvfs a bit as well --- arch/arm/mach-tegra/Kconfig | 8 ++ arch/arm/mach-tegra/tegra3_clocks.c | 66 +++++------ arch/arm/mach-tegra/tegra3_dvfs.c | 158 +++++++++++++++++--------- drivers/cpufreq/cpufreq_interactive.c | 3 + 4 files changed, 142 insertions(+), 93 deletions(-) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index ea9f2222f49..d84bef99a33 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -303,6 +303,12 @@ choice bool "484 MHz" config GPU_OC_520 bool "520 MHz" + config GPU_OC_600 + bool "600 MHz" + config GPU_OC_666 + bool "666 MHz" + config GPU_OC_700 + bool "700 MHz" endchoice @@ -329,6 +335,8 @@ choice bool "550 MHz" config LP_OC_620 bool "620 MHz" + config LP_OC_666 + bool "666 MHz" config LP_OC_700 bool "700 MHz" diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 581d90dd906..6417a0dabf5 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -3211,6 +3211,19 @@ static struct clk tegra_pll_ref = { }; static struct clk_pll_freq_table tegra_pll_c_freq_table[] = { + + { 12000000, 1400000000, 700, 6, 1, 8}, + { 13000000, 1400000000, 700, 13, 2, 8}, /* custom: 1400 MHz for 700Mhz GPU */ + { 16800000, 1400000000, 666, 8, 1, 8}, + { 19200000, 1400000000, 656, 9, 1, 8}, + { 26000000, 1400000000, 700, 13, 1, 8}, + + { 12000000, 1332000000, 666, 6, 1, 8}, + { 13000000, 1332000000, 666, 13, 2, 8}, /* custom: 1332 MHz for 666Mhz GPU */ + { 16800000, 1332000000, 555, 7, 1, 8}, + { 19200000, 1332000000, 555, 8, 1, 8}, + { 26000000, 1332000000, 666, 13, 1, 8}, + { 12000000, 1200000000, 600, 6, 1, 8}, { 13000000, 1200000000, 923, 10, 1, 8}, /* actual: 1199.9 MHz */ { 16800000, 1200000000, 500, 7, 1, 8}, @@ -3933,7 +3946,7 @@ static struct clk tegra_clk_virtual_cpu_lp = { .name = "cpu_lp", .parent = &tegra_clk_cclk_lp, .ops = &tegra_cpu_ops, - .max_rate = 720000000, + .max_rate = 700000000, .u.cpu = { .main = &tegra_pll_x, .backup = &tegra_pll_p, @@ -4256,7 +4269,7 @@ struct clk tegra_list_clks[] = { PERIPH_CLK("vcp", "tegra-avp", "vcp", 29, 0, 250000000, mux_clk_m, 0), PERIPH_CLK("bsea", "tegra-avp", "bsea", 62, 0, 250000000, mux_clk_m, 0), PERIPH_CLK("bsev", "tegra-aes", "bsev", 63, 0, 250000000, mux_clk_m, 0), - PERIPH_CLK("vde", "vde", NULL, 61, 0x1c8, 600000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71 | DIV_U71_INT), + PERIPH_CLK("vde", "vde", NULL, 61, 0x1c8, 700000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71 | DIV_U71_INT), PERIPH_CLK("csite", "csite", NULL, 73, 0x1d4, 144000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71), /* max rate ??? 
*/ PERIPH_CLK("la", "la", NULL, 76, 0x1f8, 26000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71), PERIPH_CLK("owr", "tegra_w1", NULL, 71, 0x1cc, 26000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71 | PERIPH_ON_APB), @@ -4284,19 +4297,19 @@ struct clk tegra_list_clks[] = { PERIPH_CLK("uarte_dbg", "serial8250.0", "uarte", 66, 0x1c4, 900000000, mux_pllp_clkm, MUX | DIV_U151 | DIV_U151_UART | PERIPH_ON_APB), PERIPH_CLK_EX("vi", "tegra_camera", "vi", 20, 0x148, 470000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT, &tegra_vi_clk_ops), PERIPH_CLK("vi_sensor", "tegra_camera", "vi_sensor", 20, 0x1a8, 150000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | PERIPH_NO_RESET), - PERIPH_CLK("3d", "3d", NULL, 24, 0x158, 600000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE | PERIPH_MANUAL_RESET), - PERIPH_CLK("3d2", "3d2", NULL, 98, 0x3b0, 600000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE | PERIPH_MANUAL_RESET), - PERIPH_CLK("2d", "2d", NULL, 21, 0x15c, 600000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE), - PERIPH_CLK("epp", "epp", NULL, 19, 0x16c, 600000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT), - PERIPH_CLK("mpe", "mpe", NULL, 60, 0x170, 600000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT), + PERIPH_CLK("3d", "3d", NULL, 24, 0x158, 700000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE | PERIPH_MANUAL_RESET), + PERIPH_CLK("3d2", "3d2", NULL, 98, 0x3b0, 700000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE | PERIPH_MANUAL_RESET), + PERIPH_CLK("2d", "2d", NULL, 21, 0x15c, 700000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE), + PERIPH_CLK("epp", "epp", NULL, 19, 0x16c, 700000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT), + PERIPH_CLK("mpe", "mpe", NULL, 60, 0x170, 700000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT), PERIPH_CLK("host1x", "host1x", NULL, 28, 0x180, 300000000, mux_pllm_pllc_pllp_plla, MUX | DIV_U71 | DIV_U71_INT), PERIPH_CLK("cve", "cve", NULL, 49, 0x140, 250000000, mux_pllp_plld_pllc_clkm, MUX | DIV_U71), /* requires min voltage */ PERIPH_CLK("tvo", "tvo", NULL, 49, 0x188, 250000000, mux_pllp_plld_pllc_clkm, MUX | DIV_U71), /* requires min voltage */ PERIPH_CLK_EX("dtv", "dtv", NULL, 79, 0x1dc, 250000000, mux_clk_m, 0, &tegra_dtv_clk_ops), PERIPH_CLK("hdmi", "hdmi", NULL, 51, 0x18c, 148500000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm, MUX | MUX8 | DIV_U71), PERIPH_CLK("tvdac", "tvdac", NULL, 53, 0x194, 220000000, mux_pllp_plld_pllc_clkm, MUX | DIV_U71), /* requires min voltage */ - PERIPH_CLK("disp1", "tegradc.0", NULL, 27, 0x138, 600000000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm, MUX | MUX8), - PERIPH_CLK("disp2", "tegradc.1", NULL, 26, 0x13c, 600000000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm, MUX | MUX8), + PERIPH_CLK("disp1", "tegradc.0", NULL, 27, 0x138, 700000000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm, MUX | MUX8), + PERIPH_CLK("disp2", "tegradc.1", NULL, 26, 0x13c, 700000000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm, MUX | MUX8), PERIPH_CLK("usbd", "fsl-tegra-udc", NULL, 22, 0, 480000000, mux_clk_m, 0), /* requires min voltage */ PERIPH_CLK("usb2", "tegra-ehci.1", NULL, 58, 0, 480000000, mux_clk_m, 0), /* requires min voltage */ PERIPH_CLK("usb3", "tegra-ehci.2", NULL, 59, 0, 480000000, mux_clk_m, 0), /* requires min voltage */ @@ -4314,7 +4327,7 @@ struct clk tegra_list_clks[] = { PERIPH_CLK("i2cslow", "i2cslow", NULL, 81, 0x3fc, 26000000, mux_pllp_pllc_clk32_clkm, MUX | 
DIV_U71 | PERIPH_ON_APB), PERIPH_CLK("pcie", "tegra-pcie", "pcie", 70, 0, 250000000, mux_clk_m, 0), PERIPH_CLK("afi", "tegra-pcie", "afi", 72, 0, 250000000, mux_clk_m, 0), - PERIPH_CLK("se", "se", NULL, 127, 0x42c, 625000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71 | DIV_U71_INT), + PERIPH_CLK("se", "se", NULL, 127, 0x42c, 700000000, mux_pllp_pllc_pllm_clkm, MUX | DIV_U71 | DIV_U71_INT), PERIPH_CLK("mselect", "mselect", NULL, 99, 0x3b4, 108000000, mux_pllp_clkm, MUX | DIV_U71), SHARED_CLK("avp.sclk", "tegra-avp", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), @@ -4646,42 +4659,20 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { #ifdef CONFIG_LP_OVERCLOCK #ifdef CONFIG_LP_OC_700 { 5, 700000 }, - { 6, 860000 }, - { 7, 1000000 }, - { 8, 1100000 }, - { 9, 1200000 }, - {10, 1300000 }, - {11, 1400000 }, - {12, 1500000 }, - {13, 1600000 }, - {14, CPUFREQ_TABLE_END }, +#endif +#ifdef CONFIG_LP_OC_666 + { 5, 666000 }, #endif #ifdef CONFIG_LP_OC_620 { 5, 620000 }, - { 6, 860000 }, - { 7, 1000000 }, - { 8, 1100000 }, - { 9, 1200000 }, - {10, 1300000 }, - {11, 1400000 }, - {12, 1500000 }, - {13, 1600000 }, - {14, CPUFREQ_TABLE_END }, #endif #ifdef CONFIG_LP_OC_550 { 5, 550000 }, - { 6, 860000 }, - { 7, 1000000 }, - { 8, 1100000 }, - { 9, 1200000 }, - {10, 1300000 }, - {11, 1400000 }, - {12, 1500000 }, - {13, 1600000 }, - {14, CPUFREQ_TABLE_END }, #endif #else { 5, 620000 }, + +#endif { 6, 860000 }, { 7, 1000000 }, { 8, 1100000 }, @@ -4691,7 +4682,6 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { {12, 1500000 }, {13, 1600000 }, {14, CPUFREQ_TABLE_END }, -#endif }; static struct cpufreq_frequency_table freq_table_1p7GHz[] = { diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 044a611145e..470c9345454 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -225,14 +225,17 @@ static struct dvfs core_dvfs_table[] = { /* Clock limits for internal blocks, PLLs */ CORE_DVFS("cpu_lp", 0, 1, KHZ, 1, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), #ifdef CONFIG_LP_OVERCLOCK -#ifdef CONFIG_LP_OC_700 - CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 342000, 475000, 550000, 700000, 700000, 700000, 700000, 700000), +#ifdef CONFIG_LP_OC_550 + CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 475000, 550000, 550000, 550000, 550000, 550000), #endif #ifdef CONFIG_LP_OC_620 CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 475000, 620000, 620000, 620000, 620000, 620000), #endif -#ifdef CONFIG_LP_OC_550 - CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 475000, 550000, 550000, 550000, 550000, 550000), +#ifdef CONFIG_LP_OC_666 + CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 342000, 475000, 550000, 666000, 666000, 666000, 666000, 666000), +#endif +#ifdef CONFIG_LP_OC_700 + CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 342000, 475000, 620000, 700000, 700000, 700000, 700000, 700000), #endif #else CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), @@ -257,16 +260,16 @@ static struct dvfs core_dvfs_table[] = { /* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ + CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 
416000), + CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + #ifdef CONFIG_GPU_OVERCLOCK #ifdef CONFIG_GPU_OC_446 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), @@ -276,14 +279,6 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), #endif #ifdef CONFIG_GPU_OC_484 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), @@ -293,31 +288,42 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), #endif #ifdef CONFIG_GPU_OC_520 - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), - - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 
520000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), +#endif +#ifdef CONFIG_GPU_OC_600 + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), +#endif +#ifdef CONFIG_GPU_OC_666 + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), +#endif +#ifdef CONFIG_GPU_OC_700 + CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), #endif #else - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - 
CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), @@ -344,37 +350,79 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("se", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 625000, 625000, 625000), CORE_DVFS("host1x", 0, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), +#ifdef CONFIG_GPU_OVERCLOCK +#ifdef CONFIG_GPU_OC_446 + CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 223000, 223000, 223000, 223000, 223000, 223000), +#endif +#ifdef CONFIG_GPU_OC_446 + CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 242000, 242000, 242000, 242000, 242000, 242000), +#endif +#ifdef CONFIG_GPU_OC_520 + CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 260000, 260000, 260000, 260000, 260000), +#endif +#ifdef CONFIG_GPU_OC_600 + CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 300000, 300000, 300000, 300000), +#endif +#ifdef CONFIG_GPU_OC_666 + CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 333000, 333000, 333000, 333000), +#endif +#ifdef CONFIG_GPU_OC_700 + CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 350000, 350000, 350000, 350000), +#endif +#else CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), +#endif CORE_DVFS("host1x", 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), CORE_DVFS("host1x", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), + + CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), #ifdef CONFIG_GPU_OVERCLOCK #ifdef CONFIG_GPU_OC_446 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), - CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif #ifdef CONFIG_GPU_OC_484 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), - CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #endif #ifdef CONFIG_GPU_OC_520 - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), - CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), - CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), +#endif +#ifdef 
CONFIG_GPU_OC_600 + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), +#endif +#ifdef CONFIG_GPU_OC_666 + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 666000, 666000, 666000, 666000), +#endif +#ifdef CONFIG_GPU_OC_700 + CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 700000, 700000, 700000, 700000), #endif #else - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), +#endif CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), -#endif +#ifdef CONFIG_GPU_OVERCLOCK +#ifdef CONFIG_GPU_OC_446 + CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 892000, 892000, 892000, 892000, 892000, 892000), +#endif +#ifdef CONFIG_GPU_OC_484 + CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 968000, 968000, 968000, 968000, 968000, 968000), +#endif +#ifdef CONFIG_GPU_OC_520 + CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 1040000, 1040000, 1040000, 1040000, 1040000), +#endif +#ifdef CONFIG_GPU_OC_600 + CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1200000, 1200000, 1200000, 1200000), +#endif +#ifdef CONFIG_GPU_OC_666 + CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1332000, 1332000, 1332000, 1332000), +#endif +#ifdef CONFIG_GPU_OC_700 + CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1400000, 1400000, 1400000, 1400000), +#endif +#else CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1200000), - +#endif /* * PLLM dvfs is common across all speedo IDs with one special exception * for T30 and T33, rev A02+, provided PLLM usage is restricted. 
Both diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index da22e89ca0c..5a08993f5df 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -87,6 +87,9 @@ static unsigned int hispeed_freq = 1100000; #ifdef CONFIG_LP_OC_700 static unsigned int input_boost_freq = 700000; #endif +#ifdef CONFIG_LP_OC_666 +static unsigned int input_boost_freq = 666000; +#endif #ifdef CONFIG_LP_OC_620 static unsigned int input_boost_freq = 620000; #endif From 0874833f944325b64ac25661baf025412ef41ebd Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 2 Dec 2012 16:30:15 -0500 Subject: [PATCH 215/678] mach-tegra: edp: don't cap dvfs table and voltage --- arch/arm/mach-tegra/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index d91ad83fd6a..430278cdd34 100755 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -103,7 +103,7 @@ static struct board_info pmu_board_info; static struct board_info display_board_info; static struct board_info camera_board_info; -static int pmu_core_edp = 1200; /* default 1.2V EDP limit */ +static int pmu_core_edp = 1700; /* default 1.2V EDP limit */ static int board_panel_type; static enum power_supply_type pow_supply_type = POWER_SUPPLY_TYPE_MAINS; From 105505c7e4e58076ef1fbc643c9f60fa5376f13f Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 2 Dec 2012 16:41:46 -0500 Subject: [PATCH 216/678] mach-tegra: add GPU OC interface and EMC, AVP, and LP UV interfaces --- arch/arm/configs/metallice_grouper_defconfig | 62 +++- arch/arm/mach-tegra/dvfs.h | 2 +- arch/arm/mach-tegra/tegra3_clocks.c | 4 + arch/arm/mach-tegra/tegra3_dvfs.c | 362 ++++++++++--------- drivers/cpufreq/cpufreq.c | 291 ++++++++++++++- 5 files changed, 528 insertions(+), 193 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index bb05e81ed7f..22a976421c8 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,14 +38,14 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-446-620" +CONFIG_LOCALVERSION="-MKernel-446-666" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y CONFIG_HAVE_KERNEL_LZO=y -CONFIG_KERNEL_GZIP=y +# CONFIG_KERNEL_GZIP is not set # CONFIG_KERNEL_LZMA is not set -# CONFIG_KERNEL_LZO is not set +CONFIG_KERNEL_LZO=y CONFIG_DEFAULT_HOSTNAME="(none)" CONFIG_SWAP=y # CONFIG_SYSVIPC is not set @@ -315,9 +315,14 @@ CONFIG_GPU_OVERCLOCK=y CONFIG_GPU_OC_446=y # CONFIG_GPU_OC_484 is not set # CONFIG_GPU_OC_520 is not set +# CONFIG_GPU_OC_600 is not set +# CONFIG_GPU_OC_666 is not set +# CONFIG_GPU_OC_700 is not set CONFIG_LP_OVERCLOCK=y # CONFIG_LP_OC_550 is not set -CONFIG_LP_OC_620=y +# CONFIG_LP_OC_620 is not set +CONFIG_LP_OC_666=y +# CONFIG_LP_OC_700 is not set CONFIG_TEGRA_CPU_DVFS=y CONFIG_TEGRA_CORE_DVFS=y CONFIG_TEGRA_IOVMM_SMMU=y @@ -335,6 +340,7 @@ CONFIG_TEGRA_MC_PROFILE=y CONFIG_TEGRA_EDP_LIMITS=y CONFIG_TEGRA_EMC_TO_DDR_CLOCK=1 # CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND is not set +CONFIG_TEGRA_RUNNABLE_THREAD=y CONFIG_TEGRA_VARIANT_INFO=y CONFIG_USB_HOTPLUG=y CONFIG_TEGRA_DYNAMIC_PWRDET=y @@ -344,7 +350,7 @@ CONFIG_TEGRA_BB_XMM_POWER=y # CONFIG_TEGRA_BB_XMM_POWER2 is not set # CONFIG_TEGRA_THERMAL_SYSFS is not set CONFIG_TEGRA_PLLM_RESTRICTED=y -CONFIG_TEGRA_WDT_RECOVERY=y +# 
CONFIG_TEGRA_WDT_RECOVERY is not set CONFIG_TEGRA_LP2_ARM_TWD=y CONFIG_TEGRA_SLOW_CSITE=y # CONFIG_TEGRA_PREINIT_CLOCKS is not set @@ -509,9 +515,9 @@ CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE=y CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_GOV_USERSPACE is not set @@ -528,6 +534,11 @@ CONFIG_CPU_IDLE=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPU_IDLE_GOV_MENU=y +# +# CPUQUIET Framework +# +# CONFIG_CPUQUIET_FRAMEWORK is not set + # # Floating point emulation # @@ -611,9 +622,28 @@ CONFIG_INET_XFRM_MODE_TUNNEL=y CONFIG_INET_XFRM_MODE_BEET=y # CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set -# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=y CONFIG_TCP_CONG_CUBIC=y -CONFIG_DEFAULT_TCP_CONG="cubic" +CONFIG_TCP_CONG_WESTWOOD=y +CONFIG_TCP_CONG_HTCP=y +CONFIG_TCP_CONG_HSTCP=y +CONFIG_TCP_CONG_HYBLA=y +CONFIG_TCP_CONG_VEGAS=y +CONFIG_TCP_CONG_SCALABLE=y +CONFIG_TCP_CONG_LP=y +CONFIG_TCP_CONG_VENO=y +CONFIG_TCP_CONG_YEAH=y +CONFIG_TCP_CONG_ILLINOIS=y +# CONFIG_DEFAULT_BIC is not set +# CONFIG_DEFAULT_CUBIC is not set +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_HYBLA is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_VENO is not set +CONFIG_DEFAULT_WESTWOOD=y +# CONFIG_DEFAULT_RENO is not set +CONFIG_DEFAULT_TCP_CONG="westwood" # CONFIG_TCP_MD5SIG is not set CONFIG_IPV6=y CONFIG_IPV6_PRIVACY=y @@ -1284,7 +1314,7 @@ CONFIG_USB_NET_CDC_SUBSET=y # CONFIG_USB_IPHETH is not set # CONFIG_USB_SIERRA_NET is not set # CONFIG_USB_VL600 is not set -# CONFIG_USB_NET_RAW_IP is not set +CONFIG_USB_NET_RAW_IP=y # CONFIG_WAN is not set # @@ -1451,7 +1481,7 @@ CONFIG_INPUT_GPIO=y # # Proximity sensors # -# CONFIG_SENSORS_CAP1106 is not set +CONFIG_SENSORS_CAP1106=y # # Hardware I/O ports @@ -2285,7 +2315,7 @@ CONFIG_SND_SOC_SPDIF=y CONFIG_HID_SUPPORT=y CONFIG_HID=y # CONFIG_HIDRAW is not set -# CONFIG_UHID is not set +CONFIG_UHID=y # # USB Input Devices @@ -2494,7 +2524,7 @@ CONFIG_USB_SERIAL_OPTION=y # CONFIG_USB_SERIAL_ZIO is not set # CONFIG_USB_SERIAL_SSU100 is not set # CONFIG_USB_SERIAL_DEBUG is not set -# CONFIG_USB_SERIAL_BASEBAND is not set +CONFIG_USB_SERIAL_BASEBAND=y # # USB Miscellaneous drivers @@ -2565,7 +2595,6 @@ CONFIG_USB_ULPI=y CONFIG_USB_ULPI_VIEWPORT=y # CONFIG_NOP_USB_XCEIV is not set CONFIG_USB_TEGRA_OTG=y -CONFIG_USB_OTG_ON_CHARGING=y # CONFIG_UWB is not set CONFIG_MMC=y # CONFIG_MMC_DEBUG is not set @@ -2589,6 +2618,7 @@ CONFIG_MMC_TEST=y # CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_IO_ACCESSORS=y +CONFIG_MMC_SDHCI_NATIVE_BLOCKSIZE=y # CONFIG_MMC_SDHCI_PCI is not set CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_TEGRA=y @@ -2901,7 +2931,7 @@ CONFIG_CLKSRC_MMIO=y CONFIG_IOMMU_SUPPORT=y # CONFIG_TEGRA_IOMMU_SMMU is not set # CONFIG_VIRT_DRIVERS is not set -# CONFIG_RIL is not set +CONFIG_RIL=y # # File systems @@ -3234,6 +3264,7 @@ CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_RMD256 is not set # CONFIG_CRYPTO_RMD320 is not set CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA1_ARM=y CONFIG_CRYPTO_SHA256=y # CONFIG_CRYPTO_SHA512 is not set # CONFIG_CRYPTO_TGR192 is not set @@ -3243,6 +3274,7 @@ CONFIG_CRYPTO_SHA256=y # Ciphers # 
CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_AES_ARM=y # CONFIG_CRYPTO_ANUBIS is not set CONFIG_CRYPTO_ARC4=y # CONFIG_CRYPTO_BLOWFISH is not set diff --git a/arch/arm/mach-tegra/dvfs.h b/arch/arm/mach-tegra/dvfs.h index eaecf425fe8..50b6dfe8230 100644 --- a/arch/arm/mach-tegra/dvfs.h +++ b/arch/arm/mach-tegra/dvfs.h @@ -89,7 +89,7 @@ struct dvfs { int freqs_mult; unsigned long freqs[MAX_DVFS_FREQS]; unsigned long alt_freqs[MAX_DVFS_FREQS]; - const int *millivolts; + int *millivolts; struct dvfs_rail *dvfs_rail; bool auto_dvfs; enum dvfs_alt_freqs alt_freqs_state; diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 6417a0dabf5..0dcf3a7d953 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4745,6 +4745,10 @@ static int clip_cpu_rate_limits( cpu_clk_lp->max_rate, ret ? "outside" : "at the bottom"); return ret; } + + /* force idx for max LP*/ + idx=5; + cpu_clk_lp->max_rate = freq_table[idx].frequency * 1000; cpu_clk_g->min_rate = freq_table[idx-1].frequency * 1000; data->suspend_index = idx; diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 470c9345454..1e099f7a0b3 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -39,11 +39,21 @@ static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; + static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25, 25, 25, 25}; static const int core_millivolts[MAX_DVFS_FREQS] = { - 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350}; + 950, 1000, 1050, 1100, 1150, 1200, 1200, 1200, 1200}; + +int avp_millivolts[MAX_DVFS_FREQS] = { + 950, 1000, 1050, 1100, 1150, 1200, 1200, 1200, 1200}; + +int lp_cpu_millivolts[MAX_DVFS_FREQS] = { + 950, 1000, 1050, 1100, 1150, 1200, 1200, 1200, 1200}; + +int emc_millivolts[MAX_DVFS_FREQS] = { + 950, 1000, 1050, 1100, 1150, 1200, 1200, 1200, 1200}; #define KHZ 1000 #define MHZ 1000000 @@ -65,7 +75,7 @@ static struct dvfs_rail tegra3_dvfs_rail_vdd_cpu = { static struct dvfs_rail tegra3_dvfs_rail_vdd_core = { .reg_id = "vdd_core", - .max_millivolts = 1350, + .max_millivolts = 1500, .min_millivolts = 950, .step = VDD_SAFE_STEP, }; @@ -208,14 +218,14 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", -1, -1, MHZ, 1, 1, 216, 216, 300), }; -#define CORE_DVFS(_clk_name, _speedo_id, _auto, _mult, _freqs...) \ +#define CORE_DVFS(_clk_name, _millivolts, _speedo_id, _auto, _mult, _freqs...) 
\ { \ .clk_name = _clk_name, \ .speedo_id = _speedo_id, \ .process_id = -1, \ .freqs = {_freqs}, \ .freqs_mult = _mult, \ - .millivolts = core_millivolts, \ + .millivolts = _millivolts, \ .auto_dvfs = _auto, \ .dvfs_rail = &tegra3_dvfs_rail_vdd_core, \ } @@ -223,205 +233,205 @@ static struct dvfs cpu_dvfs_table[] = { static struct dvfs core_dvfs_table[] = { /* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ /* Clock limits for internal blocks, PLLs */ - CORE_DVFS("cpu_lp", 0, 1, KHZ, 1, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 0, 1, KHZ, 1, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), #ifdef CONFIG_LP_OVERCLOCK #ifdef CONFIG_LP_OC_550 - CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 475000, 550000, 550000, 550000, 550000, 550000), + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 475000, 550000, 550000, 550000, 550000, 550000), #endif #ifdef CONFIG_LP_OC_620 - CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 475000, 620000, 620000, 620000, 620000, 620000), + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 475000, 620000, 620000, 620000, 620000, 620000), #endif #ifdef CONFIG_LP_OC_666 - CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 342000, 475000, 550000, 666000, 666000, 666000, 666000, 666000), + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 342000, 475000, 550000, 666000, 666000, 666000, 666000, 666000), #endif #ifdef CONFIG_LP_OC_700 - CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 342000, 475000, 620000, 700000, 700000, 700000, 700000, 700000), + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 342000, 475000, 620000, 700000, 700000, 700000, 700000, 700000), #endif #else - CORE_DVFS("cpu_lp", 1, 1, KHZ, 204000, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), #endif - CORE_DVFS("cpu_lp", 2, 1, KHZ, 204000, 295000, 370000, 428000, 475000, 513000, 579000, 620000, 620000), - CORE_DVFS("cpu_lp", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 450000, 450000, 450000), + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 2, 1, KHZ, 204000, 295000, 370000, 428000, 475000, 513000, 579000, 620000, 620000), + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 450000, 450000, 450000), - CORE_DVFS("emc", 0, 1, KHZ, 1, 266500, 266500, 266500, 266500, 533000, 533000, 533000, 533000), - CORE_DVFS("emc", 1, 1, KHZ, 102000, 408000, 408000, 408000, 408000, 667000, 667000, 667000, 667000), - CORE_DVFS("emc", 2, 1, KHZ, 102000, 408000, 408000, 408000, 408000, 667000, 667000, 800000, 900000), - CORE_DVFS("emc", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 625000, 625000, 625000), + CORE_DVFS("emc", emc_millivolts, 0, 1, KHZ, 1, 266500, 266500, 266500, 266500, 533000, 533000, 533000, 533000), + CORE_DVFS("emc", emc_millivolts, 1, 1, KHZ, 102000, 408000, 408000, 408000, 408000, 667000, 667000, 667000, 667000), + CORE_DVFS("emc", emc_millivolts, 2, 1, KHZ, 102000, 408000, 408000, 408000, 408000, 667000, 667000, 800000, 900000), + CORE_DVFS("emc", emc_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 625000, 625000, 625000), - CORE_DVFS("sbus", 0, 1, KHZ, 1, 136000, 164000, 191000, 216000, 216000, 216000, 216000, 216000), - CORE_DVFS("sbus", 1, 1, KHZ, 51000, 205000, 205000, 227000, 227000, 267000, 267000, 267000, 267000), - CORE_DVFS("sbus", 2, 1, KHZ, 51000, 205000, 205000, 227000, 227000, 267000, 334000, 334000, 334000), - 
CORE_DVFS("sbus", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 378000, 378000, 378000), + CORE_DVFS("sbus", core_millivolts, 0, 1, KHZ, 1, 136000, 164000, 191000, 216000, 216000, 216000, 216000, 216000), + CORE_DVFS("sbus", core_millivolts, 1, 1, KHZ, 51000, 205000, 205000, 227000, 227000, 267000, 267000, 267000, 267000), + CORE_DVFS("sbus", core_millivolts, 2, 1, KHZ, 51000, 205000, 205000, 227000, 227000, 267000, 334000, 334000, 334000), + CORE_DVFS("sbus", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 378000, 378000, 378000), - CORE_DVFS("vi", 0, 1, KHZ, 1, 216000, 285000, 300000, 300000, 300000, 300000, 300000, 300000), - CORE_DVFS("vi", 1, 1, KHZ, 1, 216000, 267000, 300000, 371000, 409000, 409000, 409000, 409000), - CORE_DVFS("vi", 2, 1, KHZ, 1, 219000, 267000, 300000, 371000, 409000, 425000, 425000, 425000), - CORE_DVFS("vi", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 470000, 470000, 470000), + CORE_DVFS("vi", core_millivolts, 0, 1, KHZ, 1, 216000, 285000, 300000, 300000, 300000, 300000, 300000, 300000), + CORE_DVFS("vi", core_millivolts, 1, 1, KHZ, 1, 216000, 267000, 300000, 371000, 409000, 409000, 409000, 409000), + CORE_DVFS("vi", core_millivolts, 2, 1, KHZ, 1, 219000, 267000, 300000, 371000, 409000, 425000, 425000, 425000), + CORE_DVFS("vi", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 470000, 470000, 470000), /* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ - CORE_DVFS("vde", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("mpe", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("2d", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("epp", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d2", 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("se", 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("vde", avp_millivolts, 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("mpe", avp_millivolts, 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("2d", avp_millivolts, 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("epp", avp_millivolts, 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d", avp_millivolts, 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d2", avp_millivolts, 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("se", avp_millivolts, 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), #ifdef CONFIG_GPU_OVERCLOCK #ifdef CONFIG_GPU_OC_446 - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 
285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), #endif #ifdef CONFIG_GPU_OC_484 - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 484000, 484000, 484000, 484000), #endif #ifdef CONFIG_GPU_OC_520 - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d", 
avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), #endif #ifdef CONFIG_GPU_OC_600 - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 600000, 600000, 600000, 600000), #endif #ifdef CONFIG_GPU_OC_666 - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 666000, 666000, 666000, 666000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 666000, 666000, 666000, 666000), #endif #ifdef CONFIG_GPU_OC_700 - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 700000, 700000, 700000, 700000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 
700000, 700000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 700000, 700000, 700000, 700000), #endif #else - CORE_DVFS("vde", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("mpe", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("2d", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("epp", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("3d2", 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), - CORE_DVFS("se", 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), #endif - CORE_DVFS("vde", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), - CORE_DVFS("mpe", 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - CORE_DVFS("2d", 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - CORE_DVFS("epp", 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - CORE_DVFS("3d", 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - CORE_DVFS("3d2", 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - CORE_DVFS("se", 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), - - CORE_DVFS("vde", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), 
- CORE_DVFS("mpe", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), - CORE_DVFS("2d", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), - CORE_DVFS("epp", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), - CORE_DVFS("3d", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), - CORE_DVFS("3d2", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), - CORE_DVFS("se", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 625000, 625000, 625000), - - CORE_DVFS("host1x", 0, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), + CORE_DVFS("vde", avp_millivolts, 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), + CORE_DVFS("mpe", avp_millivolts, 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + CORE_DVFS("2d", avp_millivolts, 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + CORE_DVFS("epp", avp_millivolts, 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + CORE_DVFS("3d", avp_millivolts, 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + CORE_DVFS("3d2", avp_millivolts, 2, 1, KHZ, 1, 247000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + CORE_DVFS("se", avp_millivolts, 2, 1, KHZ, 1, 267000, 304000, 361000, 408000, 446000, 484000, 520000, 600000), + + CORE_DVFS("vde", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("mpe", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("2d", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("epp", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("3d", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("3d2", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), + CORE_DVFS("se", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 625000, 625000, 625000), + + CORE_DVFS("host1x", core_millivolts, 0, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), #ifdef CONFIG_GPU_OVERCLOCK #ifdef CONFIG_GPU_OC_446 - CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 223000, 223000, 223000, 223000, 223000, 223000), + CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 223000, 223000, 223000, 223000, 223000, 223000), #endif #ifdef CONFIG_GPU_OC_446 - CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 242000, 242000, 242000, 242000, 242000, 242000), + CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 242000, 242000, 242000, 242000, 242000, 242000), #endif #ifdef CONFIG_GPU_OC_520 - CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 260000, 260000, 260000, 260000, 260000), + CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 260000, 260000, 260000, 260000, 260000), #endif #ifdef CONFIG_GPU_OC_600 - CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 300000, 300000, 300000, 300000), + CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 300000, 300000, 300000, 300000), #endif #ifdef CONFIG_GPU_OC_666 - CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 333000, 333000, 333000, 333000), + CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 333000, 333000, 333000, 333000), #endif #ifdef CONFIG_GPU_OC_700 - CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 350000, 350000, 350000, 350000), + CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 
222000, 254000, 350000, 350000, 350000, 350000), #endif #else - CORE_DVFS("host1x", 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), + CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), #endif - CORE_DVFS("host1x", 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), - CORE_DVFS("host1x", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), + CORE_DVFS("host1x", core_millivolts, 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), + CORE_DVFS("host1x", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), - CORE_DVFS("cbus", 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("cbus", core_millivolts, 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), #ifdef CONFIG_GPU_OVERCLOCK #ifdef CONFIG_GPU_OC_446 - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), #endif #ifdef CONFIG_GPU_OC_484 - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), #endif #ifdef CONFIG_GPU_OC_520 - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), + CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), #endif #ifdef CONFIG_GPU_OC_600 - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), #endif #ifdef CONFIG_GPU_OC_666 - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 666000, 666000, 666000, 666000), #endif #ifdef CONFIG_GPU_OC_700 - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 700000, 700000, 700000, 700000), #endif #else - CORE_DVFS("cbus", 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), #endif - CORE_DVFS("cbus", 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), - CORE_DVFS("cbus", 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), + CORE_DVFS("cbus", core_millivolts, 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), + CORE_DVFS("cbus", core_millivolts, 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #ifdef CONFIG_GPU_OVERCLOCK #ifdef CONFIG_GPU_OC_446 - CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 892000, 892000, 892000, 892000, 892000, 892000), + CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 892000, 892000, 892000, 892000, 892000, 892000), #endif #ifdef CONFIG_GPU_OC_484 - CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 968000, 968000, 968000, 968000, 968000, 968000), + CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 
968000, 968000, 968000, 968000, 968000, 968000), #endif #ifdef CONFIG_GPU_OC_520 - CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 1040000, 1040000, 1040000, 1040000, 1040000), + CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 1040000, 1040000, 1040000, 1040000, 1040000), #endif #ifdef CONFIG_GPU_OC_600 - CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1200000, 1200000, 1200000, 1200000), + CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1200000, 1200000, 1200000, 1200000), #endif #ifdef CONFIG_GPU_OC_666 - CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1332000, 1332000, 1332000, 1332000), + CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1332000, 1332000, 1332000, 1332000), #endif #ifdef CONFIG_GPU_OC_700 - CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1400000, 1400000, 1400000, 1400000), + CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1400000, 1400000, 1400000, 1400000), #endif #else - CORE_DVFS("pll_c", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1200000), + CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1200000), #endif /* * PLLM dvfs is common across all speedo IDs with one special exception @@ -429,50 +439,50 @@ static struct dvfs core_dvfs_table[] = { * common and restricted table are included, and table selection is * handled by is_pllm_dvfs() below. */ - CORE_DVFS("pll_m", -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1066000), + CORE_DVFS("pll_m", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1066000), #ifdef CONFIG_TEGRA_PLLM_RESTRICTED - CORE_DVFS("pll_m", 2, 1, KHZ, 533000, 800000, 800000, 800000, 800000, 1066000, 1066000, 1066000, 1066000), + CORE_DVFS("pll_m", core_millivolts, 2, 1, KHZ, 533000, 800000, 800000, 800000, 800000, 1066000, 1066000, 1066000, 1066000), #endif /* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ /* Clock limits for I/O peripherals */ - CORE_DVFS("mipi", 0, 1, KHZ, 1, 1, 1, 1, 1, 1, 1, 1, 1), - CORE_DVFS("mipi", 1, 1, KHZ, 1, 1, 1, 1, 1, 60000, 60000, 60000, 60000), - CORE_DVFS("mipi", 2, 1, KHZ, 1, 1, 1, 1, 1, 60000, 60000, 60000, 60000), - CORE_DVFS("mipi", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 1, 1, 1), - - CORE_DVFS("fuse_burn", -1, 1, KHZ, 1, 1, 1, 1, 26000, 26000, 26000, 26000, 26000), - CORE_DVFS("sdmmc1", -1, 1, KHZ, 104000, 104000, 104000, 104000, 104000, 208000, 208000, 208000, 208000), - CORE_DVFS("sdmmc3", -1, 1, KHZ, 104000, 104000, 104000, 104000, 104000, 208000, 208000, 208000, 208000), - CORE_DVFS("ndflash", -1, 1, KHZ, 1, 120000, 120000, 120000, 200000, 200000, 200000, 200000, 200000), - - CORE_DVFS("nor", 0, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), - CORE_DVFS("nor", 1, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), - CORE_DVFS("nor", 2, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), - CORE_DVFS("nor", 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 108000, 108000, 108000), - - CORE_DVFS("sbc1", -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - CORE_DVFS("sbc2", -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - CORE_DVFS("sbc3", -1, 1, KHZ, 1, 52000, 60000, 60000, 
60000, 100000, 100000, 100000, 100000), - CORE_DVFS("sbc4", -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - CORE_DVFS("sbc5", -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - CORE_DVFS("sbc6", -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - - CORE_DVFS("usbd", -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), - CORE_DVFS("usb2", -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), - CORE_DVFS("usb3", -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), - - CORE_DVFS("sata", -1, 1, KHZ, 1, 216000, 216000, 216000, 216000, 216000, 216000, 216000, 216000), - CORE_DVFS("sata_oob", -1, 1, KHZ, 1, 216000, 216000, 216000, 216000, 216000, 216000, 216000, 216000), - CORE_DVFS("pcie", -1, 1, KHZ, 1, 250000, 250000, 250000, 250000, 250000, 250000, 250000, 250000), - CORE_DVFS("afi", -1, 1, KHZ, 1, 250000, 250000, 250000, 250000, 250000, 250000, 250000, 250000), - CORE_DVFS("pll_e", -1, 1, KHZ, 1, 100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000), - - CORE_DVFS("tvdac", -1, 1, KHZ, 1, 220000, 220000, 220000, 220000, 220000, 220000, 220000, 220000), - CORE_DVFS("tvo", -1, 1, KHZ, 1, 1, 297000, 297000, 297000, 297000, 297000, 297000, 297000), - CORE_DVFS("cve", -1, 1, KHZ, 1, 1, 297000, 297000, 297000, 297000, 297000, 297000, 297000), - CORE_DVFS("dsia", -1, 1, KHZ, 1, 275000, 275000, 275000, 275000, 275000, 275000, 275000, 275000), - CORE_DVFS("dsib", -1, 1, KHZ, 1, 275000, 275000, 275000, 275000, 275000, 275000, 275000, 275000), - CORE_DVFS("hdmi", -1, 1, KHZ, 1, 148500, 148500, 148500, 148500, 148500, 148500, 148500, 148500), + CORE_DVFS("mipi", core_millivolts, 0, 1, KHZ, 1, 1, 1, 1, 1, 1, 1, 1, 1), + CORE_DVFS("mipi", core_millivolts, 1, 1, KHZ, 1, 1, 1, 1, 1, 60000, 60000, 60000, 60000), + CORE_DVFS("mipi", core_millivolts, 2, 1, KHZ, 1, 1, 1, 1, 1, 60000, 60000, 60000, 60000), + CORE_DVFS("mipi", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 1, 1, 1), + + CORE_DVFS("fuse_burn", core_millivolts, -1, 1, KHZ, 1, 1, 1, 1, 26000, 26000, 26000, 26000, 26000), + CORE_DVFS("sdmmc1", core_millivolts, -1, 1, KHZ, 104000, 104000, 104000, 104000, 104000, 208000, 208000, 208000, 208000), + CORE_DVFS("sdmmc3", core_millivolts, -1, 1, KHZ, 104000, 104000, 104000, 104000, 104000, 208000, 208000, 208000, 208000), + CORE_DVFS("ndflash", core_millivolts, -1, 1, KHZ, 1, 120000, 120000, 120000, 200000, 200000, 200000, 200000, 200000), + + CORE_DVFS("nor", core_millivolts, 0, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), + CORE_DVFS("nor", core_millivolts, 1, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), + CORE_DVFS("nor", core_millivolts, 2, 1, KHZ, 1, 115000, 130000, 130000, 133000, 133000, 133000, 133000, 133000), + CORE_DVFS("nor", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 108000, 108000, 108000), + + CORE_DVFS("sbc1", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + CORE_DVFS("sbc2", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + CORE_DVFS("sbc3", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + CORE_DVFS("sbc4", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + CORE_DVFS("sbc5", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + 
CORE_DVFS("sbc6", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), + + CORE_DVFS("usbd", core_millivolts, -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), + CORE_DVFS("usb2", core_millivolts, -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), + CORE_DVFS("usb3", core_millivolts, -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), + + CORE_DVFS("sata", core_millivolts, -1, 1, KHZ, 1, 216000, 216000, 216000, 216000, 216000, 216000, 216000, 216000), + CORE_DVFS("sata_oob", core_millivolts, -1, 1, KHZ, 1, 216000, 216000, 216000, 216000, 216000, 216000, 216000, 216000), + CORE_DVFS("pcie", core_millivolts, -1, 1, KHZ, 1, 250000, 250000, 250000, 250000, 250000, 250000, 250000, 250000), + CORE_DVFS("afi", core_millivolts, -1, 1, KHZ, 1, 250000, 250000, 250000, 250000, 250000, 250000, 250000, 250000), + CORE_DVFS("pll_e", core_millivolts, -1, 1, KHZ, 1, 100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000), + + CORE_DVFS("tvdac", core_millivolts, -1, 1, KHZ, 1, 220000, 220000, 220000, 220000, 220000, 220000, 220000, 220000), + CORE_DVFS("tvo", core_millivolts, -1, 1, KHZ, 1, 1, 297000, 297000, 297000, 297000, 297000, 297000, 297000), + CORE_DVFS("cve", core_millivolts, -1, 1, KHZ, 1, 1, 297000, 297000, 297000, 297000, 297000, 297000, 297000), + CORE_DVFS("dsia", core_millivolts, -1, 1, KHZ, 1, 275000, 275000, 275000, 275000, 275000, 275000, 275000, 275000), + CORE_DVFS("dsib", core_millivolts, -1, 1, KHZ, 1, 275000, 275000, 275000, 275000, 275000, 275000, 275000, 275000), + CORE_DVFS("hdmi", core_millivolts, -1, 1, KHZ, 1, 148500, 148500, 148500, 148500, 148500, 148500, 148500, 148500), /* * The clock rate for the display controllers that determines the @@ -480,18 +490,18 @@ static struct dvfs core_dvfs_table[] = { * to the display block. 
Disable auto-dvfs on the display clocks, * and let the display driver call tegra_dvfs_set_rate manually */ - CORE_DVFS("disp1", 0, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), - CORE_DVFS("disp1", 1, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), - CORE_DVFS("disp1", 2, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), - CORE_DVFS("disp1", 3, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), - - CORE_DVFS("disp2", 0, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), - CORE_DVFS("disp2", 1, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), - CORE_DVFS("disp2", 2, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), - CORE_DVFS("disp2", 3, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), - - CORE_DVFS("pwm", -1, 1, KHZ, 1, 408000, 408000, 408000, 408000, 408000, 408000, 408000, 408000), - CORE_DVFS("spdif_out", -1, 1, KHZ, 1, 26000, 26000, 26000, 26000, 26000, 26000, 26000, 26000), + CORE_DVFS("disp1", core_millivolts, 0, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), + CORE_DVFS("disp1", core_millivolts, 1, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), + CORE_DVFS("disp1", core_millivolts, 2, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), + CORE_DVFS("disp1", core_millivolts, 3, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), + + CORE_DVFS("disp2", core_millivolts, 0, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), + CORE_DVFS("disp2", core_millivolts, 1, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), + CORE_DVFS("disp2", core_millivolts, 2, 0, KHZ, 1, 155000, 268000, 268000, 268000, 268000, 268000, 268000, 268000), + CORE_DVFS("disp2", core_millivolts, 3, 0, KHZ, 1, 120000, 120000, 120000, 120000, 190000, 190000, 190000, 190000), + + CORE_DVFS("pwm", core_millivolts, -1, 1, KHZ, 1, 408000, 408000, 408000, 408000, 408000, 408000, 408000, 408000), + CORE_DVFS("spdif_out", core_millivolts, -1, 1, KHZ, 1, 26000, 26000, 26000, 26000, 26000, 26000, 26000, 26000), }; @@ -590,7 +600,7 @@ static void __init init_dvfs_one(struct dvfs *d, int nominal_mv_index) tegra_init_max_rate( c, d->freqs[nominal_mv_index] * d->freqs_mult); } - d->max_millivolts = d->dvfs_rail->nominal_millivolts; +// d->max_millivolts = d->dvfs_rail->nominal_millivolts; /* * Check if we may skip enabling dvfs on PLLM. PLLM is a special case, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 09435a35965..25ec7f419a7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -31,8 +31,14 @@ #include #include +#include "../../arch/arm/mach-tegra/dvfs.h" +#include "../../arch/arm/mach-tegra/clock.h" + #include +static DEFINE_MUTEX(dvfs_lock); +static DEFINE_MUTEX(cpu_lp_lock); + /** * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. 
This lock @@ -605,6 +611,9 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) #include "../../arch/arm/mach-tegra/clock.h" extern int user_mv_table[MAX_DVFS_FREQS]; +extern int avp_millivolts[MAX_DVFS_FREQS]; +extern int lp_cpu_millivolts[MAX_DVFS_FREQS]; +extern int emc_millivolts[MAX_DVFS_FREQS]; static ssize_t show_UV_mV_table(struct cpufreq_policy *policy, char *buf) { @@ -645,7 +654,7 @@ static ssize_t store_UV_mV_table(struct cpufreq_policy *policy, char *buf, size_ /* TODO: need some robustness checks */ user_mv_table[i] = volt_cur; - pr_info("user mv tbl[%i]: %lu\n", i, volt_cur); + pr_info("cpu g user mv tbl[%i]: %lu\n", i, volt_cur); /* Non-standard sysfs interface: advance buf */ ret = sscanf(buf, "%s", size_cur); @@ -657,8 +666,280 @@ static ssize_t store_UV_mV_table(struct cpufreq_policy *policy, char *buf, size_ return count; } +static ssize_t show_lp_UV_mV_table(struct cpufreq_policy *policy, char *buf) +{ + int i = 0; + char *out = buf; + struct clk *cpu_clk_lp = tegra_get_clock_by_name("cpu_lp"); + + /* find how many actual entries there are */ + i = cpu_clk_lp->dvfs->num_freqs; + + for(i--; i >=0; i--) { + out += sprintf(out, "%lumhz: %i mV\n", + cpu_clk_lp->dvfs->freqs[i]/1000000, + cpu_clk_lp->dvfs->millivolts[i]); + } + + return out - buf; +} + +static ssize_t store_lp_UV_mV_table(struct cpufreq_policy *policy, const char *buf, size_t count) +{ + int i = 0; + unsigned long volt_cur; + int ret; + char size_cur[16]; + + struct clk *cpu_clk_lp = tegra_get_clock_by_name("cpu_lp"); + + /* find how many actual entries there are */ + i = cpu_clk_lp->dvfs->num_freqs; + + for(i--; i >= 0; i--) { + + if(cpu_clk_lp->dvfs->freqs[i]/1000000 != 0) { + ret = sscanf(buf, "%lu", &volt_cur); + if (ret != 1) + return -EINVAL; + + /* TODO: need some robustness checks */ + lp_cpu_millivolts[i] = volt_cur; + pr_info("cpu lp mv tbl[%i]: %lu\n", i, volt_cur); + + /* Non-standard sysfs interface: advance buf */ + ret = sscanf(buf, "%s", size_cur); + buf += (strlen(size_cur)+1); + } + } + + return count; +} + +static ssize_t show_emc_UV_mV_table(struct cpufreq_policy *policy, char *buf) +{ + int i = 0; + char *out = buf; + struct clk *clk_emc = tegra_get_clock_by_name("emc"); + + /* find how many actual entries there are */ + i = clk_emc->dvfs->num_freqs; + + for(i--; i >=0; i--) { + out += sprintf(out, "%lumhz: %i mV\n", + clk_emc->dvfs->freqs[i]/1000000, + clk_emc->dvfs->millivolts[i]); + } + + return out - buf; +} + +static ssize_t store_emc_UV_mV_table(struct cpufreq_policy *policy, const char *buf, size_t count) +{ + int i = 0; + unsigned long volt_cur; + int ret; + char size_cur[16]; + + struct clk *clk_emc = tegra_get_clock_by_name("emc"); + + /* find how many actual entries there are */ + i = clk_emc->dvfs->num_freqs; + + for(i--; i >= 0; i--) { + + if(clk_emc->dvfs->freqs[i]/1000000 != 0) { + ret = sscanf(buf, "%lu", &volt_cur); + if (ret != 1) + return -EINVAL; + + /* TODO: need some robustness checks */ + emc_millivolts[i] = volt_cur; + pr_info("emc mv tbl[%i]: %lu\n", i, volt_cur); + + /* Non-standard sysfs interface: advance buf */ + ret = sscanf(buf, "%s", size_cur); + buf += (strlen(size_cur)+1); + } + } + + return count; +} + +static ssize_t show_avp_UV_mV_table(struct cpufreq_policy *policy, char *buf) +{ + int i = 0; + char *out = buf; + struct clk *avp_clk = tegra_get_clock_by_name("3d"); + + /* find how many actual entries there are */ + i = avp_clk->dvfs->num_freqs; + + for(i--; i >=0; i--) { + out += sprintf(out, "%lumhz: %i mV\n", + 
avp_clk->dvfs->freqs[i]/1000000, + avp_clk->dvfs->millivolts[i]); + } + + return out - buf; +} + +static ssize_t store_avp_UV_mV_table(struct cpufreq_policy *policy, const char *buf, size_t count) +{ + int i = 0; + unsigned long volt_cur; + int ret; + char size_cur[16]; + + struct clk *avp_clk = tegra_get_clock_by_name("3d"); + + /* find how many actual entries there are */ + i = avp_clk->dvfs->num_freqs; + + for(i--; i >= 0; i--) { + + if(avp_clk->dvfs->freqs[i]/1000000 != 0) { + ret = sscanf(buf, "%lu", &volt_cur); + if (ret != 1) + return -EINVAL; + + /* TODO: need some robustness checks */ + avp_millivolts[i] = volt_cur; + pr_info("avp mv tbl[%i]: %lu\n", i, volt_cur); + + /* Non-standard sysfs interface: advance buf */ + ret = sscanf(buf, "%s", size_cur); + buf += (strlen(size_cur)+1); + } + } + + return count; +} #endif +static ssize_t show_gpu_oc(struct cpufreq_policy *policy, char *buf) +{ + char *c = buf; + struct clk *gpu = tegra_get_clock_by_name("3d"); + unsigned int i = gpu->dvfs->num_freqs; + unsigned long gpu_freq = 0; + + if (i <= 0) + gpu_freq = -1;; + + if (i >= 1) + gpu_freq = gpu->dvfs->freqs[gpu->dvfs->num_freqs-1]/1000000; + + return sprintf(c, "%lu\n", gpu_freq); +} + +static ssize_t store_gpu_oc(struct cpufreq_policy *policy, const char *buf, size_t count) +{ + int ret; + unsigned long gpu_freq = 0; + unsigned int i = 0; + unsigned long new_gpu_freq = 0; + unsigned int new_volt = 0; + + //all the tables that need to be updated with the new frequencies + struct clk *vde = tegra_get_clock_by_name("vde"); + struct clk *mpe = tegra_get_clock_by_name("mpe"); + struct clk *two_d = tegra_get_clock_by_name("2d"); + struct clk *epp = tegra_get_clock_by_name("epp"); + struct clk *three_d = tegra_get_clock_by_name("3d"); + struct clk *three_d2 = tegra_get_clock_by_name("3d2"); + struct clk *se = tegra_get_clock_by_name("se"); + struct clk *cbus = tegra_get_clock_by_name("cbus"); + struct clk *host1x = tegra_get_clock_by_name("host1x"); + struct clk *pll_c = tegra_get_clock_by_name("pll_c"); + struct clk *sbus = tegra_get_clock_by_name("sbus"); + + unsigned int array_size = three_d->dvfs->num_freqs; + + if (array_size <= 0) + return -EINVAL; + + char cur_size[array_size]; + i = array_size; + + ret = sscanf(buf, "%lu", &gpu_freq); + + if (ret == 0) + return -EINVAL; + + new_gpu_freq = gpu_freq*1000000; + + vde->max_rate = new_gpu_freq; + mpe->max_rate = new_gpu_freq; + two_d->max_rate = new_gpu_freq; + epp->max_rate = new_gpu_freq; + three_d->max_rate = new_gpu_freq; + three_d2->max_rate = new_gpu_freq; + se->max_rate = new_gpu_freq; + host1x->max_rate = ( new_gpu_freq / 2 ); + cbus->max_rate = new_gpu_freq; + pll_c->max_rate = ( new_gpu_freq*2 ); + pr_info("NEW PLL_C MAX_RATE: %lu\n", pll_c->max_rate); + sbus->max_rate = (new_gpu_freq/3); + + for (i--; i >= 5; i--) { + mutex_lock(&dvfs_lock); + if (gpu_freq < 600) { + new_volt = 1200; + vde->dvfs->millivolts[i] = new_volt; + pr_info("NEW VOLTAGES < 600: %d\n", vde->dvfs->millivolts[i]); + } + if (gpu_freq >= 600 && gpu_freq < 666) { + new_volt = 1400; + vde->dvfs->millivolts[i] = new_volt; + pr_info("NEW VOLTAGES >= 600: %d\n", vde->dvfs->millivolts[i]); + } + if (gpu_freq >= 666 && gpu_freq < 750) { + new_volt = 1500; + vde->dvfs->millivolts[i] = new_volt; + pr_info("NEW VOLTAGES > 700: %d\n", vde->dvfs->millivolts[i]); + } + if (gpu_freq >= 700 && gpu_freq < 775) { + new_volt = 1550; + vde->dvfs->millivolts[i] = new_volt; + pr_info("NEW VOLTAGES >= 750: %d\n", vde->dvfs->millivolts[i]); + } + if (gpu_freq >= 750 && gpu_freq < 
800) { + new_volt = 1600; + vde->dvfs->millivolts[i] = new_volt; + pr_info("NEW VOLTAGES >= 775: %d\n", vde->dvfs->millivolts[i]); + } + if (gpu_freq >= 800) { + new_volt = 1650; + vde->dvfs->millivolts[i] = new_volt; + pr_info("NEW VOLTAGES >= 800: %d\n", vde->dvfs->millivolts[i]); + } + + + vde->dvfs->freqs[i] = new_gpu_freq; + mpe->dvfs->freqs[i] = new_gpu_freq; + two_d->dvfs->freqs[i] = new_gpu_freq; + epp->dvfs->freqs[i] = new_gpu_freq; + three_d->dvfs->freqs[i] = new_gpu_freq; + three_d2->dvfs->freqs[i] = new_gpu_freq; + se->dvfs->freqs[i] = new_gpu_freq; + host1x->dvfs->freqs[i] = ( new_gpu_freq / 2 ); + cbus->dvfs->freqs[i] = new_gpu_freq; + pll_c->dvfs->freqs[i] = ( new_gpu_freq * 2 ); + pr_info("NEW PLL_C FREQS: %lu\n", pll_c->dvfs->freqs[i]); + sbus->dvfs->freqs[i] = ( new_gpu_freq / 3 ); + mutex_unlock(&dvfs_lock); + } + + ret = sscanf(buf, "%s", cur_size); + + if (ret == 0) + return -EINVAL; + + buf += (strlen(cur_size) + 1); + + return count; +} cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400); cpufreq_freq_attr_ro(cpuinfo_min_freq); cpufreq_freq_attr_ro(cpuinfo_max_freq); @@ -676,8 +957,12 @@ cpufreq_freq_attr_rw(scaling_setspeed); cpufreq_freq_attr_rw(dvfs_test); cpufreq_freq_attr_ro(policy_min_freq); cpufreq_freq_attr_ro(policy_max_freq); +cpufreq_freq_attr_rw(gpu_oc); #ifdef CONFIG_VOLTAGE_CONTROL cpufreq_freq_attr_rw(UV_mV_table); +cpufreq_freq_attr_rw(lp_UV_mV_table); +cpufreq_freq_attr_rw(emc_UV_mV_table); +cpufreq_freq_attr_rw(avp_UV_mV_table); #endif static struct attribute *default_attrs[] = { @@ -694,9 +979,13 @@ static struct attribute *default_attrs[] = { &scaling_setspeed.attr, &dvfs_test.attr, &policy_min_freq.attr, + &gpu_oc.attr, &policy_max_freq.attr, #ifdef CONFIG_VOLTAGE_CONTROL &UV_mV_table.attr, + &lp_UV_mV_table.attr, + &emc_UV_mV_table.attr, + &avp_UV_mV_table.attr, #endif NULL From 55c5d9c1cb6155224ffb52b14d7df61acc3d68fc Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 3 Dec 2012 16:13:24 -0500 Subject: [PATCH 217/678] Revert "ARM: tegra: clock: Increase boost_up_threshold for AVP clock" This reverts commit 3fd061c6648c4fe8cdfc98507337752e8e84a8b9. --- arch/arm/mach-tegra/tegra3_actmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_actmon.c b/arch/arm/mach-tegra/tegra3_actmon.c index a76d0a963d9..5df6ed1fc47 100644 --- a/arch/arm/mach-tegra/tegra3_actmon.c +++ b/arch/arm/mach-tegra/tegra3_actmon.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved + * Copyright (c) 2011, NVIDIA Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -532,7 +532,7 @@ static struct actmon_dev actmon_dev_avp = { .boost_freq_step = 8000, .boost_up_coef = 200, .boost_down_coef = 50, - .boost_up_threshold = 85, + .boost_up_threshold = 75, .boost_down_threshold = 50, .up_wmark_window = 1, From 4da87d98c674d622b09451a185ad7396593319e6 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 27 May 2011 22:25:26 -0400 Subject: [PATCH 218/678] ARM: zImage: ensure it is always a multiple of 64 bits in size This is needed for proper alignment when the DTB appending feature is used. 
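For context: the flattened device tree format expects the blob to start on a 64-bit boundary, and an appended DTB begins immediately after the zImage payload, so the image itself has to be padded to a multiple of 8 bytes or the decompressor cannot parse the appended blob reliably. A rough host-side check of the invariant the new .pad output section guarantees is sketched below; it is illustrative only, and the zImage path is just an example.

#include <stdio.h>
#include <sys/stat.h>

/* Sketch: verify that a built zImage is a multiple of 64 bits in size,
 * which is what the BYTE(0)/ALIGN(8) padding in vmlinux.lds.in ensures. */
int main(void)
{
	struct stat st;

	if (stat("arch/arm/boot/zImage", &st) != 0) {
		perror("stat zImage");
		return 1;
	}
	printf("zImage size %lld bytes: %s\n", (long long)st.st_size,
	       (st.st_size % 8) ? "not 64-bit aligned" : "64-bit aligned");
	return 0;
}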
Signed-off-by: Nicolas Pitre Acked-by: Tony Lindgren Tested-by: Shawn Guo Tested-by: Dave Martin Tested-by: Thomas Abraham Signed-off-by: Chinmay Kamat Change-Id: I18dd4220a40984ab2551ca17a16db37193ffe80c Original: 72bf0bce411d9df0935eb77256604212de8f89cc Reviewed-on: http://git-master/r/108696 Reviewed-by: Stephen Warren GVS: Gerrit_Virtual_Submit Tested-by: Nitin Kumbhar Reviewed-by: Dan Willemsen --- arch/arm/boot/compressed/vmlinux.lds.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in index 4e728834a1b..4919f2ac8b8 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.in +++ b/arch/arm/boot/compressed/vmlinux.lds.in @@ -51,6 +51,10 @@ SECTIONS _got_start = .; .got : { *(.got) } _got_end = .; + + /* ensure the zImage file size is always a multiple of 64 bits */ + /* (without a dummy byte, ld just ignores the empty section) */ + .pad : { BYTE(0); . = ALIGN(8); } _edata = .; . = BSS_START; From 3c86fc032262a1cc9c388ec1cefe00f1df992032 Mon Sep 17 00:00:00 2001 From: Mike Kasick Date: Tue, 27 Nov 2012 16:39:12 +0100 Subject: [PATCH 219/678] Add support for kexec-hardboot "Allows hard booting (i.e., with a full hardware reboot) to a kernel previously loaded in memory by kexec. This works around the problem of soft-booted kernel hangs due to improper device shutdown and/or reinitialization." More info in /arch/arm/Kconfig. Original author: Mike Kasick These patches are ported from Asus TF201, to which it was ported by Jens Andersen . Change-Id: Ibee734f61ffa97577bfbcdd9a8cd567bd2d89f32 --- arch/arm/Kconfig | 26 ++++++++ arch/arm/boot/compressed/Makefile | 3 + arch/arm/boot/compressed/head.S | 76 ++++++++++++++++++++++- arch/arm/boot/compressed/misc.c | 23 +++++++ arch/arm/configs/motley_grouper_defconfig | 4 +- arch/arm/include/asm/kexec.h | 8 +++ arch/arm/kernel/machine_kexec.c | 25 +++++++- arch/arm/kernel/relocate_kernel.S | 47 ++++++++++++++ arch/arm/mach-tegra/common.c | 9 ++- arch/arm/mach-tegra/include/mach/memory.h | 12 ++++ arch/arm/mach-tegra/reset.c | 23 +++++++ include/linux/kexec.h | 19 +++++- kernel/kexec.c | 4 ++ 13 files changed, 270 insertions(+), 9 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d489b8f83bd..a6806407915 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1992,6 +1992,32 @@ config ATAGS_PROC Should the atags used to boot the kernel be exported in an "atags" file in procfs. Useful with kexec. +config KEXEC_HARDBOOT + bool "Support hard booting to a kexec kernel" + depends on KEXEC + help + Allows hard booting (i.e., with a full hardware reboot) to a kernel + previously loaded in memory by kexec. This works around the problem of + soft-booted kernel hangs due to improper device shutdown and/or + reinitialization. Support is comprised of two components: + + First, a "hardboot" flag is added to the kexec syscall to force a hard + reboot in relocate_new_kernel() (which requires machine-specific assembly + code). This also requires the kexec userspace tool to load the kexec'd + kernel in memory region left untouched by the bootloader (i.e., not + explicitly cleared and not overwritten by the boot kernel). Just prior + to reboot, the kexec kernel arguments are stashed in a machine-specific + memory page that must also be preserved. Note that this hardboot page + need not be reserved during regular kernel execution. 
+ + Second, the zImage decompresor of the boot (bootloader-loaded) kernel is + modified to check the hardboot page for fresh kexec arguments, and if + present, attempts to jump to the kexec'd kernel preserved in memory. + + Note that hardboot support is only required in the boot kernel and any + kernel capable of performing a hardboot kexec. It is _not_ required by a + kexec'd kernel. + config CRASH_DUMP bool "Build kdump crash kernel (EXPERIMENTAL)" depends on EXPERIMENTAL diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 9e56c726bbd..833905c6ecc 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -119,6 +119,9 @@ endif ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 endif +ifneq ($(PARAMS_PHYS),) +LDFLAGS_vmlinux += --defsym params_phys=$(PARAMS_PHYS) +endif # ? LDFLAGS_vmlinux += -p # Report unresolved symbol references diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 24701d6f72b..f3f5a9af997 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -9,6 +9,11 @@ * published by the Free Software Foundation. */ #include +#include + +#ifdef CONFIG_KEXEC_HARDBOOT + #include +#endif /* * Debugging stuff @@ -133,7 +138,31 @@ start: .word _edata @ zImage end address THUMB( .thumb ) 1: mov r7, r1 @ save architecture ID - mov r8, r2 @ save atags pointer + teq r0, #0 @ Check for kexec_boot_atags. + movne r8, r0 @ Save kexec_boot_tags. + moveq r8, r2 @ save atags pointer + +#ifdef CONFIG_KEXEC_HARDBOOT + /* Check hardboot page for a kexec kernel. */ + ldr r3, =KEXEC_HB_PAGE_ADDR + ldr r0, [r3] + ldr r1, =KEXEC_HB_PAGE_MAGIC + teq r0, r1 + bne not_booting_other + + /* Clear hardboot page magic to avoid boot loop. */ + mov r0, #0 + str r0, [r3] + + /* Load boot arguments and jump to kexec kernel. */ + ldr r0, [r3, #12] @ kexec_boot_atags (r2: boot_atags) + ldr r1, [r3, #8] @ kexec_mach_type + ldr pc, [r3, #4] @ kexec_start_address + + .ltorg + +not_booting_other: +#endif #ifndef __ARM_ARCH_2__ /* @@ -348,6 +377,44 @@ not_relocated: mov r0, #0 add r2, sp, #0x10000 @ 64k max mov r3, r7 bl decompress_kernel + +/* Copy the kernel tagged list (atags): + * + * The kernel requires atags to be located in a direct-mapped region, + * usually below the kernel in the first 16 kB of RAM. If they're above + * (the start of) the kernel, they need to be copied to a suitable + * location, e.g., the machine-defined params_phys. + * + * The assumption is that the tags will only be "out of place" if the + * decompressor code is also, so copying is implemented only in the "won't + * overwrite" case (which should be fixed). Still need to make sure that + * the copied tags don't overwrite either the kernel or decompressor code + * (or rather, the remainder of it since everything up to here has already + * been executed). + * + * r4: zreladdr (kernel start) + * r8: atags */ + + /* Don't need to copy atags if they're already below the kernel. */ + cmp r8, r4 + blo call_kernel + + /* r1: min(zreladdr, pc) */ + mov r1, pc + cmp r4, r1 + movlo r1, r4 + + /* Compute max space for atags, if max <= 0 don't copy. */ + ldr r0, =params_phys @ dest + subs r2, r1, r0 @ max = min(zreladdr, pc) - dest + bls call_kernel + + /* Copy atags to params_phys. 
*/ + mov r1, r8 @ src + bl copy_atags + mov r8, r0 + +call_kernel: bl cache_clean_flush bl cache_off mov r0, #0 @ must be zero @@ -356,6 +423,8 @@ not_relocated: mov r0, #0 ARM( mov pc, r4 ) @ call kernel THUMB( bx r4 ) @ entry point is always ARM + .ltorg + .align 2 .type LC0, #object LC0: .word LC0 @ r1 @@ -467,9 +536,14 @@ __setup_mmu: sub r3, r4, #16384 @ Page directory size * bits for the RAM area only. */ mov r0, r3 +#if defined(PLAT_PHYS_OFFSET) && defined(END_MEM) + mov r9, #PLAT_PHYS_OFFSET @ start of RAM + ldr r10, =END_MEM @ end of RAM +#else mov r9, r0, lsr #18 mov r9, r9, lsl #18 @ start of RAM add r10, r9, #0x10000000 @ a reasonable RAM size +#endif mov r1, #0x12 orr r1, r1, #3 << 10 add r2, r3, #16384 diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c index 832d37236c5..f1ce0efa505 100644 --- a/arch/arm/boot/compressed/misc.c +++ b/arch/arm/boot/compressed/misc.c @@ -25,6 +25,7 @@ unsigned int __machine_arch_type; #include /* for NULL */ #include #include +#include static void putstr(const char *ptr); @@ -192,3 +193,25 @@ decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p, else putstr(" done, booting the kernel.\n"); } + +const struct tag *copy_atags(struct tag *dest, const struct tag *src, + size_t max) +{ + struct tag *tag; + size_t size; + + /* Find the last tag (ATAG_NONE). */ + for_each_tag(tag, (struct tag *)src) + continue; + + /* Include the last tag in copy. */ + size = (char *)tag - (char *)src + sizeof(struct tag_header); + + /* If there's not enough room, just use original and hope it works. */ + if (size > max) + return src; + + memcpy(dest, src, size); + + return dest; +} diff --git a/arch/arm/configs/motley_grouper_defconfig b/arch/arm/configs/motley_grouper_defconfig index 4b871c7b459..d59826a235e 100644 --- a/arch/arm/configs/motley_grouper_defconfig +++ b/arch/arm/configs/motley_grouper_defconfig @@ -486,7 +486,9 @@ CONFIG_CMDLINE="tegra_wdt.heartbeat=30" CONFIG_CMDLINE_EXTEND=y # CONFIG_CMDLINE_FORCE is not set # CONFIG_XIP_KERNEL is not set -# CONFIG_KEXEC is not set +CONFIG_KEXEC=y +CONFIG_ATAGS_PROC=y +CONFIG_KEXEC_HARDBOOT=y # CONFIG_CRASH_DUMP is not set # CONFIG_AUTO_ZRELADDR is not set diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index c2b9b4bdec0..564c55b394e 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -17,6 +17,10 @@ #define KEXEC_ARM_ATAGS_OFFSET 0x1000 #define KEXEC_ARM_ZIMAGE_OFFSET 0x8000 +#ifdef CONFIG_KEXEC_HARDBOOT + #define KEXEC_HB_PAGE_MAGIC 0x4a5db007 +#endif + #ifndef __ASSEMBLY__ /** @@ -53,6 +57,10 @@ static inline void crash_setup_regs(struct pt_regs *newregs, /* Function pointer to optional machine-specific reinitialization */ extern void (*kexec_reinit)(void); +#ifdef CONFIG_KEXEC_HARDBOOT +extern void (*kexec_hardboot_hook)(void); +#endif + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_KEXEC */ diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index e59bbd496c3..812c0cbd1e4 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -22,6 +22,10 @@ extern unsigned long kexec_start_address; extern unsigned long kexec_indirection_page; extern unsigned long kexec_mach_type; extern unsigned long kexec_boot_atags; +#ifdef CONFIG_KEXEC_HARDBOOT +extern unsigned long kexec_hardboot; +void (*kexec_hardboot_hook)(void); +#endif static atomic_t waiting_for_crash_ipi; @@ -99,6 +103,9 @@ void machine_kexec(struct kimage *image) kexec_indirection_page = page_list; 
kexec_mach_type = machine_arch_type; kexec_boot_atags = image->start - KEXEC_ARM_ZIMAGE_OFFSET + KEXEC_ARM_ATAGS_OFFSET; +#ifdef CONFIG_KEXEC_HARDBOOT + kexec_hardboot = image->hardboot; +#endif /* copy our kernel relocation code to the control code page */ memcpy(reboot_code_buffer, @@ -114,11 +121,23 @@ void machine_kexec(struct kimage *image) local_irq_disable(); local_fiq_disable(); setup_mm_for_reboot(0); /* mode is not used, so just pass 0*/ + +#ifdef CONFIG_KEXEC_HARDBOOT + /* Run any final machine-specific shutdown code. */ + if (image->hardboot && kexec_hardboot_hook) + kexec_hardboot_hook(); +#endif + flush_cache_all(); outer_flush_all(); outer_disable(); cpu_proc_fin(); - outer_inv_all(); - flush_cache_all(); - cpu_reset(reboot_code_buffer_phys); + + // Freezes the tegra 3 + //outer_inv_all(); + //flush_cache_all(); + + /* Must call cpu_reset via physical address since ARMv7 (& v6) stalls the + * pipeline after disabling the MMU. */ + ((typeof(cpu_reset) *)virt_to_phys(cpu_reset))(reboot_code_buffer_phys); } diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index d0cdedf4864..98e0a89782c 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -4,6 +4,13 @@ #include +#ifdef CONFIG_KEXEC_HARDBOOT +#include +#if defined(CONFIG_ARCH_TEGRA_2x_SOC) || defined(CONFIG_ARCH_TEGRA_3x_SOC) + #include +#endif +#endif + .globl relocate_new_kernel relocate_new_kernel: @@ -52,6 +59,12 @@ relocate_new_kernel: b 0b 2: +#ifdef CONFIG_KEXEC_HARDBOOT + ldr r0, kexec_hardboot + teq r0, #0 + bne hardboot +#endif + /* Jump to relocated kernel */ mov lr,r1 mov r0,#0 @@ -60,6 +73,34 @@ relocate_new_kernel: ARM( mov pc, lr ) THUMB( bx lr ) +#ifdef CONFIG_KEXEC_HARDBOOT +hardboot: + /* Stash boot arguments in hardboot page: + * 0: KEXEC_HB_PAGE_MAGIC + * 4: kexec_start_address + * 8: kexec_mach_type + * 12: kexec_boot_atags */ + ldr r0, =KEXEC_HB_PAGE_ADDR + str r1, [r0, #4] + ldr r1, kexec_mach_type + str r1, [r0, #8] + ldr r1, kexec_boot_atags + str r1, [r0, #12] + ldr r1, =KEXEC_HB_PAGE_MAGIC + str r1, [r0] + +#if defined(CONFIG_ARCH_TEGRA_2x_SOC) || defined(CONFIG_ARCH_TEGRA_3x_SOC) + ldr r0, =TEGRA_PMC_BASE + ldr r1, [r0] + orr r1, r1, #0x10 + str r1, [r0] +loop: b loop +#else +#error "No reboot method defined for hardboot." 
+#endif + + .ltorg +#endif .align .globl kexec_start_address @@ -79,6 +120,12 @@ kexec_mach_type: kexec_boot_atags: .long 0x0 +#ifdef CONFIG_KEXEC_HARDBOOT + .globl kexec_hardboot +kexec_hardboot: + .long 0x0 +#endif + relocate_new_kernel_end: .globl relocate_new_kernel_size diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index 430278cdd34..483f8391410 100755 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -961,13 +961,20 @@ void __init tegra_ram_console_debug_reserve(unsigned long ram_console_size) { struct resource *res; long ret; + unsigned long real_start, real_size; res = platform_get_resource(&ram_console_device, IORESOURCE_MEM, 0); if (!res) goto fail; + res->start = memblock_end_of_DRAM() - ram_console_size; res->end = res->start + ram_console_size - 1; - ret = memblock_remove(res->start, ram_console_size); + + // Register an extra 1M before ramconsole to store kexec stuff + real_start = res->start - SZ_1M; + real_size = ram_console_size + SZ_1M; + + ret = memblock_remove(real_start, real_size); if (ret) goto fail; diff --git a/arch/arm/mach-tegra/include/mach/memory.h b/arch/arm/mach-tegra/include/mach/memory.h index 5f51066482e..84dd44ebaa9 100644 --- a/arch/arm/mach-tegra/include/mach/memory.h +++ b/arch/arm/mach-tegra/include/mach/memory.h @@ -29,6 +29,18 @@ #define PLAT_PHYS_OFFSET UL(0x80000000) #endif +#if defined(CONFIG_MACH_GROUPER) +#define END_MEM UL(0xBEA00000) +#endif + +#if defined(CONFIG_KEXEC_HARDBOOT) +#if defined(CONFIG_MACH_GROUPER) +#define KEXEC_HB_PAGE_ADDR UL(0xBEA00000) +#else +#error "Adress for kexec hardboot page not defined" +#endif +#endif + /* * Unaligned DMA causes tegra dma to place data on 4-byte boundary after * expected address. Call to skb_reserve(skb, NET_IP_ALIGN) was causing skb diff --git a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c index 3ab2c132d62..d91ba95eaff 100644 --- a/arch/arm/mach-tegra/reset.c +++ b/arch/arm/mach-tegra/reset.c @@ -27,6 +27,10 @@ #include "sleep.h" #include "pm.h" +#ifdef CONFIG_KEXEC_HARDBOOT +#include +#endif + static bool is_enabled; static void tegra_cpu_reset_handler_enable(void) @@ -88,6 +92,21 @@ void tegra_cpu_reset_handler_restore(void) } #endif +#ifdef CONFIG_KEXEC_HARDBOOT +#define RECOVERY_MODE BIT(31) +void tegra_kexec_hardboot(void) +{ + /* Reboot with the recovery kernel since the boot kernel decompressor may + * not support the hardboot jump. 
*/ + + void __iomem *reset = IO_ADDRESS(TEGRA_PMC_BASE + 0x00); + + u32 reg = readl_relaxed(reset + PMC_SCRATCH0); + reg |= RECOVERY_MODE; + writel_relaxed(reg, reset + PMC_SCRATCH0); +} +#endif + void __init tegra_cpu_reset_handler_init(void) { #ifdef CONFIG_SMP @@ -112,4 +131,8 @@ void __init tegra_cpu_reset_handler_init(void) __pa(&__tegra_cpu_reset_handler_data[TEGRA_RESET_DATA_SIZE])); tegra_cpu_reset_handler_enable(); + +#ifdef CONFIG_KEXEC_HARDBOOT + kexec_hardboot_hook = tegra_kexec_hardboot; +#endif } diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c2478a342cd..e0f1cee6616 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -101,6 +101,10 @@ struct kimage { #define KEXEC_TYPE_CRASH 1 unsigned int preserve_context : 1; +#ifdef CONFIG_KEXEC_HARDBOOT + unsigned int hardboot : 1; +#endif + #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; #endif @@ -165,6 +169,11 @@ extern struct kimage *kexec_crash_image; #define KEXEC_ON_CRASH 0x00000001 #define KEXEC_PRESERVE_CONTEXT 0x00000002 + +#ifdef CONFIG_KEXEC_HARDBOOT +#define KEXEC_HARDBOOT 0x00000004 +#endif + #define KEXEC_ARCH_MASK 0xffff0000 /* These values match the ELF architecture values. @@ -183,10 +192,14 @@ extern struct kimage *kexec_crash_image; #define KEXEC_ARCH_MIPS ( 8 << 16) /* List of defined/legal kexec flags */ -#ifndef CONFIG_KEXEC_JUMP -#define KEXEC_FLAGS KEXEC_ON_CRASH -#else +#if defined(CONFIG_KEXEC_JUMP) && defined(CONFIG_KEXEC_HARDBOOT) +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT | KEXEC_HARDBOOT) +#elif defined(CONFIG_KEXEC_JUMP) #define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT) +#elif defined(CONFIG_KEXEC_HARDBOOT) +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_HARDBOOT) +#else +#define KEXEC_FLAGS (KEXEC_ON_CRASH) #endif #define VMCOREINFO_BYTES (4096) diff --git a/kernel/kexec.c b/kernel/kexec.c index 296fbc84d65..2e2f1df2794 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1005,6 +1005,10 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, if (flags & KEXEC_PRESERVE_CONTEXT) image->preserve_context = 1; +#ifdef CONFIG_KEXEC_HARDBOOT + if (flags & KEXEC_HARDBOOT) + image->hardboot = 1; +#endif result = machine_kexec_prepare(image); if (result) goto out; From 969680236ef1714f1cf4a7fdd8a8695251f2f8e4 Mon Sep 17 00:00:00 2001 From: John Bonesio Date: Fri, 27 May 2011 18:45:50 -0400 Subject: [PATCH 220/678] ARM: zImage: Allow the appending of a device tree binary This patch provides the ability to boot using a device tree that is appended to the raw binary zImage (e.g. cat zImage .dtb > zImage_w_dtb). 
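The assembly added to head.S below is easier to follow as C. A rough sketch of the detection step, assuming a little-endian kernel (the non-__ARMEB__ path): the word at _edata is compared against the FDT magic, the blob's totalsize header field is byte-swapped, and the result is rounded up to keep 64-bit alignment before the image-end and stack pointers are advanced past the blob.

#include <stdint.h>

#define FDT_MAGIC	0xd00dfeedU	/* stored big-endian in the blob */

/* Little-endian host assumed, matching the non-__ARMEB__ code path. */
static uint32_t be32_to_cpu_sketch(uint32_t x)
{
	return ((x & 0x000000ffU) << 24) | ((x & 0x0000ff00U) << 8) |
	       ((x & 0x00ff0000U) >> 8)  | (x >> 24);
}

/*
 * edata points at the first word after the zImage payload.  Returns the
 * appended DTB size rounded up to 8 bytes, or 0 if no blob is appended.
 */
static unsigned long appended_dtb_size(const uint32_t *edata)
{
	unsigned long size;

	if (be32_to_cpu_sketch(edata[0]) != FDT_MAGIC)
		return 0;			/* keep the bootloader atags in r8 */

	size = be32_to_cpu_sketch(edata[1]);	/* fdt_header.totalsize */
	return (size + 7) & ~7UL;		/* preserve 64-bit alignment */
}

When a blob is found, r8 is pointed at it instead of the bootloader-provided atags, and _edata, the image end and the stack pointer are all bumped by the rounded size so the DTB is relocated together with the kernel when relocation is needed.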
Signed-off-by: John Bonesio [nico: ported to latest zImage changes plus additional cleanups/improvements] Signed-off-by: Nicolas Pitre Acked-by: Grant Likely Acked-by: Tony Lindgren Tested-by: Shawn Guo Tested-by: Dave Martin Tested-by: Thomas Abraham Signed-off-by: Chinmay Kamat Change-Id: I93c54694a59b357eb550587b94c986dcb19ab954 Original: e2a6a3aafa9862c4a4b59f2a59b8f923d64a680e Reviewed-on: http://git-master/r/107422 Reviewed-by: Simone Willett Tested-by: Simone Willett --- arch/arm/Kconfig | 20 ++++++++++ arch/arm/boot/compressed/head.S | 70 +++++++++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 3 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a6806407915..9550edfc140 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1904,6 +1904,26 @@ config ZBOOT_ROM_SH_MOBILE_SDHI endchoice +config ARM_APPENDED_DTB + bool "Use appended device tree blob to zImage (EXPERIMENTAL)" + depends on OF && !ZBOOT_ROM && EXPERIMENTAL + help + With this option, the boot code will look for a device tree binary + (DTB) appended to zImage + (e.g. cat zImage .dtb > zImage_w_dtb). + + This is meant as a backward compatibility convenience for those + systems with a bootloader that can't be upgraded to accommodate + the documented boot protocol using a device tree. + + Beware that there is very little in terms of protection against + this option being confused by leftover garbage in memory that might + look like a DTB header after a reboot if no actual DTB is appended + to zImage. Do not leave this option active in a production kernel + if you don't intend to always append a DTB. Proper passing of the + location into r2 of a bootloader provided DTB is always preferable + to this option. + config CMDLINE string "Default kernel command string" default "" diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index f3f5a9af997..1e2a694ad69 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -245,6 +245,59 @@ restart: adr r0, LC0 mov r10, r6 #endif + mov r5, #0 @ init dtb size to 0 +#ifdef CONFIG_ARM_APPENDED_DTB +/* + * r0 = delta + * r2 = BSS start + * r3 = BSS end + * r4 = final kernel address + * r5 = appended dtb size (still unknown) + * r6 = _edata + * r7 = architecture ID + * r8 = atags/device tree pointer + * r9 = size of decompressed image + * r10 = end of this image, including bss/stack/malloc space if non XIP + * r11 = GOT start + * r12 = GOT end + * sp = stack pointer + * + * if there are device trees (dtb) appended to zImage, advance r10 so that the + * dtb data will get relocated along with the kernel if necessary. + */ + + ldr lr, [r6, #0] +#ifndef __ARMEB__ + ldr r1, =0xedfe0dd0 @ sig is 0xd00dfeed big endian +#else + ldr r1, =0xd00dfeed +#endif + cmp lr, r1 + bne dtb_check_done @ not found + + mov r8, r6 @ use the appended device tree + + /* Get the dtb's size */ + ldr r5, [r6, #4] +#ifndef __ARMEB__ + /* convert r5 (dtb size) to little endian */ + eor r1, r5, r5, ror #16 + bic r1, r1, #0x00ff0000 + mov r5, r5, ror #8 + eor r5, r5, r1, lsr #8 +#endif + + /* preserve 64-bit alignment */ + add r5, r5, #7 + bic r5, r5, #7 + + /* relocate some pointers past the appended dtb */ + add r6, r6, r5 + add r10, r10, r5 + add sp, sp, r5 +dtb_check_done: +#endif + /* * Check to see if we will overwrite ourselves. 
* r4 = final kernel address @@ -314,14 +367,16 @@ wont_overwrite: * r2 = BSS start * r3 = BSS end * r4 = kernel execution address + * r5 = appended dtb size (0 if not present) * r7 = architecture ID * r8 = atags pointer * r11 = GOT start * r12 = GOT end * sp = stack pointer */ - teq r0, #0 + orrs r1, r0, r5 beq not_relocated + add r11, r11, r0 add r12, r12, r0 @@ -336,12 +391,21 @@ wont_overwrite: /* * Relocate all entries in the GOT table. + * Bump bss entries to _edata + dtb size */ 1: ldr r1, [r11, #0] @ relocate entries in the GOT - add r1, r1, r0 @ table. This fixes up the - str r1, [r11], #4 @ C references. + add r1, r1, r0 @ This fixes up C references + cmp r1, r2 @ if entry >= bss_start && + cmphs r3, r1 @ bss_end > entry + addhi r1, r1, r5 @ entry += dtb size + str r1, [r11], #4 @ next entry cmp r11, r12 blo 1b + + /* bump our bss pointers too */ + add r2, r2, r5 + add r3, r3, r5 + #else /* From dc3198f615957b8cef8b179945b171d3983f7f2d Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 4 Dec 2012 12:15:03 -0500 Subject: [PATCH 221/678] drivers: cpufreq: comment out the setting of fixed voltages in custom gpu oc code may use in future to establish safe levels --- drivers/cpufreq/cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 25ec7f419a7..1a8b31853ec 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -884,7 +884,7 @@ static ssize_t store_gpu_oc(struct cpufreq_policy *policy, const char *buf, size for (i--; i >= 5; i--) { mutex_lock(&dvfs_lock); - if (gpu_freq < 600) { +/* if (gpu_freq < 600) { new_volt = 1200; vde->dvfs->millivolts[i] = new_volt; pr_info("NEW VOLTAGES < 600: %d\n", vde->dvfs->millivolts[i]); @@ -914,7 +914,7 @@ static ssize_t store_gpu_oc(struct cpufreq_policy *policy, const char *buf, size vde->dvfs->millivolts[i] = new_volt; pr_info("NEW VOLTAGES >= 800: %d\n", vde->dvfs->millivolts[i]); } - +*/ vde->dvfs->freqs[i] = new_gpu_freq; mpe->dvfs->freqs[i] = new_gpu_freq; From 7c915eeb851e81f078b19e63fbcc279c45a9b2f3 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 4 Dec 2012 12:28:45 -0500 Subject: [PATCH 222/678] cpufreq: interactive: better usage of lp max and multicore touchboost --- drivers/cpufreq/cpufreq_interactive.c | 78 ++++++++++----------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 5a08993f5df..19daca8c5e4 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -31,10 +31,18 @@ #include #include #include +#include + +#include "../../arch/arm/mach-tegra/clock.h" +#include "../../arch/arm/mach-tegra/pm.h" #define CREATE_TRACE_POINTS #include +/* lpcpu variables */ +static struct clk *cpu_lp_clk; +static unsigned int idle_top_freq; + static atomic_t active_count = ATOMIC_INIT(0); struct cpufreq_interactive_cpuinfo { @@ -80,28 +88,10 @@ struct cpufreq_interactive_core_lock { static struct cpufreq_interactive_core_lock core_lock; /* Hi speed to bump to from lo speed when load burst (default max) */ -static unsigned int hispeed_freq = 1100000; - -/* CPU will be boosted to this freq - default 1000Mhz - when an input event is detected */ -#ifdef CONFIG_LP_OVERCLOCK -#ifdef CONFIG_LP_OC_700 -static unsigned int input_boost_freq = 700000; -#endif -#ifdef CONFIG_LP_OC_666 -static unsigned int input_boost_freq = 666000; -#endif -#ifdef CONFIG_LP_OC_620 -static unsigned int input_boost_freq = 620000; 
-#endif -#ifdef CONFIG_LP_OC_550 -static unsigned int input_boost_freq = 550000; -#endif -#else -static unsigned int input_boost_freq = 475000; -#endif +static unsigned int hispeed_freq = 1300000; /* Go to hispeed_freq when CPU load at or above this value. */ -#define DEFAULT_GO_HISPEED_LOAD 80 +#define DEFAULT_GO_HISPEED_LOAD 85 static unsigned long go_hispeed_load; /* Consider IO as busy */ @@ -110,13 +100,13 @@ static unsigned long io_is_busy; /* * The minimum amount of time to spend at a frequency before we can ramp down. */ -#define DEFAULT_MIN_SAMPLE_TIME 30000; +#define DEFAULT_MIN_SAMPLE_TIME 60000; static unsigned long min_sample_time; /* * The sample rate of the timer used to increase frequency */ -#define DEFAULT_TIMER_RATE 20000; +#define DEFAULT_TIMER_RATE 40000; static unsigned long timer_rate; /* @@ -197,11 +187,7 @@ static unsigned int cpufreq_interactive_get_target( } } } else { - if (hispeed_freq > input_boost_freq) { - target_freq = ((hispeed_freq + input_boost_freq) / 2) * cpu_load / 100; - } else { - target_freq = hispeed_freq * cpu_load / 100; - } + target_freq = idle_top_freq * cpu_load / 100; } target_freq = min(target_freq, pcpu->policy->max); @@ -540,10 +526,14 @@ static int cpufreq_interactive_speedchange_task(void *data) return 0; } +static unsigned int Touch_poke_attr[4] = {1100000, 860000, 0, 0}; + static void cpufreq_interactive_boost(void) { int i; int anyboost = 0; + unsigned int nr_cpus; + unsigned int input_boost_freq; unsigned long flags; struct cpufreq_interactive_cpuinfo *pcpu; @@ -552,6 +542,13 @@ static void cpufreq_interactive_boost(void) for_each_online_cpu(i) { pcpu = &per_cpu(cpuinfo, i); + nr_cpus = num_online_cpus(); + + if (!is_lp_cluster()) { + input_boost_freq = Touch_poke_attr[nr_cpus-1]; + } else { + input_boost_freq = idle_top_freq; + } if (pcpu->target_freq < input_boost_freq) { pcpu->target_freq = input_boost_freq; cpumask_set_cpu(i, &speedchange_cpumask); @@ -637,29 +634,6 @@ static int cpufreq_interactive_lock_cores_task(void *data) return 0; } -static ssize_t show_input_boost_freq(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%u\n", input_boost_freq); -} - -static ssize_t store_input_boost_freq(struct kobject *kobj, - struct attribute *attr, const char *buf, - size_t count) -{ - int ret; - long unsigned int val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - input_boost_freq = val; - return count; -} - -static struct global_attr input_boost_freq_attr = __ATTR(input_boost_freq, 0644, - show_input_boost_freq, store_input_boost_freq); - /* * Pulsed boost on input event raises CPUs to hispeed_freq and lets * usual algorithm of min_sample_time decide when to allow speed @@ -930,7 +904,6 @@ static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, define_one_global_rw(boost); static struct attribute *interactive_attributes[] = { - &input_boost_freq_attr.attr, &io_is_busy_attr.attr, &hispeed_freq_attr.attr, &go_hispeed_load_attr.attr, @@ -1078,6 +1051,9 @@ static int __init cpufreq_interactive_init(void) */ struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO-1 }; + cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); + idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; + go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; min_sample_time = DEFAULT_MIN_SAMPLE_TIME; above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY; From aac1d3ba8639a6b39f64fdba4daf4698b5a00655 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 5 Dec 2012 15:15:57 -0500 Subject: [PATCH 223/678] 
sound/soc/tegra/tegra_pcm: add min CPU perf lock for audio playback thanks to faux123 --- sound/soc/tegra/Kconfig | 11 +++++++++++ sound/soc/tegra/tegra_pcm.c | 23 +++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/sound/soc/tegra/Kconfig b/sound/soc/tegra/Kconfig index 1217aaa224f..15e832b2628 100644 --- a/sound/soc/tegra/Kconfig +++ b/sound/soc/tegra/Kconfig @@ -230,6 +230,17 @@ config SND_SOC_TEGRA_MAX98095 Say Y or M here if you want to add support for SoC audio on Tegra boards using the MAX98095 codec. Currently, only supported board is Cardhu. + config HEADSET_FUNCTION tristate "Headset detection function" default n + +config AUDIO_MIN_PERFLOCK + + bool "Minimum Audio Playback performance lock + depends on SND_SOC_TEGRA_RT5640 + default n + help + Tegra3 minimum Audio Performance Lock to prevent audio playback + underruns + diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index 7186f3dd3bf..f34d087cd72 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -40,6 +40,12 @@ #include "tegra_pcm.h" +#ifdef CONFIG_AUDIO_MIN_PERFLOCK +#include +#define PLAYBACK_CPU_FREQ_MAX 370000 +static struct pm_qos_request_list playback_cpu_freq_req; +#endif + #define DRV_NAME "tegra-pcm-audio" #define PERIOD_BYTES_MAX (PAGE_SIZE * 2) @@ -146,6 +152,10 @@ static int tegra_pcm_open(struct snd_pcm_substream *substream) if (prtd == NULL) return -ENOMEM; +#ifdef CONFIG_AUDIO_MIN_PERFLOCK + pm_qos_update_request(&playback_cpu_freq_req, + (s32)PLAYBACK_CPU_FREQ_MAX); +#endif runtime->private_data = prtd; prtd->substream = substream; @@ -198,6 +208,11 @@ static int tegra_pcm_close(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime = substream->runtime; struct tegra_runtime_data *prtd = runtime->private_data; +#ifdef CONFIG_AUDIO_MIN_PERFLOCK + pm_qos_update_request(&playback_cpu_freq_req, + (s32)PM_QOS_CPU_FREQ_MIN_DEFAULT_VALUE); +#endif + if (prtd->dma_chan) tegra_dma_free_channel(prtd->dma_chan); @@ -430,12 +445,20 @@ static struct platform_driver tegra_pcm_driver = { static int __init snd_tegra_pcm_init(void) { +#ifdef CONFIG_AUDIO_MIN_PERFLOCK + pm_qos_add_request(&playback_cpu_freq_req, + PM_QOS_CPU_FREQ_MIN, + (s32)PLAYBACK_CPU_FREQ_MAX); +#endif return platform_driver_register(&tegra_pcm_driver); } module_init(snd_tegra_pcm_init); static void __exit snd_tegra_pcm_exit(void) { +#ifdef CONFIG_AUDIO_MIN_PERFLOCK + pm_qos_remove_request(&playback_cpu_freq_req); +#endif platform_driver_unregister(&tegra_pcm_driver); } module_exit(snd_tegra_pcm_exit); From 9f3e552de08da1ec9535ff774c387610dc7bf10d Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 5 Dec 2012 15:39:40 -0500 Subject: [PATCH 224/678] sound/soc/tegra/tegra_pcm: fix typo in min cpu perf lock --- sound/soc/tegra/tegra_pcm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index f34d087cd72..934cf06f48a 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -42,7 +42,7 @@ #ifdef CONFIG_AUDIO_MIN_PERFLOCK #include -#define PLAYBACK_CPU_FREQ_MAX 370000 +#define PLAYBACK_CPU_FREQ_MIN 340000 static struct pm_qos_request_list playback_cpu_freq_req; #endif @@ -154,7 +154,7 @@ static int tegra_pcm_open(struct snd_pcm_substream *substream) #ifdef CONFIG_AUDIO_MIN_PERFLOCK pm_qos_update_request(&playback_cpu_freq_req, - (s32)PLAYBACK_CPU_FREQ_MAX); + (s32)PLAYBACK_CPU_FREQ_MIN); #endif runtime->private_data = prtd; prtd->substream = substream; @@ -448,7 +448,7 @@ static 
int __init snd_tegra_pcm_init(void) #ifdef CONFIG_AUDIO_MIN_PERFLOCK pm_qos_add_request(&playback_cpu_freq_req, PM_QOS_CPU_FREQ_MIN, - (s32)PLAYBACK_CPU_FREQ_MAX); + (s32)PLAYBACK_CPU_FREQ_MIN); #endif return platform_driver_register(&tegra_pcm_driver); } From 26a3d920afb6380be84b900cef5d0df8f7ecb250 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 5 Dec 2012 18:55:25 -0500 Subject: [PATCH 225/678] ARM: tegra: select correct parent clk for pll_p For Tegra30, pll_p clk's parent is wrongly specified as clk_m instead of pll_ref in the tegra30_clk_init_table and this is resulting in a boot-time warning. This patch fixes this by correcting the clk init table. --- arch/arm/mach-tegra/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index 483f8391410..4eabbdc436d 100755 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -172,7 +172,7 @@ static __initdata struct tegra_clk_init_table common_clk_init_table[] = { { "pll_p_out2", "pll_p", 48000000, false }, { "pll_p_out3", "pll_p", 72000000, true }, { "pll_p_out4", "pll_p", 108000000, false }, - { "pll_m", "clk_m", 0, true }, + { "pll_m", "pll_ref", 0, true }, { "pll_m_out1", "pll_m", 120000000, true }, { "sclk", "pll_c_out1", 40000000, true }, { "hclk", "sclk", 40000000, true }, From 0b6934809225f8b2c7472de1ebdbd65807f07f8e Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 5 Dec 2012 19:20:15 -0500 Subject: [PATCH 226/678] iommu/tegra: smmu: Fix deadly typo Fix a deadly typo in macro definition. --- drivers/iommu/tegra-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index f4d859fca7f..5f020f6a7d7 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -149,7 +149,7 @@ #define SMMU_ADDR_TO_PFN(addr) ((addr) >> 12) #define SMMU_ADDR_TO_PDN(addr) ((addr) >> 22) -#define SMMU_PDN_TO_ADDR(addr) ((pdn) << 22) +#define SMMU_PDN_TO_ADDR(pdn) ((pdn) << 22) #define _READABLE (1 << SMMU_PTB_DATA_ASID_READABLE_SHIFT) #define _WRITABLE (1 << SMMU_PTB_DATA_ASID_WRITABLE_SHIFT) From d35d0cb2e34446461f76c755ced134699aef93fc Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 5 Dec 2012 20:52:12 -0500 Subject: [PATCH 227/678] arm: mach-tegra: add 332 GPU option and 740LP option --- arch/arm/mach-tegra/tegra3_clocks.c | 7 +++++-- arch/arm/mach-tegra/tegra3_dvfs.c | 27 ++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 0dcf3a7d953..b62613a359f 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -3918,7 +3918,7 @@ static struct clk tegra_clk_cclk_lp = { .inputs = mux_cclk_lp, .reg = 0x370, .ops = &tegra_super_ops, - .max_rate = 720000000, + .max_rate = 740000000, }; static struct clk tegra_clk_sclk = { @@ -3946,7 +3946,7 @@ static struct clk tegra_clk_virtual_cpu_lp = { .name = "cpu_lp", .parent = &tegra_clk_cclk_lp, .ops = &tegra_cpu_ops, - .max_rate = 700000000, + .max_rate = 740000000, .u.cpu = { .main = &tegra_pll_x, .backup = &tegra_pll_p, @@ -4657,6 +4657,9 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { { 3, 340000 }, { 4, 475000 }, #ifdef CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_740 + { 5, 740000 }, +#endif #ifdef CONFIG_LP_OC_700 { 5, 700000 }, #endif diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 1e099f7a0b3..ed4a3b223a1 100644 --- 
a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -231,7 +231,7 @@ static struct dvfs cpu_dvfs_table[] = { } static struct dvfs core_dvfs_table[] = { - /* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ + /* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ /* Clock limits for internal blocks, PLLs */ CORE_DVFS("cpu_lp", lp_cpu_millivolts, 0, 1, KHZ, 1, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), #ifdef CONFIG_LP_OVERCLOCK @@ -247,6 +247,9 @@ static struct dvfs core_dvfs_table[] = { #ifdef CONFIG_LP_OC_700 CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 342000, 475000, 620000, 700000, 700000, 700000, 700000, 700000), #endif +#ifdef CONFIG_LP_OC_740 + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 475000, 620000, 620000, 620000, 620000, 740000), +#endif #else CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), #endif @@ -268,7 +271,7 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("vi", core_millivolts, 2, 1, KHZ, 1, 219000, 267000, 300000, 371000, 409000, 425000, 425000, 425000), CORE_DVFS("vi", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 470000, 470000, 470000), -/* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ +/* Core voltages (mV): 950, 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350 */ CORE_DVFS("vde", avp_millivolts, 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), CORE_DVFS("mpe", avp_millivolts, 0, 1, KHZ, 1, 234000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), @@ -279,6 +282,15 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("se", avp_millivolts, 0, 1, KHZ, 1, 267000, 285000, 332000, 380000, 416000, 416000, 416000, 416000), #ifdef CONFIG_GPU_OVERCLOCK +#ifdef CONFIG_GPU_OC_332 + CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("2d", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("epp", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("3d", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("3d2", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("se", avp_millivolts, 1, 1, KHZ, 1, 267000, 285000, 332000, 380000, 332000, 332000, 332000, 332000), +#endif #ifdef CONFIG_GPU_OC_446 CORE_DVFS("vde", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), CORE_DVFS("mpe", avp_millivolts, 1, 1, KHZ, 1, 234000, 285000, 332000, 380000, 446000, 446000, 446000, 446000), @@ -361,10 +373,13 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("host1x", core_millivolts, 0, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), #ifdef CONFIG_GPU_OVERCLOCK +#ifdef CONFIG_GPU_OC_332 + CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 167000, 167000, 167000, 167000, 167000, 167000, 167000, 167000), +#endif #ifdef CONFIG_GPU_OC_446 CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 223000, 223000, 223000, 223000, 223000, 223000), #endif -#ifdef CONFIG_GPU_OC_446 +#ifdef CONFIG_GPU_OC_484 CORE_DVFS("host1x", core_millivolts, 
1, 1, KHZ, 1, 152000, 188000, 242000, 242000, 242000, 242000, 242000, 242000), #endif #ifdef CONFIG_GPU_OC_520 @@ -387,6 +402,9 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("cbus", core_millivolts, 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), #ifdef CONFIG_GPU_OVERCLOCK +#ifdef CONFIG_GPU_OC_332 + CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 332000, 332000, 332000, 332000, 332000), +#endif #ifdef CONFIG_GPU_OC_446 CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), #endif @@ -412,6 +430,9 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("cbus", core_millivolts, 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #ifdef CONFIG_GPU_OVERCLOCK +#ifdef CONFIG_GPU_OC_332 + CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 667000, 667000, 667000, 667000, 667000, 667000), +#endif #ifdef CONFIG_GPU_OC_446 CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 892000, 892000, 892000, 892000, 892000, 892000), #endif From 3ae017fa4222b07424c22f174e4d56b2aae33e32 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 5 Dec 2012 20:53:01 -0500 Subject: [PATCH 228/678] mach-tegra: cpu-tegra3.c: test hotplug delay changes --- arch/arm/mach-tegra/cpu-tegra3.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index 4d8a5219879..82c5cf9122b 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -39,9 +39,9 @@ #include "clock.h" #define INITIAL_STATE TEGRA_HP_DISABLED -#define UP2G0_DELAY_MS 1000 -#define UP2Gn_DELAY_MS 100 -#define DOWN_DELAY_MS 500 +#define UP2G0_DELAY_MS 2000 +#define UP2Gn_DELAY_MS 500 +#define DOWN_DELAY_MS 1000 static struct mutex *tegra3_cpu_lock; @@ -299,6 +299,7 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) break; case TEGRA_HP_UP: if (is_lp_cluster() && !no_lp) { +#ifndef CONFIG_TEGRA_LP_ONLY if(!clk_set_parent(cpu_clk, cpu_g_clk)) { #ifndef CONFIG_TEGRA_RUNNABLE_THREAD last_change_time = now; @@ -308,6 +309,7 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) /* catch-up with governor target speed */ tegra_cpu_set_speed_cap(NULL); } +#endif } else { switch (tegra_cpu_speed_balance()) { /* cpu speed is up and balanced - one more on-line */ @@ -364,6 +366,7 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) if ((n >= 1) && is_lp_cluster()) { /* make sure cpu rate is within g-mode range before switching */ +#ifndef CONFIG_TEGRA_LP_ONLY unsigned int speed = max( tegra_getspeed(0), clk_get_min_rate(cpu_g_clk) / 1000); tegra_update_cpu_speed(speed); @@ -375,6 +378,7 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); } +#endif } /* update governor state machine */ tegra_cpu_set_speed_cap(NULL); @@ -400,12 +404,14 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) hp_state = TEGRA_HP_IDLE; /* Switch to G-mode if suspend rate is high enough */ +#ifndef CONFIG_TEGRA_LP_ONLY if (is_lp_cluster() && (cpu_freq >= idle_bottom_freq)) { if (!clk_set_parent(cpu_clk, cpu_g_clk)) { hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); } } +#endif return; } From 90d61bd3e173efc1f4718d22d86e8083971aa5fa Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 5 Dec 2012 20:54:36 
-0500 Subject: [PATCH 229/678] tegra: move audio perflock kconfig and add kconfigs for new oc options --- arch/arm/mach-tegra/Kconfig | 24 ++++++++++++++++++++++-- sound/soc/tegra/Kconfig | 9 --------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index d84bef99a33..c19d7720d3b 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -45,7 +45,8 @@ config ARCH_TEGRA_3x_SOC select PCI_MSI if TEGRA_PCI select ARM_ERRATA_754322 select ARM_ERRATA_764369 - select TEGRA_LP2_ARM_TWD if HAVE_ARM_TWD && !TEGRA_RAIL_OFF_MULTIPLE_CPUS + select TEGRA_LP2_ARM_TWD if HAVE_ARM_TWD +# select TEGRA_LP2_ARM_TWD if HAVE_ARM_TWD && !TEGRA_RAIL_OFF_MULTIPLE_CPUS select CPA help Support for NVIDIA Tegra 3 family of SoCs, based upon the @@ -297,6 +298,8 @@ choice If you are not sure what you are doing, leave this option alone! + config GPU_OC_332 + bool "332 MHz" config GPU_OC_446 bool "446 MHz" config GPU_OC_484 @@ -339,8 +342,23 @@ choice bool "666 MHz" config LP_OC_700 bool "700 MHz" + config LP_OC_740 + bool "740 MHz" endchoice + +config LP_ONLY + bool "Only use the Low-Power companion core" + depends on TEGRA_SILICON_PLATFORM + default n + +config AUDIO_MIN_PERFLOCK + bool "Minimum Audio Playback performance lock" + depends on SND_SOC_TEGRA_RT5640 + default n + help + Tegra3 minimum Audio Performance Lock to prevent audio playback + underruns config TEGRA_CPU_DVFS bool "Enable voltage scaling on Tegra CPU" @@ -574,7 +592,9 @@ config TEGRA_LP2_ARM_TWD bool config TEGRA_RAIL_OFF_MULTIPLE_CPUS - bool + bool "Tegra Rail Off Multiple CPUs" + depends on TEGRA_SILICON_PLATFORM + default n config TEGRA_SLOW_CSITE bool "lower csite clock to 1 Mhz to reduce its power consumption" diff --git a/sound/soc/tegra/Kconfig b/sound/soc/tegra/Kconfig index 15e832b2628..806176bf81f 100644 --- a/sound/soc/tegra/Kconfig +++ b/sound/soc/tegra/Kconfig @@ -235,12 +235,3 @@ config HEADSET_FUNCTION tristate "Headset detection function" default n -config AUDIO_MIN_PERFLOCK - - bool "Minimum Audio Playback performance lock - depends on SND_SOC_TEGRA_RT5640 - default n - help - Tegra3 minimum Audio Performance Lock to prevent audio playback - underruns - From 741e49d9a2a99b46dff4d87269d594aa15ebd40b Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 5 Dec 2012 20:55:44 -0500 Subject: [PATCH 230/678] mach-tegra: tegra3_speedo: fix variant info display --- arch/arm/mach-tegra/tegra3_speedo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-tegra/tegra3_speedo.c b/arch/arm/mach-tegra/tegra3_speedo.c index cc4f3b675f2..c5db102d5b1 100644 --- a/arch/arm/mach-tegra/tegra3_speedo.c +++ b/arch/arm/mach-tegra/tegra3_speedo.c @@ -440,6 +440,10 @@ void tegra_init_speedo_data(void) break; } } +#ifdef CONFIG_TEGRA_VARIANT_INFO + cpu_process_id = iv -1; + orig_cpu_process_id = cpu_process_id; +#endif cpu_process_id = 2; //iv -1; From 48a17613d354f935589b93cdce14f3f8ab4b5615 Mon Sep 17 00:00:00 2001 From: Michael Frydrych Date: Wed, 11 Jul 2012 11:43:42 +0300 Subject: [PATCH 231/678] arm: tegra: la: hack latency allowance formula Remove the ad-hoc scale factor of final latency allowance. Scale the fifo size to pretend that our FIFO is only as deep as the lowest fullness we expect to see. 
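For reference, the short standalone sketch below reworks the formula from this diff with invented numbers (fifo_size_in_atoms, bytes_per_atom, ns_per_tick, bandwidth and expiration are all hypothetical here; the real values come from the la_info[] table and chip data), to show how the old /3 derating of the result compares with the new scaling of the FIFO depth:

    /* Standalone sketch of the latency allowance formula, invented numbers. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int fifo_size_in_atoms = 64;  /* hypothetical display FIFO depth */
        const int bytes_per_atom = 16;         /* hypothetical atom size */
        const int ns_per_tick = 30;            /* hypothetical MC tick period */
        const int bandwidth_in_mbps = 400;     /* hypothetical client bandwidth */
        const int expiration_in_ns = 0;        /* hypothetical */
        int ideal_la, la_to_set;

        /* Old behaviour: full FIFO depth, then an ad-hoc /3 on the result. */
        ideal_la = (fifo_size_in_atoms * bytes_per_atom * 1000) /
                   (bandwidth_in_mbps * ns_per_tick);
        la_to_set = ideal_la - (expiration_in_ns / ns_per_tick) - 1;
        printf("old display LA: %d\n", la_to_set / 3);   /* 84 / 3 = 28 */

        /* New behaviour: pretend the FIFO is only 25% as deep (fifo_scale = 4),
         * then apply the unmodified formula. */
        fifo_size_in_atoms /= 4;
        ideal_la = (fifo_size_in_atoms * bytes_per_atom * 1000) /
                   (bandwidth_in_mbps * ns_per_tick);
        la_to_set = ideal_la - (expiration_in_ns / ns_per_tick) - 1;
        printf("new display LA: %d\n", la_to_set);       /* 21 - 1 = 20 */

        return 0;
    }

The derating is comparable, but it now happens on the formula's input (the FIFO depth) instead of on its final result, so the expiration term is no longer scaled along with it.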
Bug 995270 Change-Id: I78ed2246d2031a2303f81a19fe05c95572a692b0 Signed-off-by: Michael Frydrych Reviewed-on: http://git-master/r/118816 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Krishna Reddy Reviewed-by: Graziano Misuraca Tested-by: Graziano Misuraca Reviewed-by: Jon Mayo --- arch/arm/mach-tegra/latency_allowance.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-tegra/latency_allowance.c b/arch/arm/mach-tegra/latency_allowance.c index 7698ba39f4c..f8e5ce57920 100644 --- a/arch/arm/mach-tegra/latency_allowance.c +++ b/arch/arm/mach-tegra/latency_allowance.c @@ -1,7 +1,7 @@ /* * arch/arm/mach-tegra/latency_allowance.c * - * Copyright (C) 2011 NVIDIA Corporation + * Copyright (C) 2011-2012, NVIDIA CORPORATION. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -100,6 +100,9 @@ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__); \ } +/* Bug 995270 */ +#define HACK_LA_FIFO 1 + static struct dentry *latency_debug_dir; struct la_client_info { @@ -384,7 +387,9 @@ int tegra_set_latency_allowance(enum tegra_la_id id, int la_to_set; unsigned long reg_read; unsigned long reg_write; + unsigned int fifo_size_in_atoms; int bytes_per_atom = normal_atom_size; + const int fifo_scale = 4; /* 25% of the FIFO */ struct la_client_info *ci; VALIDATE_ID(id); @@ -394,11 +399,19 @@ int tegra_set_latency_allowance(enum tegra_la_id id, bytes_per_atom = fdc_atom_size; ci = &la_info[id]; + fifo_size_in_atoms = ci->fifo_size_in_atoms; + +#if HACK_LA_FIFO + /* pretend that our FIFO is only as deep as the lowest fullness + * we expect to see */ + if (id >= ID(DISPLAY_0A) && id <= ID(DISPLAY_HCB)) + fifo_size_in_atoms /= fifo_scale; +#endif if (bandwidth_in_mbps == 0) { la_to_set = MC_LA_MAX_VALUE; } else { - ideal_la = (ci->fifo_size_in_atoms * bytes_per_atom * 1000) / + ideal_la = (fifo_size_in_atoms * bytes_per_atom * 1000) / (bandwidth_in_mbps * ns_per_tick); la_to_set = ideal_la - (ci->expiration_in_ns/ns_per_tick) - 1; } @@ -409,11 +422,6 @@ int tegra_set_latency_allowance(enum tegra_la_id id, la_to_set = (la_to_set > MC_LA_MAX_VALUE) ? MC_LA_MAX_VALUE : la_to_set; scaling_info[id].actual_la_to_set = la_to_set; - /* until display can use latency allowance scaling, use a more - * aggressive LA setting. Bug 862709 */ - if (id >= ID(DISPLAY_0A) && id <= ID(DISPLAY_HCB)) - la_to_set /= 3; - spin_lock(&safety_lock); reg_read = readl(ci->reg_addr); reg_write = (reg_read & ~ci->mask) | From b6cef1b018e34038254380c96a83edb847ce1aa8 Mon Sep 17 00:00:00 2001 From: Antti P Miettinen Date: Tue, 26 Jun 2012 16:54:37 +0300 Subject: [PATCH 232/678] ARM: tegra: Remove timer workaround Reprogramming running timers may cause timer interrupt state to get out of sync and result in lost timer interrupts. 
Bug 950482 Change-Id: I83c9d735f9b041e8a57d73ba466f5f9c89ca1b89 Signed-off-by: Antti P Miettinen Reviewed-on: http://git-master/r/111242 Reviewed-by: Simone Willett Tested-by: Simone Willett --- arch/arm/mach-tegra/cpuidle.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/mach-tegra/cpuidle.c b/arch/arm/mach-tegra/cpuidle.c index 0e0ec7f2dbf..defe5ac71e5 100644 --- a/arch/arm/mach-tegra/cpuidle.c +++ b/arch/arm/mach-tegra/cpuidle.c @@ -128,9 +128,6 @@ static int tegra_idle_enter_lp2(struct cpuidle_device *dev, local_irq_enable(); - /* cpu clockevents may have been reset by powerdown */ - hrtimer_peek_ahead_timers(); - smp_rmb(); /* Update LP2 latency provided no fall back to LP3 */ From 71b0fd61b2e3e20a7d0c535f79b2cc95c055111f Mon Sep 17 00:00:00 2001 From: Tuomas Tynkkynen Date: Mon, 13 Aug 2012 15:43:31 +0300 Subject: [PATCH 233/678] video: tegra: nvmap: Fix overflow in nvmap_heap do_heap_alloc locates a suitable free block from a nvmap heap given a size and alignment. Unfortunately, if a heap block happens to be smaller than the alignment passed to the function, an integer overflow will occur, and a block that's too small gets accidentally returned. Bug 1032642 Change-Id: Ic650c520409134d753e968f62f144ddeb065ccc7 Signed-off-by: Tuomas Tynkkynen Reviewed-on: http://git-master/r/123076 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Krishna Reddy --- drivers/video/tegra/nvmap/nvmap_heap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/video/tegra/nvmap/nvmap_heap.c b/drivers/video/tegra/nvmap/nvmap_heap.c index 7474f31534f..5ddebf6b99c 100644 --- a/drivers/video/tegra/nvmap/nvmap_heap.c +++ b/drivers/video/tegra/nvmap/nvmap_heap.c @@ -3,7 +3,7 @@ * * GPU heap allocator. * - * Copyright (c) 2011, NVIDIA Corporation. + * Copyright (c) 2012, NVIDIA Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -420,6 +420,9 @@ static struct nvmap_heap_block *do_heap_alloc(struct nvmap_heap *heap, list_for_each_entry(i, &heap->free_list, free_list) { size_t fix_size; fix_base = ALIGN(i->block.base, align); + if(!fix_base || fix_base >= i->block.base + i->size) + continue; + fix_size = i->size - (fix_base - i->block.base); /* needed for compaction. relocated chunk From 76c3f0205ad48d31ca976159b2303c050b31dbd2 Mon Sep 17 00:00:00 2001 From: Tuomas Tynkkynen Date: Thu, 16 Aug 2012 19:56:51 +0300 Subject: [PATCH 234/678] video: tegra: host: Fix crash if allocation fails nvhost_module_remove_client assumes that a client structure to be freed exists in the linked list. However, if an allocation fails in nvhost_module_add_client, no client structure is allocated, and during cleanup, nvhost_module_remove_client would then attempt to free an invalid pointer. 
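To make the failure mode concrete, here is a small standalone sketch (plain userspace C, not the driver code; the structures and values are invented) of why testing the loop cursor after a list_for_each_entry()-style walk cannot distinguish a hit from a miss, which is the bug being fixed:

    /* Mimics the kernel's circular list walk just closely enough to show the
     * pitfall: after the loop the cursor is never NULL, matched or not. */
    #include <stdio.h>
    #include <stddef.h>

    struct list_head { struct list_head *next; };

    struct client {
        int id;
        struct list_head node;
    };

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    int main(void)
    {
        struct list_head head = { &head };   /* empty circular list */
        struct client *m;
        int found = 0;

        /* Equivalent of list_for_each_entry(m, &head, node) { ... } */
        for (m = container_of(head.next, struct client, node);
             &m->node != &head;
             m = container_of(m->node.next, struct client, node)) {
            if (m->id == 42) {               /* never matches: list is empty */
                found = 1;
                break;
            }
        }

        /* m is not NULL here -- it points at container_of(&head), i.e. just in
         * front of 'head' on the stack.  A check like the old "if (m)" would
         * therefore always pass and kfree() a bogus pointer; an explicit flag,
         * as this patch adds, does not. */
        printf("found=%d, m=%p (bogus but non-NULL)\n", found, (void *)m);
        return 0;
    }

Built with any C compiler, this prints found=0 together with a non-null pointer, which is exactly the combination the old check could not tell apart from a successful lookup.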
Bug 1034729 Change-Id: Ie1a641071b86f8246951e9be824a6003f14b04b6 Signed-off-by: Tuomas Tynkkynen Reviewed-on: http://git-master/r/124096 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: Juha Tukkinen --- drivers/video/tegra/host/bus_client.c | 3 ++- drivers/video/tegra/host/nvhost_acm.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/video/tegra/host/bus_client.c b/drivers/video/tegra/host/bus_client.c index eb6fc1b7aab..0d253b46265 100644 --- a/drivers/video/tegra/host/bus_client.c +++ b/drivers/video/tegra/host/bus_client.c @@ -192,7 +192,8 @@ static int nvhost_channelopen(struct inode *inode, struct file *filp) } filp->private_data = priv; priv->ch = ch; - nvhost_module_add_client(ch->dev, priv); + if(nvhost_module_add_client(ch->dev, priv)) + goto fail; if (ch->ctxhandler && ch->ctxhandler->alloc) { priv->hwctx = ch->ctxhandler->alloc(ch->ctxhandler, ch); diff --git a/drivers/video/tegra/host/nvhost_acm.c b/drivers/video/tegra/host/nvhost_acm.c index 7865583b0fa..2f4fa060018 100644 --- a/drivers/video/tegra/host/nvhost_acm.c +++ b/drivers/video/tegra/host/nvhost_acm.c @@ -331,15 +331,17 @@ void nvhost_module_remove_client(struct nvhost_device *dev, void *priv) { int i; struct nvhost_module_client *m; + int found = 0; mutex_lock(&client_list_lock); list_for_each_entry(m, &dev->client_list, node) { if (priv == m->priv) { list_del(&m->node); + found = 1; break; } } - if (m) { + if (found) { kfree(m); for (i = 0; i < dev->num_clks; i++) nvhost_module_update_rate(dev, i); From a6188919776f17e15ff23eb0083e95e47aa32b65 Mon Sep 17 00:00:00 2001 From: Krishna Reddy Date: Wed, 1 Aug 2012 15:15:21 -0700 Subject: [PATCH 235/678] video: tegra: nvmap: Add sanity checks for page pools. Check return code for set_page_array_* calls. Change-Id: Ie62ac78b82321939d5bd9d2a636d72dadea50d28 Signed-off-by: Krishna Reddy Reviewed-on: http://git-master/r/123544 Conflicts: drivers/video/tegra/nvmap/nvmap_handle.c --- drivers/video/tegra/nvmap/nvmap_handle.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/video/tegra/nvmap/nvmap_handle.c b/drivers/video/tegra/nvmap/nvmap_handle.c index 539b7ce9801..eddaef875f3 100644 --- a/drivers/video/tegra/nvmap/nvmap_handle.c +++ b/drivers/video/tegra/nvmap/nvmap_handle.c @@ -91,8 +91,11 @@ static struct page *nvmap_page_pool_alloc_locked(struct nvmap_page_pool *pool) { struct page *page = NULL; - if (pool->npages > 0) + if (pool->npages > 0) { page = pool->page_array[--pool->npages]; + atomic_dec(&page->_count); + BUG_ON(atomic_read(&page->_count) != 1); + } return page; } @@ -113,7 +116,9 @@ static bool nvmap_page_pool_release_locked(struct nvmap_page_pool *pool, { int ret = false; + BUG_ON(atomic_read(&page->_count) != 1); if (enable_pp && pool->npages < pool->max_pages) { + atomic_inc(&page->_count); pool->page_array[pool->npages++] = page; ret = true; } @@ -140,6 +145,7 @@ static int nvmap_page_pool_get_available_count(struct nvmap_page_pool *pool) static int nvmap_page_pool_free(struct nvmap_page_pool *pool, int nr_free) { + int err; int i = nr_free; int idx = 0; struct page *page; @@ -155,8 +161,12 @@ static int nvmap_page_pool_free(struct nvmap_page_pool *pool, int nr_free) i--; } - if (idx) - set_pages_array_wb(pool->shrink_array, idx); + if (idx) { + /* This op should never fail. 
*/ + err = set_pages_array_wb(pool->shrink_array, idx); + BUG_ON(err); + } + while (idx--) __free_page(pool->shrink_array[idx]); nvmap_page_pool_unlock(pool); From b60bf940e6d749fa5fdb3174892cc763201010c3 Mon Sep 17 00:00:00 2001 From: Alex Frid Date: Thu, 1 Mar 2012 15:20:40 -0800 Subject: [PATCH 236/678] ARM: tegra: clock: Set SCLK floor for CPU mode switch Set SCLK floor to 80MHz for Tegra3 CPU mode switch. Bug 933984 Change-Id: Ibbb0a24cd763c11b3cead60efe26096bae3e6ddd Signed-off-by: Alex Frid Reviewed-on: http://git-master/r/106035 Reviewed-by: Prajakta Gudadhe Tested-by: Jay Cheng (cherry picked from commit 842f7ddb7a188e36a2ff153dc0d8ed38b5e28319) Reviewed-on: http://git-master/r/113981 Reviewed-by: Simone Willett Tested-by: Simone Willett Conflicts: arch/arm/mach-tegra/common.c --- arch/arm/mach-tegra/common.c | 290 ++++++++++------------------ arch/arm/mach-tegra/tegra3_clocks.c | 10 +- 2 files changed, 108 insertions(+), 192 deletions(-) diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index 4eabbdc436d..aa6f0192c5d 100755 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -47,8 +48,6 @@ #include "reset.h" #include "devices.h" -#define PMC_SCRATCH37 0x130 - #define MC_SECURITY_CFG2 0x7c #define AHB_ARBITRATION_PRIORITY_CTRL 0x4 @@ -64,7 +63,6 @@ #define RECOVERY_MODE BIT(31) #define BOOTLOADER_MODE BIT(30) #define FORCED_RECOVERY_MODE BIT(1) -#define GO_TO_CHARGER_MODE (0xA5A55A5A) #define AHB_GIZMO_USB 0x1c #define AHB_GIZMO_USB2 0x78 @@ -103,7 +101,7 @@ static struct board_info pmu_board_info; static struct board_info display_board_info; static struct board_info camera_board_info; -static int pmu_core_edp = 1700; /* default 1.2V EDP limit */ +static int pmu_core_edp = 1200; /* default 1.2V EDP limit */ static int board_panel_type; static enum power_supply_type pow_supply_type = POWER_SUPPLY_TYPE_MAINS; @@ -137,15 +135,6 @@ void tegra_assert_system_reset(char mode, const char *cmd) reg &= ~(BOOTLOADER_MODE | RECOVERY_MODE | FORCED_RECOVERY_MODE); } writel_relaxed(reg, reset + PMC_SCRATCH0); - - if (cmd && !strcmp(cmd, "chrager-mode")) - { - reg = readl_relaxed(reset + PMC_SCRATCH37); - reg = GO_TO_CHARGER_MODE; - writel_relaxed(GO_TO_CHARGER_MODE, reset + PMC_SCRATCH37); - //printk("tegra_assert_system_reset reg =%x",reg ); - } - /* use *_related to avoid spinlock since caches are off */ reg = readl_relaxed(reset); reg |= 0x10; @@ -153,6 +142,7 @@ void tegra_assert_system_reset(char mode, const char *cmd) #endif } static int modem_id; +static int commchip_id; static int sku_override; static int debug_uart_port_id; static enum audio_codec_type audio_codec_name; @@ -165,6 +155,11 @@ static int max_cpu_current; static __initdata struct tegra_clk_init_table common_clk_init_table[] = { /* name parent rate enabled */ { "clk_m", NULL, 0, true }, + { "emc", NULL, 0, true }, + { "cpu", NULL, 0, true }, + { "kfuse", NULL, 0, true }, + { "fuse", NULL, 0, true }, + { "sclk", NULL, 0, true }, #ifdef CONFIG_TEGRA_SILICON_PLATFORM #ifdef CONFIG_ARCH_TEGRA_2x_SOC { "pll_p", NULL, 216000000, true }, @@ -172,7 +167,7 @@ static __initdata struct tegra_clk_init_table common_clk_init_table[] = { { "pll_p_out2", "pll_p", 48000000, false }, { "pll_p_out3", "pll_p", 72000000, true }, { "pll_p_out4", "pll_p", 108000000, false }, - { "pll_m", "pll_ref", 0, true }, + { "pll_m", "clk_m", 0, true }, { "pll_m_out1", "pll_m", 120000000, true }, { "sclk", "pll_c_out1", 40000000, true }, { 
"hclk", "sclk", 40000000, true }, @@ -193,14 +188,7 @@ static __initdata struct tegra_clk_init_table common_clk_init_table[] = { { "sclk", "pll_p_out4", 102000000, true }, { "hclk", "sclk", 102000000, true }, { "pclk", "hclk", 51000000, true }, - { "wake.sclk", NULL, 40000000, true }, - { "sbc5.sclk", NULL, 40000000, false}, - { "sbc6.sclk", NULL, 40000000, false}, #endif - { "sbc1.sclk", NULL, 40000000, false}, - { "sbc2.sclk", NULL, 40000000, false}, - { "sbc3.sclk", NULL, 40000000, false}, - { "sbc4.sclk", NULL, 40000000, false}, #else { "pll_p", NULL, 216000000, true }, { "pll_p_out1", "pll_p", 28800000, false }, @@ -211,25 +199,25 @@ static __initdata struct tegra_clk_init_table common_clk_init_table[] = { { "sclk", "pll_p_out4", 108000000, true }, { "hclk", "sclk", 108000000, true }, { "pclk", "hclk", 54000000, true }, - { "pll_c", NULL, ULONG_MAX, false }, - { "pll_c_out1", "pll_c", 208000000, false }, #endif #ifdef CONFIG_TEGRA_SLOW_CSITE { "csite", "clk_m", 1000000, true }, #else { "csite", NULL, 0, true }, #endif - { "emc", NULL, 0, true }, - { "cpu", NULL, 0, true }, - { "kfuse", NULL, 0, true }, - { "fuse", NULL, 0, true }, { "pll_u", NULL, 480000000, false }, { "sdmmc1", "pll_p", 48000000, false}, { "sdmmc3", "pll_p", 48000000, false}, { "sdmmc4", "pll_p", 48000000, false}, - { "pll_a", "pll_p_out1", 0, false}, - { "pll_a_out0", "pll_a", 0, false}, + { "sbc1.sclk", NULL, 40000000, false}, + { "sbc2.sclk", NULL, 40000000, false}, + { "sbc3.sclk", NULL, 40000000, false}, + { "sbc4.sclk", NULL, 40000000, false}, #ifndef CONFIG_ARCH_TEGRA_2x_SOC + { "sbc5.sclk", NULL, 40000000, false}, + { "sbc6.sclk", NULL, 40000000, false}, + { "wake.sclk", NULL, 40000000, true }, + { "cpu_mode.sclk", NULL, 80000000, false }, { "cbus", "pll_c", 416000000, false }, { "pll_c_out1", "pll_c", 208000000, false }, { "mselect", "pll_p", 102000000, true }, @@ -313,8 +301,6 @@ void tegra_init_cache(bool init) { void __iomem *p = IO_ADDRESS(TEGRA_ARM_PERIF_BASE) + 0x3000; u32 aux_ctrl; - u32 speedo; - u32 tmp; #ifdef CONFIG_TRUSTED_FOUNDATIONS /* issue the SMC to enable the L2 */ @@ -341,6 +327,8 @@ void tegra_init_cache(bool init) writel(0x221, p + L2X0_TAG_LATENCY_CTRL); writel(0x221, p + L2X0_DATA_LATENCY_CTRL); } else { + u32 speedo; + /* relax l2-cache latency for speedos 4,5,6 (T33's chips) */ speedo = tegra_cpu_speedo_id(); if (speedo == 4 || speedo == 5 || speedo == 6 || @@ -357,12 +345,15 @@ void tegra_init_cache(bool init) writel(0x770, p + L2X0_DATA_LATENCY_CTRL); #endif #endif + writel(0x3, p + L2X0_POWER_CTRL); aux_ctrl = readl(p + L2X0_CACHE_TYPE); aux_ctrl = (aux_ctrl & 0x700) << (17-8); aux_ctrl |= 0x7C000001; if (init) { l2x0_init(p, aux_ctrl, 0x8200c3fe); } else { + u32 tmp; + tmp = aux_ctrl; aux_ctrl = readl(p + L2X0_AUX_CTRL); aux_ctrl &= 0x8200c3fe; @@ -644,14 +635,16 @@ enum audio_codec_type get_audio_codec_type(void) } __setup("audio_codec=", tegra_audio_codec_type); + void tegra_get_board_info(struct board_info *bi) { - bi->board_id = 0xF41; - bi->sku = 0xA00; - bi->fab =0x1; - bi->major_revision = 0x044; - bi->minor_revision = 0x2; + bi->board_id = (system_serial_high >> 16) & 0xFFFF; + bi->sku = (system_serial_high) & 0xFFFF; + bi->fab = (system_serial_low >> 24) & 0xFF; + bi->major_revision = (system_serial_low >> 16) & 0xFF; + bi->minor_revision = (system_serial_low >> 8) & 0xFF; } + static int __init tegra_pmu_board_info(char *info) { char *p = info; @@ -665,7 +658,6 @@ static int __init tegra_pmu_board_info(char *info) void tegra_get_pmu_board_info(struct board_info *bi) { - 
pmu_board_info.sku = 0x1; memcpy(bi, &pmu_board_info, sizeof(struct board_info)); } @@ -722,6 +714,22 @@ int tegra_get_modem_id(void) __setup("modem_id=", tegra_modem_id); +static int __init tegra_commchip_id(char *id) +{ + char *p = id; + + if (get_option(&p, &commchip_id) != 1) + return 0; + return 1; +} + +int tegra_get_commchip_id(void) +{ + return commchip_id; +} + +__setup("commchip_id=", tegra_commchip_id); + /* * Tegra has a protected aperture that prevents access by most non-CPU * memory masters to addresses above the aperture value. Enabling it @@ -789,28 +797,9 @@ void tegra_move_framebuffer(unsigned long to, unsigned long from, iounmap(to_io); } -#ifdef CONFIG_TEGRA_SMMU_BASE_AT_E0000000 -#define FORCE_SMMU_BASE_FOR_TEGRA3_A01 1 -#else -#define FORCE_SMMU_BASE_FOR_TEGRA3_A01 0 -#endif -#if FORCE_SMMU_BASE_FOR_TEGRA3_A01 || \ - (defined(CONFIG_TEGRA_IOVMM_SMMU) && defined(CONFIG_ARCH_TEGRA_3x_SOC)) -/* Support for Tegra3 A01 chip mask that needs to have SMMU IOVA reside in - * the upper half of 4GB IOVA space. A02 and after use the bottom 1GB and - * do not need to reserve memory. - */ -#define SUPPORT_SMMU_BASE_FOR_TEGRA3_A01 -#endif - void __init tegra_reserve(unsigned long carveout_size, unsigned long fb_size, unsigned long fb2_size) { -#ifdef SUPPORT_SMMU_BASE_FOR_TEGRA3_A01 - int smmu_reserved = 0; - struct tegra_smmu_window *smmu_window = tegra_smmu_window(0); -#endif - if (carveout_size) { tegra_carveout_start = memblock_end_of_DRAM() - carveout_size; if (memblock_remove(tegra_carveout_start, carveout_size)) { @@ -856,33 +845,6 @@ void __init tegra_reserve(unsigned long carveout_size, unsigned long fb_size, if (tegra_carveout_size && tegra_carveout_start < tegra_grhost_aperture) tegra_grhost_aperture = tegra_carveout_start; -#ifdef SUPPORT_SMMU_BASE_FOR_TEGRA3_A01 - if (!smmu_window) { - pr_err("No SMMU resource\n"); - } else { - size_t smmu_window_size; - - if (FORCE_SMMU_BASE_FOR_TEGRA3_A01 || - (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3 && - tegra_get_revision() == TEGRA_REVISION_A01)) { - smmu_window->start = TEGRA_SMMU_BASE_TEGRA3_A01; - smmu_window->end = TEGRA_SMMU_BASE_TEGRA3_A01 + - TEGRA_SMMU_SIZE_TEGRA3_A01 - 1; - } - smmu_window_size = smmu_window->end + 1 - smmu_window->start; - if (smmu_window->start >= 0x80000000) { - if (memblock_reserve(smmu_window->start, - smmu_window_size)) - pr_err( - "Failed to reserve SMMU I/O VA window %08lx@%08lx\n", - (unsigned long)smmu_window_size, - (unsigned long)smmu_window->start); - else - smmu_reserved = 1; - } - } -#endif - if (tegra_lp0_vec_size && (tegra_lp0_vec_start < memblock_end_of_DRAM())) { if (memblock_reserve(tegra_lp0_vec_start, tegra_lp0_vec_size)) { @@ -936,12 +898,6 @@ void __init tegra_reserve(unsigned long carveout_size, unsigned long fb_size, tegra_vpr_start, tegra_vpr_size ? 
tegra_vpr_start + tegra_vpr_size - 1 : 0); - -#ifdef SUPPORT_SMMU_BASE_FOR_TEGRA3_A01 - if (smmu_reserved) - pr_info("SMMU: %08lx - %08lx\n", - smmu_window->start, smmu_window->end); -#endif } static struct resource ram_console_resources[] = { @@ -961,20 +917,13 @@ void __init tegra_ram_console_debug_reserve(unsigned long ram_console_size) { struct resource *res; long ret; - unsigned long real_start, real_size; res = platform_get_resource(&ram_console_device, IORESOURCE_MEM, 0); if (!res) goto fail; - res->start = memblock_end_of_DRAM() - ram_console_size; res->end = res->start + ram_console_size - 1; - - // Register an extra 1M before ramconsole to store kexec stuff - real_start = res->start - SZ_1M; - real_size = ram_console_size + SZ_1M; - - ret = memblock_remove(real_start, real_size); + ret = memblock_remove(res->start, ram_console_size); if (ret) goto fail; @@ -1006,118 +955,77 @@ void __init tegra_release_bootloader_fb(void) } #ifdef CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND -static char cpufreq_gov_default[32]; -static char *cpufreq_gov_conservative = "conservative"; -static char *cpufreq_sysfs_place_holder="/sys/devices/system/cpu/cpu%i/cpufreq/scaling_governor"; -static char *cpufreq_gov_conservative_param="/sys/devices/system/cpu/cpufreq/conservative/%s"; +char cpufreq_default_gov[CONFIG_NR_CPUS][MAX_GOV_NAME_LEN]; +char *cpufreq_conservative_gov = "conservative"; -static void cpufreq_set_governor(char *governor) +void cpufreq_store_default_gov(void) { - struct file *scaling_gov = NULL; - mm_segment_t old_fs; - char buf[128]; - int i = 0; - loff_t offset = 0; + unsigned int cpu = 0; + struct cpufreq_policy *policy; - if (governor == NULL) - return; - - /* change to KERNEL_DS address limit */ - old_fs = get_fs(); - set_fs(KERNEL_DS); #ifndef CONFIG_TEGRA_AUTO_HOTPLUG - for_each_online_cpu(i) + for_each_online_cpu(cpu) #endif { - sprintf(buf, cpufreq_sysfs_place_holder, i); - scaling_gov = filp_open(buf, O_RDWR, 0); - if (scaling_gov != NULL) { - if (scaling_gov->f_op != NULL && - scaling_gov->f_op->write != NULL) - scaling_gov->f_op->write(scaling_gov, - governor, - strlen(governor), - &offset); - else - pr_err("f_op might be null\n"); - - filp_close(scaling_gov, NULL); + policy = cpufreq_cpu_get(cpu); + if (policy && policy->governor) { + sprintf(cpufreq_default_gov[cpu], "%s", + policy->governor->name); + cpufreq_cpu_put(policy); } else { - pr_err("%s. Can't open %s\n", __func__, buf); + /* No policy or no gov set for this + * online cpu. If we are here, require + * serious debugging hence setting + * as pr_error. + */ + pr_err("No gov or No policy for online cpu:%d," + , cpu); } } - set_fs(old_fs); } -void cpufreq_save_default_governor(void) +void cpufreq_change_gov(char *target_gov) { - struct file *scaling_gov = NULL; - mm_segment_t old_fs; - char buf[128]; - loff_t offset = 0; - - /* change to KERNEL_DS address limit */ - old_fs = get_fs(); - set_fs(KERNEL_DS); - - buf[127] = 0; - sprintf(buf, cpufreq_sysfs_place_holder,0); - scaling_gov = filp_open(buf, O_RDONLY, 0); - if (scaling_gov != NULL) { - if (scaling_gov->f_op != NULL && - scaling_gov->f_op->read != NULL) - scaling_gov->f_op->read(scaling_gov, - cpufreq_gov_default, - 32, - &offset); - else - pr_err("f_op might be null\n"); + int ret = -EINVAL; + unsigned int cpu = 0; - filp_close(scaling_gov, NULL); - } else { - pr_err("%s. 
Can't open %s\n", __func__, buf); +#ifndef CONFIG_TEGRA_AUTO_HOTPLUG + for_each_online_cpu(cpu) +#endif + { + ret = cpufreq_set_gov(target_gov, cpu); + if (ret < 0) + /* Unable to set gov for the online cpu. + * If it happens, needs to debug. + */ + pr_info("Unable to set gov:%s for online cpu:%d," + , cpufreq_default_gov[cpu] + , cpu); } - set_fs(old_fs); } -void cpufreq_restore_default_governor(void) +void cpufreq_restore_default_gov(void) { - cpufreq_set_governor(cpufreq_gov_default); -} + int ret = -EINVAL; + unsigned int cpu = 0; -void cpufreq_set_conservative_governor_param(char *name, int value) -{ - struct file *gov_param = NULL; - mm_segment_t old_fs; - static char buf[128], param_value[8]; - loff_t offset = 0; - - /* change to KERNEL_DS address limit */ - old_fs = get_fs(); - set_fs(KERNEL_DS); - - sprintf(param_value, "%d", value); - sprintf(buf, cpufreq_gov_conservative_param, name); - gov_param = filp_open(buf, O_RDWR, 0); - if (gov_param != NULL) { - if (gov_param->f_op != NULL && - gov_param->f_op->write != NULL) - gov_param->f_op->write(gov_param, - param_value, - strlen(param_value), - &offset); - else - pr_err("f_op might be null\n"); - - filp_close(gov_param, NULL); - } else { - pr_err("%s. Can't open %s\n", __func__, buf); +#ifndef CONFIG_TEGRA_AUTO_HOTPLUG + for_each_online_cpu(cpu) +#endif + { + if (&cpufreq_default_gov[cpu] && + strlen((const char *)&cpufreq_default_gov[cpu])) { + ret = cpufreq_set_gov(cpufreq_default_gov[cpu], cpu); + if (ret < 0) + /* Unable to restore gov for the cpu as + * It was online on suspend and becomes + * offline on resume. + */ + pr_info("Unable to restore gov:%s for cpu:%d," + , cpufreq_default_gov[cpu] + , cpu); + } + cpufreq_default_gov[cpu][0] = '\0'; } - set_fs(old_fs); -} - -void cpufreq_set_conservative_governor(void) -{ - cpufreq_set_governor(cpufreq_gov_conservative); } #endif /* CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND */ diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index b62613a359f..a896e88bdcf 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -309,6 +309,7 @@ static int tegra3_clk_shared_bus_update(struct clk *bus); static unsigned long cpu_stay_on_backup_max; static struct clk *emc_bridge; +static struct clk *cpu_mode_sclk; static bool detach_shared_bus; module_param(detach_shared_bus, bool, 0644); @@ -1045,6 +1046,8 @@ static int tegra3_cpu_cmplx_clk_set_parent(struct clk *c, struct clk *p) flags |= (p->u.cpu.mode == MODE_LP) ? 
TEGRA_POWER_CLUSTER_LP : TEGRA_POWER_CLUSTER_G; + clk_enable(cpu_mode_sclk); /* set SCLK floor for cluster switch */ + /* Since in both LP and G mode CPU main and backup sources are the same, set rate on the new parent just synchronizes super-clock muxes before mode switch with no PLL re-locking */ @@ -1052,6 +1055,7 @@ static int tegra3_cpu_cmplx_clk_set_parent(struct clk *c, struct clk *p) if (ret) { pr_err("%s: Failed to set rate %lu for %s\n", __func__, rate, p->name); + clk_disable(cpu_mode_sclk); return ret; } @@ -1067,6 +1071,7 @@ static int tegra3_cpu_cmplx_clk_set_parent(struct clk *c, struct clk *p) clk_disable(p); pr_err("%s: Failed to switch %s mode to %s\n", __func__, c->name, p->name); + clk_disable(cpu_mode_sclk); return ret; } @@ -1075,6 +1080,7 @@ static int tegra3_cpu_cmplx_clk_set_parent(struct clk *c, struct clk *p) clk_disable(c->parent); clk_reparent(c, p); + clk_disable(cpu_mode_sclk); return 0; } @@ -4336,7 +4342,8 @@ struct clk tegra_list_clks[] = { SHARED_CLK("usb1.sclk", "tegra-ehci.0", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), SHARED_CLK("usb2.sclk", "tegra-ehci.1", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), SHARED_CLK("usb3.sclk", "tegra-ehci.2", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), - SHARED_CLK("wake.sclk", "wake_sclk", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), + SHARED_CLK("wake.sclk", "wake_sclk", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), + SHARED_CLK("cpu_mode.sclk","cpu_mode", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), SHARED_CLK("mon.avp", "tegra_actmon", "avp", &tegra_clk_sbus_cmplx, NULL, 0, 0), SHARED_CLK("cap.sclk", "cap_sclk", NULL, &tegra_clk_sbus_cmplx, NULL, 0, SHARED_CEILING), SHARED_CLK("floor.sclk", "floor_sclk", NULL, &tegra_clk_sbus_cmplx, NULL, 0, 0), @@ -5286,6 +5293,7 @@ void __init tegra_soc_init_clocks(void) tegra3_init_one_clock(&tegra_clk_out_list[i]); emc_bridge = &tegra_clk_emc_bridge; + cpu_mode_sclk = tegra_get_clock_by_name("cpu_mode.sclk"); /* Initialize to default */ tegra_init_cpu_edp_limits(0); From b0017d5ff3b294d0e1b7cbe21a45613b3399570c Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 5 Dec 2012 23:35:32 -0500 Subject: [PATCH 237/678] mach-tegra: board-grouper-panel.c: backlight tweaks --- arch/arm/mach-tegra/board-grouper-panel.c | 36 ++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 96e479f92af..b2bfcc4ca4c 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -66,7 +66,7 @@ static struct regulator *grouper_lvds_reg; static struct regulator *grouper_lvds_vdd_panel; static tegra_dc_bl_output grouper_bl_output_measured = { - 0, 5, 5, 5, 5, 5, 6, 7, +/* 0, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, @@ -98,6 +98,40 @@ static tegra_dc_bl_output grouper_bl_output_measured = { 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 +*/ + /* 0 - 15 */ + 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, + /* 16 - 31 */ + 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, + /* 32 - 47 */ + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + /* 48 - 63 */ + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + /* 64 - 79 */ + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, + /* 80 - 95 */ + 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 
+ /* 96 - 111 */ + 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, + /* 112 - 127 */ + 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, + /* 128 - 143 */ + 125, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, + /* 144 - 159 */ + 142, 143, 144, 145, 146, 147, 148, 148, 149, 150, 151, 152, 153, 154, 155, 156, + /* 160 - 175 */ + 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, + /* 176 - 191 */ + 173, 174, 175, 176, 177, 179, 180, 181, 182, 184, 185, 186, 187, 188, 189, 190, + /* 192 - 207 */ + 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, + /* 208 - 223 */ + 207, 208, 209, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, + /* 224 - 239 */ + 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + /* 240 - 255 */ + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 + }; static p_tegra_dc_bl_output bl_output; From c8685e79c28d576cd2e924929fc841c5e23529b3 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 6 Dec 2012 02:23:48 -0500 Subject: [PATCH 238/678] Revert "mach-tegra: cpu-tegra3.c: test hotplug delay changes" This reverts commit 3ae017fa4222b07424c22f174e4d56b2aae33e32. --- arch/arm/mach-tegra/cpu-tegra3.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index 82c5cf9122b..4d8a5219879 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -39,9 +39,9 @@ #include "clock.h" #define INITIAL_STATE TEGRA_HP_DISABLED -#define UP2G0_DELAY_MS 2000 -#define UP2Gn_DELAY_MS 500 -#define DOWN_DELAY_MS 1000 +#define UP2G0_DELAY_MS 1000 +#define UP2Gn_DELAY_MS 100 +#define DOWN_DELAY_MS 500 static struct mutex *tegra3_cpu_lock; @@ -299,7 +299,6 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) break; case TEGRA_HP_UP: if (is_lp_cluster() && !no_lp) { -#ifndef CONFIG_TEGRA_LP_ONLY if(!clk_set_parent(cpu_clk, cpu_g_clk)) { #ifndef CONFIG_TEGRA_RUNNABLE_THREAD last_change_time = now; @@ -309,7 +308,6 @@ static void tegra_auto_hotplug_work_func(struct work_struct *work) /* catch-up with governor target speed */ tegra_cpu_set_speed_cap(NULL); } -#endif } else { switch (tegra_cpu_speed_balance()) { /* cpu speed is up and balanced - one more on-line */ @@ -366,7 +364,6 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) if ((n >= 1) && is_lp_cluster()) { /* make sure cpu rate is within g-mode range before switching */ -#ifndef CONFIG_TEGRA_LP_ONLY unsigned int speed = max( tegra_getspeed(0), clk_get_min_rate(cpu_g_clk) / 1000); tegra_update_cpu_speed(speed); @@ -378,7 +375,6 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); } -#endif } /* update governor state machine */ tegra_cpu_set_speed_cap(NULL); @@ -404,14 +400,12 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) hp_state = TEGRA_HP_IDLE; /* Switch to G-mode if suspend rate is high enough */ -#ifndef CONFIG_TEGRA_LP_ONLY if (is_lp_cluster() && (cpu_freq >= idle_bottom_freq)) { if (!clk_set_parent(cpu_clk, cpu_g_clk)) { hp_stats_update(CONFIG_NR_CPUS, false); hp_stats_update(0, true); } } -#endif return; } From a7af8b12fc6ef28dab6c261c29fd0800e5f6e468 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 7 Dec 2012 21:01:14 -0500 Subject: 
[PATCH 239/678] Revert "arm: tegra: la: hack latency allowance formula" This reverts commit 48a17613d354f935589b93cdce14f3f8ab4b5615. --- arch/arm/mach-tegra/latency_allowance.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/arm/mach-tegra/latency_allowance.c b/arch/arm/mach-tegra/latency_allowance.c index f8e5ce57920..7698ba39f4c 100644 --- a/arch/arm/mach-tegra/latency_allowance.c +++ b/arch/arm/mach-tegra/latency_allowance.c @@ -1,7 +1,7 @@ /* * arch/arm/mach-tegra/latency_allowance.c * - * Copyright (C) 2011-2012, NVIDIA CORPORATION. All rights reserved. + * Copyright (C) 2011 NVIDIA Corporation * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -100,9 +100,6 @@ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__); \ } -/* Bug 995270 */ -#define HACK_LA_FIFO 1 - static struct dentry *latency_debug_dir; struct la_client_info { @@ -387,9 +384,7 @@ int tegra_set_latency_allowance(enum tegra_la_id id, int la_to_set; unsigned long reg_read; unsigned long reg_write; - unsigned int fifo_size_in_atoms; int bytes_per_atom = normal_atom_size; - const int fifo_scale = 4; /* 25% of the FIFO */ struct la_client_info *ci; VALIDATE_ID(id); @@ -399,19 +394,11 @@ int tegra_set_latency_allowance(enum tegra_la_id id, bytes_per_atom = fdc_atom_size; ci = &la_info[id]; - fifo_size_in_atoms = ci->fifo_size_in_atoms; - -#if HACK_LA_FIFO - /* pretend that our FIFO is only as deep as the lowest fullness - * we expect to see */ - if (id >= ID(DISPLAY_0A) && id <= ID(DISPLAY_HCB)) - fifo_size_in_atoms /= fifo_scale; -#endif if (bandwidth_in_mbps == 0) { la_to_set = MC_LA_MAX_VALUE; } else { - ideal_la = (fifo_size_in_atoms * bytes_per_atom * 1000) / + ideal_la = (ci->fifo_size_in_atoms * bytes_per_atom * 1000) / (bandwidth_in_mbps * ns_per_tick); la_to_set = ideal_la - (ci->expiration_in_ns/ns_per_tick) - 1; } @@ -422,6 +409,11 @@ int tegra_set_latency_allowance(enum tegra_la_id id, la_to_set = (la_to_set > MC_LA_MAX_VALUE) ? MC_LA_MAX_VALUE : la_to_set; scaling_info[id].actual_la_to_set = la_to_set; + /* until display can use latency allowance scaling, use a more + * aggressive LA setting. 
Bug 862709 */ + if (id >= ID(DISPLAY_0A) && id <= ID(DISPLAY_HCB)) + la_to_set /= 3; + spin_lock(&safety_lock); reg_read = readl(ci->reg_addr); reg_write = (reg_read & ~ci->mask) | From ac4e69ae44dbac37d841595576c17f85d072119d Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 7 Dec 2012 23:11:05 -0500 Subject: [PATCH 240/678] mach-tegra: board-grouper-panel.c: lower min SD brightness --- arch/arm/mach-tegra/board-grouper-panel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index b2bfcc4ca4c..f7f6fe2dc15 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -443,7 +443,7 @@ static struct tegra_dc_sd_settings grouper_sd_settings = { .bin_width = -1, .aggressiveness = 1, .phase_in_adjustments = true, - .panel_min_brightness = 13, + .panel_min_brightness = 5, .use_vid_luma = false, /* Default video coefficients */ .coeff = {5, 9, 2}, From 292f1ea81e0d8721e58db762a6d6e27b3248b931 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 7 Dec 2012 23:12:10 -0500 Subject: [PATCH 241/678] mach-tegra: fixes for custom voltage and oc --- arch/arm/mach-tegra/tegra3_clocks.c | 10 +++++----- arch/arm/mach-tegra/tegra3_emc.c | 18 +++++++++--------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index a896e88bdcf..67be8d31d47 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -2952,11 +2952,11 @@ static noinline int shared_bus_set_rate(struct clk *bus, unsigned long rate, mv = tegra_dvfs_predict_millivolts(bus, rate); old_mv = tegra_dvfs_predict_millivolts(bus, old_rate); - if (IS_ERR_VALUE(mv) || IS_ERR_VALUE(old_mv)) { - pr_err("%s: Failed to predict %s voltage for %lu => %lu\n", - __func__, bus->name, old_rate, rate); - return -EINVAL; - } +// if (IS_ERR_VALUE(mv) || IS_ERR_VALUE(old_mv)) { +// pr_err("%s: Failed to predict %s voltage for %lu => %lu\n", +// __func__, bus->name, old_rate, rate); +// return -EINVAL; +// } /* emc bus: set bridge rate as intermediate step when crossing * bridge threshold in any direction diff --git a/arch/arm/mach-tegra/tegra3_emc.c b/arch/arm/mach-tegra/tegra3_emc.c index 3601c1449dd..63a7887bcd8 100755 --- a/arch/arm/mach-tegra/tegra3_emc.c +++ b/arch/arm/mach-tegra/tegra3_emc.c @@ -890,8 +890,8 @@ static bool is_emc_bridge(void) return false; mv = tegra_dvfs_predict_millivolts(emc, rate); - if (IS_ERR_VALUE(mv) || (mv > TEGRA_EMC_BRIDGE_MVOLTS_MIN)) - return false; +// if (IS_ERR_VALUE(mv) || (mv > TEGRA_EMC_BRIDGE_MVOLTS_MIN)) +// return false; if (clk_set_rate(bridge, rate)) return false; @@ -1030,13 +1030,13 @@ void tegra_init_emc(const struct tegra_emc_table *table, int table_size) adjust_emc_dvfs_table(tegra_emc_table, tegra_emc_table_size); mv = tegra_dvfs_predict_millivolts(emc, max_rate * 1000); - if ((mv <= 0) || (mv > emc->dvfs->max_millivolts)) { - tegra_emc_table = NULL; - pr_err("tegra: invalid EMC DFS table: maximum rate %lu kHz does" - " not match nominal voltage %d\n", - max_rate, emc->dvfs->max_millivolts); - return; - } +// if ((mv <= 0) || (mv > emc->dvfs->max_millivolts)) { +// tegra_emc_table = NULL; +// pr_err("tegra: invalid EMC DFS table: maximum rate %lu kHz does" +// " not match nominal voltage %d\n", +// max_rate, emc->dvfs->max_millivolts); +// return; +// } if (!is_emc_bridge()) { tegra_emc_table = NULL; From 86454dd5c586118bc1d99f5a6663e084dda47dd4 Mon 
Sep 17 00:00:00 2001 From: Metallice Date: Fri, 7 Dec 2012 23:12:45 -0500 Subject: [PATCH 242/678] cpufreq: ondemand.c: lower up threshold --- drivers/cpufreq/cpufreq_ondemand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index c3bb3f16290..d0407135339 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -38,7 +38,7 @@ */ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (3) -#define DEF_FREQUENCY_UP_THRESHOLD (95) +#define DEF_FREQUENCY_UP_THRESHOLD (85) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) From 59b368a5f05c9d0bb71059b4c19f6ba9ee61af15 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 7 Dec 2012 23:13:39 -0500 Subject: [PATCH 243/678] mach-tegra: dvfs.c: more fixes for custom voltage --- arch/arm/mach-tegra/dvfs.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm/mach-tegra/dvfs.c b/arch/arm/mach-tegra/dvfs.c index 8723e6fa60d..21069e1c1e9 100644 --- a/arch/arm/mach-tegra/dvfs.c +++ b/arch/arm/mach-tegra/dvfs.c @@ -336,11 +336,11 @@ __tegra_dvfs_set_rate(struct dvfs *d, unsigned long rate) if (freqs == NULL || d->millivolts == NULL) return -ENODEV; - if (rate > freqs[d->num_freqs - 1]) { - pr_warn("tegra_dvfs: rate %lu too high for dvfs on %s\n", rate, - d->clk_name); - return -EINVAL; - } +// if (rate > freqs[d->num_freqs - 1]) { +// pr_warn("tegra_dvfs: rate %lu too high for dvfs on %s\n", rate, +// d->clk_name); +// return -EINVAL; +// } if (rate == 0) { d->cur_millivolts = 0; @@ -348,12 +348,12 @@ __tegra_dvfs_set_rate(struct dvfs *d, unsigned long rate) while (i < d->num_freqs && rate > freqs[i]) i++; - if ((d->max_millivolts) && - (d->millivolts[i] > d->max_millivolts)) { - pr_warn("tegra_dvfs: voltage %d too high for dvfs on" - " %s\n", d->millivolts[i], d->clk_name); - return -EINVAL; - } +// if ((d->max_millivolts) && +// (d->millivolts[i] > d->max_millivolts)) { +// pr_warn("tegra_dvfs: voltage %d too high for dvfs on" +// " %s\n", d->millivolts[i], d->clk_name); +// return -EINVAL; +// } d->cur_millivolts = d->millivolts[i]; } From 99c1cd2d797cd9aa7fc0c268dadee58eaea87b9a Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 7 Dec 2012 23:14:14 -0500 Subject: [PATCH 244/678] mach-tegra: cpu-tegra3.c: lower hotplug delays --- arch/arm/mach-tegra/cpu-tegra3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index 4d8a5219879..f08a3e8d55d 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -39,9 +39,9 @@ #include "clock.h" #define INITIAL_STATE TEGRA_HP_DISABLED -#define UP2G0_DELAY_MS 1000 -#define UP2Gn_DELAY_MS 100 -#define DOWN_DELAY_MS 500 +#define UP2G0_DELAY_MS 200 +#define UP2Gn_DELAY_MS 200 +#define DOWN_DELAY_MS 1000 static struct mutex *tegra3_cpu_lock; From 879ef2ec11310e267e4fd618fce39a2a68f619ab Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 7 Dec 2012 23:14:57 -0500 Subject: [PATCH 245/678] mach-tegra: tegra3_dvfs.c: sbus and usb dvfs tests --- arch/arm/mach-tegra/tegra3_dvfs.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index ed4a3b223a1..56ec7c76df0 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -262,8 +262,8 @@ static struct dvfs 
core_dvfs_table[] = { CORE_DVFS("emc", emc_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 625000, 625000, 625000), CORE_DVFS("sbus", core_millivolts, 0, 1, KHZ, 1, 136000, 164000, 191000, 216000, 216000, 216000, 216000, 216000), - CORE_DVFS("sbus", core_millivolts, 1, 1, KHZ, 51000, 205000, 205000, 227000, 227000, 267000, 267000, 267000, 267000), - CORE_DVFS("sbus", core_millivolts, 2, 1, KHZ, 51000, 205000, 205000, 227000, 227000, 267000, 334000, 334000, 334000), + CORE_DVFS("sbus", core_millivolts, 1, 1, KHZ, 205000, 205000, 205000, 227000, 227000, 267000, 267000, 267000, 267000), + CORE_DVFS("sbus", core_millivolts, 2, 1, KHZ, 205000, 205000, 205000, 227000, 227000, 267000, 334000, 334000, 334000), CORE_DVFS("sbus", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 378000, 378000, 378000), CORE_DVFS("vi", core_millivolts, 0, 1, KHZ, 1, 216000, 285000, 300000, 300000, 300000, 300000, 300000, 300000), @@ -488,9 +488,9 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("sbc5", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), CORE_DVFS("sbc6", core_millivolts, -1, 1, KHZ, 1, 52000, 60000, 60000, 60000, 100000, 100000, 100000, 100000), - CORE_DVFS("usbd", core_millivolts, -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), - CORE_DVFS("usb2", core_millivolts, -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), - CORE_DVFS("usb3", core_millivolts, -1, 1, KHZ, 1, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), + CORE_DVFS("usbd", core_millivolts, -1, 1, KHZ, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), + CORE_DVFS("usb2", core_millivolts, -1, 1, KHZ, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), + CORE_DVFS("usb3", core_millivolts, -1, 1, KHZ, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000, 480000), CORE_DVFS("sata", core_millivolts, -1, 1, KHZ, 1, 216000, 216000, 216000, 216000, 216000, 216000, 216000, 216000), CORE_DVFS("sata_oob", core_millivolts, -1, 1, KHZ, 1, 216000, 216000, 216000, 216000, 216000, 216000, 216000, 216000), @@ -1145,3 +1145,4 @@ static int __init tegra_dvfs_init_core_cap(void) return 0; } late_initcall(tegra_dvfs_init_core_cap); + From eccf5bf5d7c15378cc6d316ee6f4365aff160f51 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 7 Dec 2012 23:16:37 -0500 Subject: [PATCH 246/678] defconfig: update --- arch/arm/configs/metallice_grouper_defconfig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 22a976421c8..a4b0ec7cf29 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-446-666" +CONFIG_LOCALVERSION="-MKernel-446-666-LPonly" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -312,6 +312,7 @@ CONFIG_TEGRA_FIQ_DEBUGGER=y CONFIG_TEGRA_EMC_SCALING_ENABLE=y CONFIG_VOLTAGE_CONTROL=y CONFIG_GPU_OVERCLOCK=y +# CONFIG_GPU_OC_332 is not set CONFIG_GPU_OC_446=y # CONFIG_GPU_OC_484 is not set # CONFIG_GPU_OC_520 is not set @@ -323,6 +324,9 @@ CONFIG_LP_OVERCLOCK=y # CONFIG_LP_OC_620 is not set CONFIG_LP_OC_666=y # CONFIG_LP_OC_700 is not set +# CONFIG_LP_OC_740 is not set +# CONFIG_LP_ONLY is not set +CONFIG_AUDIO_MIN_PERFLOCK=y CONFIG_TEGRA_CPU_DVFS=y 
CONFIG_TEGRA_CORE_DVFS=y CONFIG_TEGRA_IOVMM_SMMU=y @@ -352,6 +356,7 @@ CONFIG_TEGRA_BB_XMM_POWER=y CONFIG_TEGRA_PLLM_RESTRICTED=y # CONFIG_TEGRA_WDT_RECOVERY is not set CONFIG_TEGRA_LP2_ARM_TWD=y +# CONFIG_TEGRA_RAIL_OFF_MULTIPLE_CPUS is not set CONFIG_TEGRA_SLOW_CSITE=y # CONFIG_TEGRA_PREINIT_CLOCKS is not set From 4d972b35862e1dd8c3060b716e8c064a07c9b29c Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 8 Dec 2012 00:40:56 -0500 Subject: [PATCH 247/678] Revert "ARM: tegra: Remove timer workaround" This reverts commit b6cef1b018e34038254380c96a83edb847ce1aa8. --- arch/arm/mach-tegra/cpuidle.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-tegra/cpuidle.c b/arch/arm/mach-tegra/cpuidle.c index defe5ac71e5..0e0ec7f2dbf 100644 --- a/arch/arm/mach-tegra/cpuidle.c +++ b/arch/arm/mach-tegra/cpuidle.c @@ -128,6 +128,9 @@ static int tegra_idle_enter_lp2(struct cpuidle_device *dev, local_irq_enable(); + /* cpu clockevents may have been reset by powerdown */ + hrtimer_peek_ahead_timers(); + smp_rmb(); /* Update LP2 latency provided no fall back to LP3 */ From 8f03f243146388aee669d99d927f4a981e5d381e Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 8 Dec 2012 02:19:56 -0500 Subject: [PATCH 248/678] Revert "ARM: tegra: clock: Set SCLK floor for CPU mode switch" This reverts commit b60bf940e6d749fa5fdb3174892cc763201010c3. --- arch/arm/mach-tegra/common.c | 290 ++++++++++++++++++---------- arch/arm/mach-tegra/tegra3_clocks.c | 10 +- 2 files changed, 192 insertions(+), 108 deletions(-) diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index aa6f0192c5d..4eabbdc436d 100755 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -48,6 +47,8 @@ #include "reset.h" #include "devices.h" +#define PMC_SCRATCH37 0x130 + #define MC_SECURITY_CFG2 0x7c #define AHB_ARBITRATION_PRIORITY_CTRL 0x4 @@ -63,6 +64,7 @@ #define RECOVERY_MODE BIT(31) #define BOOTLOADER_MODE BIT(30) #define FORCED_RECOVERY_MODE BIT(1) +#define GO_TO_CHARGER_MODE (0xA5A55A5A) #define AHB_GIZMO_USB 0x1c #define AHB_GIZMO_USB2 0x78 @@ -101,7 +103,7 @@ static struct board_info pmu_board_info; static struct board_info display_board_info; static struct board_info camera_board_info; -static int pmu_core_edp = 1200; /* default 1.2V EDP limit */ +static int pmu_core_edp = 1700; /* default 1.2V EDP limit */ static int board_panel_type; static enum power_supply_type pow_supply_type = POWER_SUPPLY_TYPE_MAINS; @@ -135,6 +137,15 @@ void tegra_assert_system_reset(char mode, const char *cmd) reg &= ~(BOOTLOADER_MODE | RECOVERY_MODE | FORCED_RECOVERY_MODE); } writel_relaxed(reg, reset + PMC_SCRATCH0); + + if (cmd && !strcmp(cmd, "chrager-mode")) + { + reg = readl_relaxed(reset + PMC_SCRATCH37); + reg = GO_TO_CHARGER_MODE; + writel_relaxed(GO_TO_CHARGER_MODE, reset + PMC_SCRATCH37); + //printk("tegra_assert_system_reset reg =%x",reg ); + } + /* use *_related to avoid spinlock since caches are off */ reg = readl_relaxed(reset); reg |= 0x10; @@ -142,7 +153,6 @@ void tegra_assert_system_reset(char mode, const char *cmd) #endif } static int modem_id; -static int commchip_id; static int sku_override; static int debug_uart_port_id; static enum audio_codec_type audio_codec_name; @@ -155,11 +165,6 @@ static int max_cpu_current; static __initdata struct tegra_clk_init_table common_clk_init_table[] = { /* name parent rate enabled */ { "clk_m", NULL, 0, true }, - { "emc", NULL, 0, true }, - { "cpu", NULL, 0, true }, - { "kfuse", NULL, 
0, true }, - { "fuse", NULL, 0, true }, - { "sclk", NULL, 0, true }, #ifdef CONFIG_TEGRA_SILICON_PLATFORM #ifdef CONFIG_ARCH_TEGRA_2x_SOC { "pll_p", NULL, 216000000, true }, @@ -167,7 +172,7 @@ static __initdata struct tegra_clk_init_table common_clk_init_table[] = { { "pll_p_out2", "pll_p", 48000000, false }, { "pll_p_out3", "pll_p", 72000000, true }, { "pll_p_out4", "pll_p", 108000000, false }, - { "pll_m", "clk_m", 0, true }, + { "pll_m", "pll_ref", 0, true }, { "pll_m_out1", "pll_m", 120000000, true }, { "sclk", "pll_c_out1", 40000000, true }, { "hclk", "sclk", 40000000, true }, @@ -188,7 +193,14 @@ static __initdata struct tegra_clk_init_table common_clk_init_table[] = { { "sclk", "pll_p_out4", 102000000, true }, { "hclk", "sclk", 102000000, true }, { "pclk", "hclk", 51000000, true }, + { "wake.sclk", NULL, 40000000, true }, + { "sbc5.sclk", NULL, 40000000, false}, + { "sbc6.sclk", NULL, 40000000, false}, #endif + { "sbc1.sclk", NULL, 40000000, false}, + { "sbc2.sclk", NULL, 40000000, false}, + { "sbc3.sclk", NULL, 40000000, false}, + { "sbc4.sclk", NULL, 40000000, false}, #else { "pll_p", NULL, 216000000, true }, { "pll_p_out1", "pll_p", 28800000, false }, @@ -199,25 +211,25 @@ static __initdata struct tegra_clk_init_table common_clk_init_table[] = { { "sclk", "pll_p_out4", 108000000, true }, { "hclk", "sclk", 108000000, true }, { "pclk", "hclk", 54000000, true }, + { "pll_c", NULL, ULONG_MAX, false }, + { "pll_c_out1", "pll_c", 208000000, false }, #endif #ifdef CONFIG_TEGRA_SLOW_CSITE { "csite", "clk_m", 1000000, true }, #else { "csite", NULL, 0, true }, #endif + { "emc", NULL, 0, true }, + { "cpu", NULL, 0, true }, + { "kfuse", NULL, 0, true }, + { "fuse", NULL, 0, true }, { "pll_u", NULL, 480000000, false }, { "sdmmc1", "pll_p", 48000000, false}, { "sdmmc3", "pll_p", 48000000, false}, { "sdmmc4", "pll_p", 48000000, false}, - { "sbc1.sclk", NULL, 40000000, false}, - { "sbc2.sclk", NULL, 40000000, false}, - { "sbc3.sclk", NULL, 40000000, false}, - { "sbc4.sclk", NULL, 40000000, false}, + { "pll_a", "pll_p_out1", 0, false}, + { "pll_a_out0", "pll_a", 0, false}, #ifndef CONFIG_ARCH_TEGRA_2x_SOC - { "sbc5.sclk", NULL, 40000000, false}, - { "sbc6.sclk", NULL, 40000000, false}, - { "wake.sclk", NULL, 40000000, true }, - { "cpu_mode.sclk", NULL, 80000000, false }, { "cbus", "pll_c", 416000000, false }, { "pll_c_out1", "pll_c", 208000000, false }, { "mselect", "pll_p", 102000000, true }, @@ -301,6 +313,8 @@ void tegra_init_cache(bool init) { void __iomem *p = IO_ADDRESS(TEGRA_ARM_PERIF_BASE) + 0x3000; u32 aux_ctrl; + u32 speedo; + u32 tmp; #ifdef CONFIG_TRUSTED_FOUNDATIONS /* issue the SMC to enable the L2 */ @@ -327,8 +341,6 @@ void tegra_init_cache(bool init) writel(0x221, p + L2X0_TAG_LATENCY_CTRL); writel(0x221, p + L2X0_DATA_LATENCY_CTRL); } else { - u32 speedo; - /* relax l2-cache latency for speedos 4,5,6 (T33's chips) */ speedo = tegra_cpu_speedo_id(); if (speedo == 4 || speedo == 5 || speedo == 6 || @@ -345,15 +357,12 @@ void tegra_init_cache(bool init) writel(0x770, p + L2X0_DATA_LATENCY_CTRL); #endif #endif - writel(0x3, p + L2X0_POWER_CTRL); aux_ctrl = readl(p + L2X0_CACHE_TYPE); aux_ctrl = (aux_ctrl & 0x700) << (17-8); aux_ctrl |= 0x7C000001; if (init) { l2x0_init(p, aux_ctrl, 0x8200c3fe); } else { - u32 tmp; - tmp = aux_ctrl; aux_ctrl = readl(p + L2X0_AUX_CTRL); aux_ctrl &= 0x8200c3fe; @@ -635,16 +644,14 @@ enum audio_codec_type get_audio_codec_type(void) } __setup("audio_codec=", tegra_audio_codec_type); - void tegra_get_board_info(struct board_info *bi) { - bi->board_id = 
(system_serial_high >> 16) & 0xFFFF; - bi->sku = (system_serial_high) & 0xFFFF; - bi->fab = (system_serial_low >> 24) & 0xFF; - bi->major_revision = (system_serial_low >> 16) & 0xFF; - bi->minor_revision = (system_serial_low >> 8) & 0xFF; + bi->board_id = 0xF41; + bi->sku = 0xA00; + bi->fab =0x1; + bi->major_revision = 0x044; + bi->minor_revision = 0x2; } - static int __init tegra_pmu_board_info(char *info) { char *p = info; @@ -658,6 +665,7 @@ static int __init tegra_pmu_board_info(char *info) void tegra_get_pmu_board_info(struct board_info *bi) { + pmu_board_info.sku = 0x1; memcpy(bi, &pmu_board_info, sizeof(struct board_info)); } @@ -714,22 +722,6 @@ int tegra_get_modem_id(void) __setup("modem_id=", tegra_modem_id); -static int __init tegra_commchip_id(char *id) -{ - char *p = id; - - if (get_option(&p, &commchip_id) != 1) - return 0; - return 1; -} - -int tegra_get_commchip_id(void) -{ - return commchip_id; -} - -__setup("commchip_id=", tegra_commchip_id); - /* * Tegra has a protected aperture that prevents access by most non-CPU * memory masters to addresses above the aperture value. Enabling it @@ -797,9 +789,28 @@ void tegra_move_framebuffer(unsigned long to, unsigned long from, iounmap(to_io); } +#ifdef CONFIG_TEGRA_SMMU_BASE_AT_E0000000 +#define FORCE_SMMU_BASE_FOR_TEGRA3_A01 1 +#else +#define FORCE_SMMU_BASE_FOR_TEGRA3_A01 0 +#endif +#if FORCE_SMMU_BASE_FOR_TEGRA3_A01 || \ + (defined(CONFIG_TEGRA_IOVMM_SMMU) && defined(CONFIG_ARCH_TEGRA_3x_SOC)) +/* Support for Tegra3 A01 chip mask that needs to have SMMU IOVA reside in + * the upper half of 4GB IOVA space. A02 and after use the bottom 1GB and + * do not need to reserve memory. + */ +#define SUPPORT_SMMU_BASE_FOR_TEGRA3_A01 +#endif + void __init tegra_reserve(unsigned long carveout_size, unsigned long fb_size, unsigned long fb2_size) { +#ifdef SUPPORT_SMMU_BASE_FOR_TEGRA3_A01 + int smmu_reserved = 0; + struct tegra_smmu_window *smmu_window = tegra_smmu_window(0); +#endif + if (carveout_size) { tegra_carveout_start = memblock_end_of_DRAM() - carveout_size; if (memblock_remove(tegra_carveout_start, carveout_size)) { @@ -845,6 +856,33 @@ void __init tegra_reserve(unsigned long carveout_size, unsigned long fb_size, if (tegra_carveout_size && tegra_carveout_start < tegra_grhost_aperture) tegra_grhost_aperture = tegra_carveout_start; +#ifdef SUPPORT_SMMU_BASE_FOR_TEGRA3_A01 + if (!smmu_window) { + pr_err("No SMMU resource\n"); + } else { + size_t smmu_window_size; + + if (FORCE_SMMU_BASE_FOR_TEGRA3_A01 || + (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3 && + tegra_get_revision() == TEGRA_REVISION_A01)) { + smmu_window->start = TEGRA_SMMU_BASE_TEGRA3_A01; + smmu_window->end = TEGRA_SMMU_BASE_TEGRA3_A01 + + TEGRA_SMMU_SIZE_TEGRA3_A01 - 1; + } + smmu_window_size = smmu_window->end + 1 - smmu_window->start; + if (smmu_window->start >= 0x80000000) { + if (memblock_reserve(smmu_window->start, + smmu_window_size)) + pr_err( + "Failed to reserve SMMU I/O VA window %08lx@%08lx\n", + (unsigned long)smmu_window_size, + (unsigned long)smmu_window->start); + else + smmu_reserved = 1; + } + } +#endif + if (tegra_lp0_vec_size && (tegra_lp0_vec_start < memblock_end_of_DRAM())) { if (memblock_reserve(tegra_lp0_vec_start, tegra_lp0_vec_size)) { @@ -898,6 +936,12 @@ void __init tegra_reserve(unsigned long carveout_size, unsigned long fb_size, tegra_vpr_start, tegra_vpr_size ? 
tegra_vpr_start + tegra_vpr_size - 1 : 0); + +#ifdef SUPPORT_SMMU_BASE_FOR_TEGRA3_A01 + if (smmu_reserved) + pr_info("SMMU: %08lx - %08lx\n", + smmu_window->start, smmu_window->end); +#endif } static struct resource ram_console_resources[] = { @@ -917,13 +961,20 @@ void __init tegra_ram_console_debug_reserve(unsigned long ram_console_size) { struct resource *res; long ret; + unsigned long real_start, real_size; res = platform_get_resource(&ram_console_device, IORESOURCE_MEM, 0); if (!res) goto fail; + res->start = memblock_end_of_DRAM() - ram_console_size; res->end = res->start + ram_console_size - 1; - ret = memblock_remove(res->start, ram_console_size); + + // Register an extra 1M before ramconsole to store kexec stuff + real_start = res->start - SZ_1M; + real_size = ram_console_size + SZ_1M; + + ret = memblock_remove(real_start, real_size); if (ret) goto fail; @@ -955,77 +1006,118 @@ void __init tegra_release_bootloader_fb(void) } #ifdef CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND -char cpufreq_default_gov[CONFIG_NR_CPUS][MAX_GOV_NAME_LEN]; -char *cpufreq_conservative_gov = "conservative"; +static char cpufreq_gov_default[32]; +static char *cpufreq_gov_conservative = "conservative"; +static char *cpufreq_sysfs_place_holder="/sys/devices/system/cpu/cpu%i/cpufreq/scaling_governor"; +static char *cpufreq_gov_conservative_param="/sys/devices/system/cpu/cpufreq/conservative/%s"; -void cpufreq_store_default_gov(void) +static void cpufreq_set_governor(char *governor) { - unsigned int cpu = 0; - struct cpufreq_policy *policy; + struct file *scaling_gov = NULL; + mm_segment_t old_fs; + char buf[128]; + int i = 0; + loff_t offset = 0; + if (governor == NULL) + return; + + /* change to KERNEL_DS address limit */ + old_fs = get_fs(); + set_fs(KERNEL_DS); #ifndef CONFIG_TEGRA_AUTO_HOTPLUG - for_each_online_cpu(cpu) + for_each_online_cpu(i) #endif { - policy = cpufreq_cpu_get(cpu); - if (policy && policy->governor) { - sprintf(cpufreq_default_gov[cpu], "%s", - policy->governor->name); - cpufreq_cpu_put(policy); + sprintf(buf, cpufreq_sysfs_place_holder, i); + scaling_gov = filp_open(buf, O_RDWR, 0); + if (scaling_gov != NULL) { + if (scaling_gov->f_op != NULL && + scaling_gov->f_op->write != NULL) + scaling_gov->f_op->write(scaling_gov, + governor, + strlen(governor), + &offset); + else + pr_err("f_op might be null\n"); + + filp_close(scaling_gov, NULL); } else { - /* No policy or no gov set for this - * online cpu. If we are here, require - * serious debugging hence setting - * as pr_error. - */ - pr_err("No gov or No policy for online cpu:%d," - , cpu); + pr_err("%s. Can't open %s\n", __func__, buf); } } + set_fs(old_fs); } -void cpufreq_change_gov(char *target_gov) +void cpufreq_save_default_governor(void) { - int ret = -EINVAL; - unsigned int cpu = 0; + struct file *scaling_gov = NULL; + mm_segment_t old_fs; + char buf[128]; + loff_t offset = 0; + + /* change to KERNEL_DS address limit */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + + buf[127] = 0; + sprintf(buf, cpufreq_sysfs_place_holder,0); + scaling_gov = filp_open(buf, O_RDONLY, 0); + if (scaling_gov != NULL) { + if (scaling_gov->f_op != NULL && + scaling_gov->f_op->read != NULL) + scaling_gov->f_op->read(scaling_gov, + cpufreq_gov_default, + 32, + &offset); + else + pr_err("f_op might be null\n"); -#ifndef CONFIG_TEGRA_AUTO_HOTPLUG - for_each_online_cpu(cpu) -#endif - { - ret = cpufreq_set_gov(target_gov, cpu); - if (ret < 0) - /* Unable to set gov for the online cpu. - * If it happens, needs to debug. 
- */ - pr_info("Unable to set gov:%s for online cpu:%d," - , cpufreq_default_gov[cpu] - , cpu); + filp_close(scaling_gov, NULL); + } else { + pr_err("%s. Can't open %s\n", __func__, buf); } + set_fs(old_fs); } -void cpufreq_restore_default_gov(void) +void cpufreq_restore_default_governor(void) { - int ret = -EINVAL; - unsigned int cpu = 0; + cpufreq_set_governor(cpufreq_gov_default); +} -#ifndef CONFIG_TEGRA_AUTO_HOTPLUG - for_each_online_cpu(cpu) -#endif - { - if (&cpufreq_default_gov[cpu] && - strlen((const char *)&cpufreq_default_gov[cpu])) { - ret = cpufreq_set_gov(cpufreq_default_gov[cpu], cpu); - if (ret < 0) - /* Unable to restore gov for the cpu as - * It was online on suspend and becomes - * offline on resume. - */ - pr_info("Unable to restore gov:%s for cpu:%d," - , cpufreq_default_gov[cpu] - , cpu); - } - cpufreq_default_gov[cpu][0] = '\0'; +void cpufreq_set_conservative_governor_param(char *name, int value) +{ + struct file *gov_param = NULL; + mm_segment_t old_fs; + static char buf[128], param_value[8]; + loff_t offset = 0; + + /* change to KERNEL_DS address limit */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + + sprintf(param_value, "%d", value); + sprintf(buf, cpufreq_gov_conservative_param, name); + gov_param = filp_open(buf, O_RDWR, 0); + if (gov_param != NULL) { + if (gov_param->f_op != NULL && + gov_param->f_op->write != NULL) + gov_param->f_op->write(gov_param, + param_value, + strlen(param_value), + &offset); + else + pr_err("f_op might be null\n"); + + filp_close(gov_param, NULL); + } else { + pr_err("%s. Can't open %s\n", __func__, buf); } + set_fs(old_fs); +} + +void cpufreq_set_conservative_governor(void) +{ + cpufreq_set_governor(cpufreq_gov_conservative); } #endif /* CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND */ diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 67be8d31d47..8cb23fa8460 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -309,7 +309,6 @@ static int tegra3_clk_shared_bus_update(struct clk *bus); static unsigned long cpu_stay_on_backup_max; static struct clk *emc_bridge; -static struct clk *cpu_mode_sclk; static bool detach_shared_bus; module_param(detach_shared_bus, bool, 0644); @@ -1046,8 +1045,6 @@ static int tegra3_cpu_cmplx_clk_set_parent(struct clk *c, struct clk *p) flags |= (p->u.cpu.mode == MODE_LP) ? 
TEGRA_POWER_CLUSTER_LP : TEGRA_POWER_CLUSTER_G; - clk_enable(cpu_mode_sclk); /* set SCLK floor for cluster switch */ - /* Since in both LP and G mode CPU main and backup sources are the same, set rate on the new parent just synchronizes super-clock muxes before mode switch with no PLL re-locking */ @@ -1055,7 +1052,6 @@ static int tegra3_cpu_cmplx_clk_set_parent(struct clk *c, struct clk *p) if (ret) { pr_err("%s: Failed to set rate %lu for %s\n", __func__, rate, p->name); - clk_disable(cpu_mode_sclk); return ret; } @@ -1071,7 +1067,6 @@ static int tegra3_cpu_cmplx_clk_set_parent(struct clk *c, struct clk *p) clk_disable(p); pr_err("%s: Failed to switch %s mode to %s\n", __func__, c->name, p->name); - clk_disable(cpu_mode_sclk); return ret; } @@ -1080,7 +1075,6 @@ static int tegra3_cpu_cmplx_clk_set_parent(struct clk *c, struct clk *p) clk_disable(c->parent); clk_reparent(c, p); - clk_disable(cpu_mode_sclk); return 0; } @@ -4342,8 +4336,7 @@ struct clk tegra_list_clks[] = { SHARED_CLK("usb1.sclk", "tegra-ehci.0", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), SHARED_CLK("usb2.sclk", "tegra-ehci.1", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), SHARED_CLK("usb3.sclk", "tegra-ehci.2", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), - SHARED_CLK("wake.sclk", "wake_sclk", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), - SHARED_CLK("cpu_mode.sclk","cpu_mode", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), + SHARED_CLK("wake.sclk", "wake_sclk", "sclk", &tegra_clk_sbus_cmplx, NULL, 0, 0), SHARED_CLK("mon.avp", "tegra_actmon", "avp", &tegra_clk_sbus_cmplx, NULL, 0, 0), SHARED_CLK("cap.sclk", "cap_sclk", NULL, &tegra_clk_sbus_cmplx, NULL, 0, SHARED_CEILING), SHARED_CLK("floor.sclk", "floor_sclk", NULL, &tegra_clk_sbus_cmplx, NULL, 0, 0), @@ -5293,7 +5286,6 @@ void __init tegra_soc_init_clocks(void) tegra3_init_one_clock(&tegra_clk_out_list[i]); emc_bridge = &tegra_clk_emc_bridge; - cpu_mode_sclk = tegra_get_clock_by_name("cpu_mode.sclk"); /* Initialize to default */ tegra_init_cpu_edp_limits(0); From 3c89536a88b986105efed8dd823a7933c6258f83 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 9 Dec 2012 19:28:59 -0500 Subject: [PATCH 249/678] mach-tegra: cpu-tegra3.c: set delays to 5:1:5 ratio --- arch/arm/mach-tegra/cpu-tegra3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index f08a3e8d55d..cdde7ef7e2b 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -39,7 +39,7 @@ #include "clock.h" #define INITIAL_STATE TEGRA_HP_DISABLED -#define UP2G0_DELAY_MS 200 +#define UP2G0_DELAY_MS 1000 #define UP2Gn_DELAY_MS 200 #define DOWN_DELAY_MS 1000 From 8f6da0b6b6e211e3cc2cbed8462e216fe415b7f6 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 16 Dec 2012 16:52:54 -0500 Subject: [PATCH 250/678] arm: mach-tegra: up 550MHz LP OC up to 555MHz Because is looks cooler. --- arch/arm/mach-tegra/Kconfig | 6 +++--- arch/arm/mach-tegra/tegra3_clocks.c | 4 ++-- arch/arm/mach-tegra/tegra3_dvfs.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index c19d7720d3b..cc4137531bf 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -328,14 +328,14 @@ choice depends on LP_OVERCLOCK prompt "Maximum LP Rate" - default GPU_OC_620 + default GPU_OC_666 ---help--- Select the desired LP overclock rate. If you are not sure what you are doing, leave this option alone! 
- config LP_OC_550 - bool "550 MHz" + config LP_OC_555 + bool "555 MHz" config LP_OC_620 bool "620 MHz" config LP_OC_666 diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 8cb23fa8460..ae18e7f5849 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4669,8 +4669,8 @@ static struct cpufreq_frequency_table freq_table_1p6GHz[] = { #ifdef CONFIG_LP_OC_620 { 5, 620000 }, #endif -#ifdef CONFIG_LP_OC_550 - { 5, 550000 }, +#ifdef CONFIG_LP_OC_555 + { 5, 555000 }, #endif #else { 5, 620000 }, diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 56ec7c76df0..8f1ff7e83b2 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -235,8 +235,8 @@ static struct dvfs core_dvfs_table[] = { /* Clock limits for internal blocks, PLLs */ CORE_DVFS("cpu_lp", lp_cpu_millivolts, 0, 1, KHZ, 1, 294000, 342000, 427000, 475000, 500000, 500000, 500000, 500000), #ifdef CONFIG_LP_OVERCLOCK -#ifdef CONFIG_LP_OC_550 - CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 475000, 550000, 550000, 550000, 550000, 550000), +#ifdef CONFIG_LP_OC_555 + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 475000, 555000, 555000, 555000, 555000, 555000), #endif #ifdef CONFIG_LP_OC_620 CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 475000, 620000, 620000, 620000, 620000, 620000), From 5a711d2dd2c841d878dd7c0e1c744c16dcbb194d Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 16 Dec 2012 16:54:58 -0500 Subject: [PATCH 251/678] mach-tegra: board-grouper-panel.c: test backlight changes --- arch/arm/mach-tegra/board-grouper-panel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index f7f6fe2dc15..b169f8905c3 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -102,7 +102,7 @@ static tegra_dc_bl_output grouper_bl_output_measured = { /* 0 - 15 */ 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, /* 16 - 31 */ - 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, + 11, 12, 13, 13, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, /* 32 - 47 */ 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, /* 48 - 63 */ @@ -443,7 +443,7 @@ static struct tegra_dc_sd_settings grouper_sd_settings = { .bin_width = -1, .aggressiveness = 1, .phase_in_adjustments = true, - .panel_min_brightness = 5, + .panel_min_brightness = 19, .use_vid_luma = false, /* Default video coefficients */ .coeff = {5, 9, 2}, From 9973daec4896c73aa97d4e5d07545ef9b6cf1b1e Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 16 Dec 2012 17:00:46 -0500 Subject: [PATCH 252/678] defconfig: update --- arch/arm/configs/metallice_grouper_defconfig | 30 ++++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index a4b0ec7cf29..2beb22e4f03 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,14 +38,14 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-446-666-LPonly" +CONFIG_LOCALVERSION="-MKernel-446-666" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y CONFIG_HAVE_KERNEL_LZO=y -# CONFIG_KERNEL_GZIP is not set 
+CONFIG_KERNEL_GZIP=y # CONFIG_KERNEL_LZMA is not set -CONFIG_KERNEL_LZO=y +# CONFIG_KERNEL_LZO is not set CONFIG_DEFAULT_HOSTNAME="(none)" CONFIG_SWAP=y # CONFIG_SYSVIPC is not set @@ -320,7 +320,7 @@ CONFIG_GPU_OC_446=y # CONFIG_GPU_OC_666 is not set # CONFIG_GPU_OC_700 is not set CONFIG_LP_OVERCLOCK=y -# CONFIG_LP_OC_550 is not set +# CONFIG_LP_OC_555 is not set # CONFIG_LP_OC_620 is not set CONFIG_LP_OC_666=y # CONFIG_LP_OC_700 is not set @@ -353,7 +353,7 @@ CONFIG_TEGRA_EDP_EXACT_FREQ=y CONFIG_TEGRA_BB_XMM_POWER=y # CONFIG_TEGRA_BB_XMM_POWER2 is not set # CONFIG_TEGRA_THERMAL_SYSFS is not set -CONFIG_TEGRA_PLLM_RESTRICTED=y +# CONFIG_TEGRA_PLLM_RESTRICTED is not set # CONFIG_TEGRA_WDT_RECOVERY is not set CONFIG_TEGRA_LP2_ARM_TWD=y # CONFIG_TEGRA_RAIL_OFF_MULTIPLE_CPUS is not set @@ -926,6 +926,7 @@ CONFIG_NET_ACT_MIRRED=y # CONFIG_NET_CLS_IND is not set CONFIG_NET_SCH_FIFO=y # CONFIG_DCB is not set +CONFIG_DNS_RESOLVER=y # CONFIG_BATMAN_ADV is not set CONFIG_RPS=y CONFIG_RFS_ACCEL=y @@ -3009,6 +3010,7 @@ CONFIG_TMPFS=y CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set CONFIG_HFS_FS=y CONFIG_HFSPLUS_FS=y # CONFIG_BEFS_FS is not set @@ -3028,19 +3030,28 @@ CONFIG_HFSPLUS_FS=y # CONFIG_UFS_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y -# CONFIG_NFS_V3 is not set -# CONFIG_NFS_V4 is not set -CONFIG_ROOT_NFS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +CONFIG_NFS_V4=y +# CONFIG_NFS_V4_1 is not set +# CONFIG_ROOT_NFS is not set +# CONFIG_NFS_USE_LEGACY_DNS is not set +CONFIG_NFS_USE_KERNEL_DNS=y +# CONFIG_NFS_USE_NEW_IDMAPPER is not set # CONFIG_NFSD is not set CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y # CONFIG_CEPH_FS is not set CONFIG_CIFS=y # CONFIG_CIFS_STATS is not set # CONFIG_CIFS_WEAK_PW_HASH is not set +# CONFIG_CIFS_UPCALL is not set # CONFIG_CIFS_XATTR is not set # CONFIG_CIFS_DEBUG2 is not set +# CONFIG_CIFS_DFS_UPCALL is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -3198,7 +3209,8 @@ CONFIG_HAVE_ARCH_KGDB=y # # Security options # -# CONFIG_KEYS is not set +CONFIG_KEYS=y +# CONFIG_KEYS_DEBUG_PROC_KEYS is not set # CONFIG_SECURITY_DMESG_RESTRICT is not set # CONFIG_SECURITY is not set # CONFIG_SECURITYFS is not set From ae56ef758590757a12dcee701b32b11534e3838a Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 21 Dec 2012 17:35:53 -0500 Subject: [PATCH 253/678] mach-tegra: tegra3_dvfs.c: incorporate host1x, cbus, and pllc int avp voltages --- arch/arm/mach-tegra/tegra3_dvfs.c | 60 +++++++++++++++---------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 8f1ff7e83b2..cdaeddb502c 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -371,88 +371,88 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("3d2", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 484000, 484000, 484000), CORE_DVFS("se", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 625000, 625000, 625000), - CORE_DVFS("host1x", core_millivolts, 0, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), + CORE_DVFS("host1x", avp_millivolts, 0, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), #ifdef CONFIG_GPU_OVERCLOCK #ifdef CONFIG_GPU_OC_332 - CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 167000, 167000, 167000, 167000, 167000, 167000, 167000, 167000), + 
CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 167000, 167000, 167000, 167000, 167000, 167000, 167000, 167000), #endif #ifdef CONFIG_GPU_OC_446 - CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 223000, 223000, 223000, 223000, 223000, 223000), + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 223000, 223000, 223000, 223000, 223000, 223000), #endif #ifdef CONFIG_GPU_OC_484 - CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 242000, 242000, 242000, 242000, 242000, 242000), + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 242000, 242000, 242000, 242000, 242000, 242000), #endif #ifdef CONFIG_GPU_OC_520 - CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 260000, 260000, 260000, 260000, 260000), + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 260000, 260000, 260000, 260000, 260000), #endif #ifdef CONFIG_GPU_OC_600 - CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 300000, 300000, 300000, 300000), + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 300000, 300000, 300000, 300000), #endif #ifdef CONFIG_GPU_OC_666 - CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 333000, 333000, 333000, 333000), + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 333000, 333000, 333000, 333000), #endif #ifdef CONFIG_GPU_OC_700 - CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 350000, 350000, 350000, 350000), + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 350000, 350000, 350000, 350000), #endif #else - CORE_DVFS("host1x", core_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), + CORE_DVFS("host1x", avp_millivolts, 1, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 267000), #endif - CORE_DVFS("host1x", core_millivolts, 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), - CORE_DVFS("host1x", core_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), + CORE_DVFS("host1x", avp_millivolts, 2, 1, KHZ, 1, 152000, 188000, 222000, 254000, 267000, 267000, 267000, 300000), + CORE_DVFS("host1x", avp_millivolts, 3, 1, KHZ, 1, 1, 1, 1, 1, 1, 242000, 242000, 242000), - CORE_DVFS("cbus", core_millivolts, 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("cbus", avp_millivolts, 0, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), #ifdef CONFIG_GPU_OVERCLOCK #ifdef CONFIG_GPU_OC_332 - CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 332000, 332000, 332000, 332000, 332000), + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 332000, 332000, 332000, 332000, 332000), #endif #ifdef CONFIG_GPU_OC_446 - CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 446000, 446000, 446000, 446000), #endif #ifdef CONFIG_GPU_OC_484 - CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 484000, 484000, 484000, 484000), #endif #ifdef CONFIG_GPU_OC_520 - CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 
520000, 520000, 520000, 520000), + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 520000, 520000, 520000, 520000), #endif #ifdef CONFIG_GPU_OC_600 - CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 600000, 600000, 600000, 600000), #endif #ifdef CONFIG_GPU_OC_666 - CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 666000, 666000, 666000, 666000), + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 666000, 666000, 666000, 666000), #endif #ifdef CONFIG_GPU_OC_700 - CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 700000, 700000, 700000, 700000), + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 700000, 700000, 700000, 700000), #endif #else - CORE_DVFS("cbus", core_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), + CORE_DVFS("cbus", avp_millivolts, 1, 1, KHZ, 1, 228000, 275000, 332000, 380000, 416000, 416000, 416000, 416000), #endif - CORE_DVFS("cbus", core_millivolts, 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), - CORE_DVFS("cbus", core_millivolts, 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), + CORE_DVFS("cbus", avp_millivolts, 2, 1, KHZ, 1, 247000, 304000, 352000, 400000, 437000, 484000, 520000, 600000), + CORE_DVFS("cbus", avp_millivolts, 3, 1, KHZ, 1, 484000, 484000, 484000, 484000, 484000, 484000, 484000, 484000), #ifdef CONFIG_GPU_OVERCLOCK #ifdef CONFIG_GPU_OC_332 - CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 667000, 667000, 667000, 667000, 667000, 667000), + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 667000, 667000, 667000, 667000, 667000, 667000), #endif #ifdef CONFIG_GPU_OC_446 - CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 892000, 892000, 892000, 892000, 892000, 892000), + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 892000, 892000, 892000, 892000, 892000, 892000), #endif #ifdef CONFIG_GPU_OC_484 - CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 968000, 968000, 968000, 968000, 968000, 968000), + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 968000, 968000, 968000, 968000, 968000, 968000), #endif #ifdef CONFIG_GPU_OC_520 - CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 1040000, 1040000, 1040000, 1040000, 1040000), + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 1040000, 1040000, 1040000, 1040000, 1040000), #endif #ifdef CONFIG_GPU_OC_600 - CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1200000, 1200000, 1200000, 1200000), + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1200000, 1200000, 1200000, 1200000), #endif #ifdef CONFIG_GPU_OC_666 - CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1332000, 1332000, 1332000, 1332000), + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1332000, 1332000, 1332000, 1332000), #endif #ifdef CONFIG_GPU_OC_700 - CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1400000, 1400000, 1400000, 1400000), + CORE_DVFS("pll_c", avp_millivolts, 
-1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1400000, 1400000, 1400000, 1400000), #endif #else - CORE_DVFS("pll_c", core_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1200000), + CORE_DVFS("pll_c", avp_millivolts, -1, 1, KHZ, 533000, 667000, 667000, 800000, 800000, 1066000, 1066000, 1066000, 1200000), #endif /* * PLLM dvfs is common across all speedo IDs with one special exception From ed7524018a2a7f4904fa940f544d6eefe3e0b570 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 26 Dec 2012 18:34:25 -0500 Subject: [PATCH 254/678] Revert "ARM: cpuquiet: Go to and stay on G-CPU if min_cpus 1 is requested" This reverts commit b39458c042f3f3a0a53ad587d6a11e482b110949. --- arch/arm/mach-tegra/cpuquiet.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index 4e176c70429..c4c75b42ea3 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -154,8 +154,7 @@ static void tegra_cpuquiet_work_func(struct work_struct *work) break; case TEGRA_CPQ_SWITCH_TO_LP: if (!is_lp_cluster() && !no_lp && - !pm_qos_request(PM_QOS_MIN_ONLINE_CPUS) - && num_online_cpus() == 1) { + num_online_cpus() == 1) { if (!clk_set_parent(cpu_clk, cpu_lp_clk)) { /*catch-up with governor target speed*/ tegra_cpu_set_speed_cap(NULL); @@ -221,7 +220,7 @@ static int min_cpus_notify(struct notifier_block *nb, unsigned long n, void *p) mutex_lock(tegra3_cpu_lock); - if ((n >= 1) && is_lp_cluster()) { + if ((n >= 2) && is_lp_cluster()) { /* make sure cpu rate is within g-mode range before switching */ unsigned long speed = max((unsigned long)tegra_getspeed(0), clk_get_min_rate(cpu_g_clk) / 1000); From 2a6c6b42be0784584751e43dca310d2a14c62ccf Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 30 Dec 2012 13:52:58 -0500 Subject: [PATCH 255/678] cpufreq: interactive.c: kang newest interactive --- drivers/cpufreq/cpufreq_interactive.c | 966 +++++++++++---------- include/trace/events/cpufreq_interactive.h | 58 +- 2 files changed, 539 insertions(+), 485 deletions(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 19daca8c5e4..1ac148b6929 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -19,48 +19,39 @@ #include #include #include -#include +#include +#include +#include #include #include #include #include #include #include -#include #include #include #include -#include -#include - -#include "../../arch/arm/mach-tegra/clock.h" -#include "../../arch/arm/mach-tegra/pm.h" #define CREATE_TRACE_POINTS #include -/* lpcpu variables */ -static struct clk *cpu_lp_clk; -static unsigned int idle_top_freq; - static atomic_t active_count = ATOMIC_INIT(0); struct cpufreq_interactive_cpuinfo { struct timer_list cpu_timer; - int timer_idlecancel; + struct timer_list cpu_slack_timer; + spinlock_t load_lock; /* protects the next 4 fields */ u64 time_in_idle; - u64 time_in_iowait; - u64 idle_exit_time; - u64 timer_run_time; - int idling; - u64 freq_change_time; - u64 freq_change_time_in_idle; - u64 freq_change_time_in_iowait; + u64 time_in_idle_timestamp; + u64 cputime_speedadj; + u64 cputime_speedadj_timestamp; struct cpufreq_policy *policy; struct cpufreq_frequency_table *freq_table; unsigned int target_freq; unsigned int floor_freq; u64 floor_validate_time; + u64 hispeed_validate_time; + struct rw_semaphore enable_sem; int governor_enabled; }; @@ -71,55 +62,44 @@ static struct task_struct 
*speedchange_task; static cpumask_t speedchange_cpumask; static spinlock_t speedchange_cpumask_lock; -struct cpufreq_interactive_core_lock { - struct pm_qos_request_list qos_min_req; - struct pm_qos_request_list qos_max_req; - struct task_struct *lock_task; - struct work_struct unlock_work; - struct timer_list unlock_timer; - int request_active; - unsigned long lock_period; - struct mutex mutex; -}; - -/* default timeout for core lock down */ -#define DEFAULT_CORE_LOCK_PERIOD 200000 /* 200 ms */ - -static struct cpufreq_interactive_core_lock core_lock; - /* Hi speed to bump to from lo speed when load burst (default max) */ -static unsigned int hispeed_freq = 1300000; +static unsigned int hispeed_freq; -/* Go to hispeed_freq when CPU load at or above this value. */ -#define DEFAULT_GO_HISPEED_LOAD 85 -static unsigned long go_hispeed_load; +/* Go to hi speed when CPU load at or above this value. */ +#define DEFAULT_GO_HISPEED_LOAD 99 +static unsigned long go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; -/* Consider IO as busy */ -static unsigned long io_is_busy; +/* Target load. Lower values result in higher CPU speeds. */ +#define DEFAULT_TARGET_LOAD 90 +static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; +static spinlock_t target_loads_lock; +static unsigned int *target_loads = default_target_loads; +static int ntarget_loads = ARRAY_SIZE(default_target_loads); /* * The minimum amount of time to spend at a frequency before we can ramp down. */ -#define DEFAULT_MIN_SAMPLE_TIME 60000; -static unsigned long min_sample_time; +#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC) +static unsigned long min_sample_time = DEFAULT_MIN_SAMPLE_TIME; /* * The sample rate of the timer used to increase frequency */ -#define DEFAULT_TIMER_RATE 40000; -static unsigned long timer_rate; +#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC) +static unsigned long timer_rate = DEFAULT_TIMER_RATE; /* * Wait this long before raising speed above hispeed, by default a single * timer interval. */ #define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE -static unsigned long above_hispeed_delay_val; +static unsigned long above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY; /* * Boost pulse to hispeed on touchscreen input. */ -static int input_boost_val = 1; + +static int input_boost_val; struct cpufreq_interactive_inputopen { struct input_handle *handle; @@ -129,10 +109,19 @@ struct cpufreq_interactive_inputopen { static struct cpufreq_interactive_inputopen inputopen; static struct workqueue_struct *inputopen_wq; +/* Non-zero means indefinite speed boost active */ +static int boost_val; +/* Duration of a boot pulse in usecs */ +static int boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME; +/* End time of boost pulse in ktime converted to usecs */ +static u64 boostpulse_endtime; + /* - * Non-zero means longer-term speed boost active. + * Max additional time to wait in idle, beyond timer_rate, at speeds above + * minimum before wakeup to reduce speed, or -1 if unnecessary. 
*/ -static int boost_val; +#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE) +static int timer_slack_val = DEFAULT_TIMER_SLACK; static int cpufreq_governor_interactive(struct cpufreq_policy *policy, unsigned int event); @@ -147,156 +136,208 @@ struct cpufreq_governor cpufreq_gov_interactive = { .owner = THIS_MODULE, }; -static unsigned int cpufreq_interactive_get_target( - int cpu_load, int load_since_change, +static void cpufreq_interactive_timer_resched( struct cpufreq_interactive_cpuinfo *pcpu) { - unsigned int target_freq; + unsigned long expires = jiffies + usecs_to_jiffies(timer_rate); - /* - * Choose greater of short-term load (since last idle timer - * started or timer function re-armed itself) or long-term load - * (since last frequency change). - */ - if (load_since_change > cpu_load) - cpu_load = load_since_change; - - /* Jump boost policy */ - if (cpu_load >= go_hispeed_load || boost_val) { - if (pcpu->target_freq < hispeed_freq && - hispeed_freq < pcpu->policy->max) { - target_freq = hispeed_freq; - } else { - target_freq = pcpu->policy->max * cpu_load / 100; - - if (target_freq < hispeed_freq) - target_freq = hispeed_freq; - - if (pcpu->target_freq == hispeed_freq && - target_freq > hispeed_freq && - cputime64_sub(pcpu->timer_run_time, - pcpu->freq_change_time) - < above_hispeed_delay_val) { - - target_freq = pcpu->target_freq; - trace_cpufreq_interactive_notyet( - smp_processor_id(), - cpu_load, - pcpu->target_freq, - target_freq); + mod_timer_pinned(&pcpu->cpu_timer, expires); + if (timer_slack_val >= 0 && pcpu->target_freq > pcpu->policy->min) { + expires += usecs_to_jiffies(timer_slack_val); + mod_timer_pinned(&pcpu->cpu_slack_timer, expires); + } + + spin_lock(&pcpu->load_lock); + pcpu->time_in_idle = + get_cpu_idle_time_us(smp_processor_id(), + &pcpu->time_in_idle_timestamp); + pcpu->cputime_speedadj = 0; + pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; + spin_unlock(&pcpu->load_lock); +} + +static unsigned int freq_to_targetload(unsigned int freq) +{ + int i; + unsigned int ret; + + spin_lock(&target_loads_lock); + + for (i = 0; i < ntarget_loads - 1 && freq >= target_loads[i+1]; i += 2) + ; + + ret = target_loads[i]; + spin_unlock(&target_loads_lock); + return ret; +} + +/* + * If increasing frequencies never map to a lower target load then + * choose_freq() will find the minimum frequency that does not exceed its + * target load given the current load. + */ + +static unsigned int choose_freq( + struct cpufreq_interactive_cpuinfo *pcpu, unsigned int loadadjfreq) +{ + unsigned int freq = pcpu->policy->cur; + unsigned int prevfreq, freqmin, freqmax; + unsigned int tl; + int index; + + freqmin = 0; + freqmax = UINT_MAX; + + do { + prevfreq = freq; + tl = freq_to_targetload(freq); + + /* + * Find the lowest frequency where the computed load is less + * than or equal to the target load. + */ + + cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, loadadjfreq / tl, + CPUFREQ_RELATION_L, &index); + freq = pcpu->freq_table[index].frequency; + + if (freq > prevfreq) { + /* The previous frequency is too low. */ + freqmin = prevfreq; + + if (freq >= freqmax) { + /* + * Find the highest frequency that is less + * than freqmax. + */ + cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmax - 1, CPUFREQ_RELATION_H, + &index); + freq = pcpu->freq_table[index].frequency; + + if (freq == freqmin) { + /* + * The first frequency below freqmax + * has already been found to be too + * low. 
freqmax is the lowest speed + * we found that is fast enough. + */ + freq = freqmax; + break; + } + } + } else if (freq < prevfreq) { + /* The previous frequency is high enough. */ + freqmax = prevfreq; + + if (freq <= freqmin) { + /* + * Find the lowest frequency that is higher + * than freqmin. + */ + cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmin + 1, CPUFREQ_RELATION_L, + &index); + freq = pcpu->freq_table[index].frequency; + + /* + * If freqmax is the first frequency above + * freqmin then we have already found that + * this speed is fast enough. + */ + if (freq == freqmax) + break; } } - } else { - target_freq = idle_top_freq * cpu_load / 100; - } - target_freq = min(target_freq, pcpu->policy->max); - return target_freq; + /* If same frequency chosen as previous then done. */ + } while (freq != prevfreq); + + return freq; } -static inline cputime64_t get_cpu_iowait_time( - unsigned int cpu, cputime64_t *wall) +static u64 update_load(int cpu) { - u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); + u64 now; + u64 now_idle; + unsigned int delta_idle; + unsigned int delta_time; + u64 active_time; - if (iowait_time == -1ULL) - return 0; + now_idle = get_cpu_idle_time_us(cpu, &now); + delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle); + delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp); + active_time = delta_time - delta_idle; + pcpu->cputime_speedadj += active_time * pcpu->policy->cur; - return iowait_time; + pcpu->time_in_idle = now_idle; + pcpu->time_in_idle_timestamp = now; + return now; } static void cpufreq_interactive_timer(unsigned long data) { - unsigned int delta_idle; - unsigned int delta_iowait; + u64 now; unsigned int delta_time; + u64 cputime_speedadj; int cpu_load; - int load_since_change; - u64 time_in_idle; - u64 time_in_iowait; - u64 idle_exit_time; struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, data); - u64 now_idle; - u64 now_iowait; unsigned int new_freq; + unsigned int loadadjfreq; unsigned int index; unsigned long flags; + bool boosted; - smp_rmb(); - + if (!down_read_trylock(&pcpu->enable_sem)) + return; if (!pcpu->governor_enabled) goto exit; - /* - * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time, - * this lets idle exit know the current idle time sample has - * been processed, and idle exit can generate a new sample and - * re-arm the timer. This prevents a concurrent idle - * exit on that CPU from writing a new set of info at the same time - * the timer function runs (the timer function can't use that info - * until more time passes). - */ - time_in_idle = pcpu->time_in_idle; - time_in_iowait = pcpu->time_in_iowait; - idle_exit_time = pcpu->idle_exit_time; - now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time); - now_iowait = get_cpu_iowait_time(data, NULL); - smp_wmb(); - - /* If we raced with cancelling a timer, skip. */ - if (!idle_exit_time) - goto exit; - - delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle); - delta_iowait = (unsigned int) cputime64_sub(now_iowait, time_in_iowait); - delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, - idle_exit_time); + spin_lock(&pcpu->load_lock); + now = update_load(data); + delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp); + cputime_speedadj = pcpu->cputime_speedadj; + spin_unlock(&pcpu->load_lock); - /* - * If timer ran less than 1ms after short-term sample started, retry. 
- */ - if (delta_time < 1000) + if (WARN_ON_ONCE(!delta_time)) goto rearm; - if (delta_idle > delta_time) - cpu_load = 0; - else { - if (io_is_busy && delta_idle >= delta_iowait) - delta_idle -= delta_iowait; + do_div(cputime_speedadj, delta_time); + loadadjfreq = (unsigned int)cputime_speedadj * 100; + cpu_load = loadadjfreq / pcpu->target_freq; + boosted = boost_val || now < boostpulse_endtime; - cpu_load = 100 * (delta_time - delta_idle) / delta_time; + if (cpu_load >= go_hispeed_load || boosted) { + if (pcpu->target_freq < hispeed_freq) { + new_freq = hispeed_freq; + } else { + new_freq = choose_freq(pcpu, loadadjfreq); + + if (new_freq < hispeed_freq) + new_freq = hispeed_freq; + } + } else { + new_freq = choose_freq(pcpu, loadadjfreq); } - delta_idle = (unsigned int) cputime64_sub(now_idle, - pcpu->freq_change_time_in_idle); - delta_iowait = (unsigned int) cputime64_sub(now_iowait, - pcpu->freq_change_time_in_iowait); - delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, - pcpu->freq_change_time); - - if ((delta_time == 0) || (delta_idle > delta_time)) - load_since_change = 0; - else { - if (io_is_busy && delta_idle >= delta_iowait) - delta_idle -= delta_iowait; - - load_since_change = - 100 * (delta_time - delta_idle) / delta_time; + if (pcpu->target_freq >= hispeed_freq && + new_freq > pcpu->target_freq && + now - pcpu->hispeed_validate_time < above_hispeed_delay_val) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + goto rearm; } - - /* - * Combine short-term load (since last idle timer started or timer - * function re-armed itself) and long-term load (since last frequency - * change) to determine new target frequency. - * - * This function implements the cpufreq scaling policy - */ - new_freq = cpufreq_interactive_get_target(cpu_load, load_since_change, - pcpu); + + pcpu->hispeed_validate_time = now; if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table, - new_freq, CPUFREQ_RELATION_H, + new_freq, CPUFREQ_RELATION_L, &index)) { pr_warn_once("timer %d: cpufreq_frequency_table_target error\n", (int) data); @@ -310,28 +351,37 @@ static void cpufreq_interactive_timer(unsigned long data) * floor frequency for the minimum sample time since last validated. */ if (new_freq < pcpu->floor_freq) { - if (cputime64_sub(pcpu->timer_run_time, - pcpu->floor_validate_time) - < min_sample_time) { - - trace_cpufreq_interactive_notyet(data, cpu_load, - pcpu->target_freq, new_freq); + if (now - pcpu->floor_validate_time < min_sample_time) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); goto rearm; } } - pcpu->floor_freq = new_freq; - pcpu->floor_validate_time = pcpu->timer_run_time; + /* + * Update the timestamp for checking whether speed has been held at + * or above the selected frequency for a minimum of min_sample_time, + * if not boosted to hispeed_freq. If boosted to hispeed_freq then we + * allow the speed to drop as soon as the boostpulse duration expires + * (or the indefinite boost is turned off). 
+ */ + + if (!boosted || new_freq > hispeed_freq) { + pcpu->floor_freq = new_freq; + pcpu->floor_validate_time = now; + } if (pcpu->target_freq == new_freq) { - trace_cpufreq_interactive_already(data, cpu_load, - pcpu->target_freq, new_freq); + trace_cpufreq_interactive_already( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); goto rearm_if_notmax; } trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq, - new_freq); - + pcpu->policy->cur, new_freq); + pcpu->target_freq = new_freq; spin_lock_irqsave(&speedchange_cpumask_lock, flags); cpumask_set_cpu(data, &speedchange_cpumask); @@ -347,31 +397,11 @@ static void cpufreq_interactive_timer(unsigned long data) goto exit; rearm: - if (!timer_pending(&pcpu->cpu_timer)) { - /* - * If already at min: if that CPU is idle, don't set timer. - * Else cancel the timer if that CPU goes idle. We don't - * need to re-evaluate speed until the next idle exit. - */ - if (pcpu->target_freq == pcpu->policy->min) { - smp_rmb(); - - if (pcpu->idling) - goto exit; - - pcpu->timer_idlecancel = 1; - } - - pcpu->time_in_idle = get_cpu_idle_time_us( - data, &pcpu->idle_exit_time); - pcpu->time_in_iowait = get_cpu_iowait_time( - data, NULL); - - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); - } + if (!timer_pending(&pcpu->cpu_timer)) + cpufreq_interactive_timer_resched(pcpu); exit: + up_read(&pcpu->enable_sem); return; } @@ -381,15 +411,16 @@ static void cpufreq_interactive_idle_start(void) &per_cpu(cpuinfo, smp_processor_id()); int pending; - if (!pcpu->governor_enabled) + if (!down_read_trylock(&pcpu->enable_sem)) + return; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); return; + } - pcpu->idling = 1; - smp_wmb(); pending = timer_pending(&pcpu->cpu_timer); if (pcpu->target_freq != pcpu->policy->min) { -#ifdef CONFIG_SMP /* * Entering idle while not at lowest speed. On some * platforms this can hold the other CPU(s) at that speed @@ -398,35 +429,11 @@ static void cpufreq_interactive_idle_start(void) * min indefinitely. This should probably be a quirk of * the CPUFreq driver. */ - if (!pending) { - pcpu->time_in_idle = get_cpu_idle_time_us( - smp_processor_id(), &pcpu->idle_exit_time); - pcpu->time_in_iowait = get_cpu_iowait_time( - smp_processor_id(), NULL); - pcpu->timer_idlecancel = 0; - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); - } -#endif - } else { - /* - * If at min speed and entering idle after load has - * already been evaluated, and a timer has been set just in - * case the CPU suddenly goes busy, cancel that timer. The - * CPU didn't go busy; we'll recheck things upon idle exit. - */ - if (pending && pcpu->timer_idlecancel) { - del_timer(&pcpu->cpu_timer); - /* - * Ensure last timer run time is after current idle - * sample start time, so next idle exit will always - * start a new idle sampling period. 
- */ - pcpu->idle_exit_time = 0; - pcpu->timer_idlecancel = 0; - } + if (!pending) + cpufreq_interactive_timer_resched(pcpu); } + up_read(&pcpu->enable_sem); } static void cpufreq_interactive_idle_end(void) @@ -434,34 +441,23 @@ static void cpufreq_interactive_idle_end(void) struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, smp_processor_id()); - pcpu->idling = 0; - smp_wmb(); + if (!down_read_trylock(&pcpu->enable_sem)) + return; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return; + } - /* - * Arm the timer for 1-2 ticks later if not already, and if the timer - * function has already processed the previous load sampling - * interval. (If the timer is not pending but has not processed - * the previous interval, it is probably racing with us on another - * CPU. Let it compute load based on the previous sample and then - * re-arm the timer for another interval when it's done, rather - * than updating the interval start time to be "now", which doesn't - * give the timer function enough time to make a decision on this - * run.) - */ - if (timer_pending(&pcpu->cpu_timer) == 0 && - pcpu->timer_run_time >= pcpu->idle_exit_time && - pcpu->governor_enabled) { - pcpu->time_in_idle = - get_cpu_idle_time_us(smp_processor_id(), - &pcpu->idle_exit_time); - pcpu->time_in_iowait = - get_cpu_iowait_time(smp_processor_id(), - NULL); - pcpu->timer_idlecancel = 0; - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); + /* Arm the timer for 1-2 ticks later if not already. */ + if (!timer_pending(&pcpu->cpu_timer)) { + cpufreq_interactive_timer_resched(pcpu); + } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) { + del_timer(&pcpu->cpu_timer); + del_timer(&pcpu->cpu_slack_timer); + cpufreq_interactive_timer(smp_processor_id()); } + up_read(&pcpu->enable_sem); } static int cpufreq_interactive_speedchange_task(void *data) @@ -476,7 +472,8 @@ static int cpufreq_interactive_speedchange_task(void *data) spin_lock_irqsave(&speedchange_cpumask_lock, flags); if (cpumask_empty(&speedchange_cpumask)) { - spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + spin_unlock_irqrestore(&speedchange_cpumask_lock, + flags); schedule(); if (kthread_should_stop()) @@ -495,10 +492,12 @@ static int cpufreq_interactive_speedchange_task(void *data) unsigned int max_freq = 0; pcpu = &per_cpu(cpuinfo, cpu); - smp_rmb(); - - if (!pcpu->governor_enabled) + if (!down_read_trylock(&pcpu->enable_sem)) continue; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + continue; + } for_each_cpu(j, pcpu->policy->cpus) { struct cpufreq_interactive_cpuinfo *pjcpu = @@ -508,32 +507,25 @@ static int cpufreq_interactive_speedchange_task(void *data) max_freq = pjcpu->target_freq; } - __cpufreq_driver_target(pcpu->policy, - max_freq, - CPUFREQ_RELATION_H); + if (max_freq != pcpu->policy->cur) + __cpufreq_driver_target(pcpu->policy, + max_freq, + CPUFREQ_RELATION_H); + trace_cpufreq_interactive_setspeed(cpu, + pcpu->target_freq, + pcpu->policy->cur); - trace_cpufreq_interactive_setspeed(cpu, pcpu->target_freq, - pcpu->policy->cur); - - pcpu->freq_change_time_in_idle = - get_cpu_idle_time_us(cpu, - &pcpu->freq_change_time); - pcpu->freq_change_time_in_iowait = - get_cpu_iowait_time(cpu, NULL); + up_read(&pcpu->enable_sem); } } return 0; } -static unsigned int Touch_poke_attr[4] = {1100000, 860000, 0, 0}; - static void cpufreq_interactive_boost(void) { int i; int anyboost = 0; - unsigned int nr_cpus; - unsigned int input_boost_freq; unsigned long flags; struct 
cpufreq_interactive_cpuinfo *pcpu; @@ -542,24 +534,20 @@ static void cpufreq_interactive_boost(void) for_each_online_cpu(i) { pcpu = &per_cpu(cpuinfo, i); - nr_cpus = num_online_cpus(); - - if (!is_lp_cluster()) { - input_boost_freq = Touch_poke_attr[nr_cpus-1]; - } else { - input_boost_freq = idle_top_freq; - } - if (pcpu->target_freq < input_boost_freq) { - pcpu->target_freq = input_boost_freq; + if (pcpu->target_freq < hispeed_freq) { + pcpu->target_freq = hispeed_freq; cpumask_set_cpu(i, &speedchange_cpumask); + pcpu->hispeed_validate_time = + ktime_to_us(ktime_get()); anyboost = 1; } - /* Set floor freq and (re)start timer for when last + /* + * Set floor freq and (re)start timer for when last * validated. */ - pcpu->floor_freq = input_boost_freq; + pcpu->floor_freq = hispeed_freq; pcpu->floor_validate_time = ktime_to_us(ktime_get()); } @@ -569,71 +557,113 @@ static void cpufreq_interactive_boost(void) wake_up_process(speedchange_task); } -static void cpufreq_interactive_core_lock_timer(unsigned long data) +static int cpufreq_interactive_notifier( + struct notifier_block *nb, unsigned long val, void *data) { - queue_work(inputopen_wq, &core_lock.unlock_work); -} + struct cpufreq_freqs *freq = data; + struct cpufreq_interactive_cpuinfo *pcpu; + int cpu; -static void cpufreq_interactive_unlock_cores(struct work_struct *wq) -{ - struct cpufreq_interactive_core_lock *cl = - container_of(wq, struct cpufreq_interactive_core_lock, - unlock_work); + if (val == CPUFREQ_POSTCHANGE) { + pcpu = &per_cpu(cpuinfo, freq->cpu); + if (!down_read_trylock(&pcpu->enable_sem)) + return 0; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return 0; + } - mutex_lock(&cl->mutex); + for_each_cpu(cpu, pcpu->policy->cpus) { + struct cpufreq_interactive_cpuinfo *pjcpu = + &per_cpu(cpuinfo, cpu); + spin_lock(&pjcpu->load_lock); + update_load(cpu); + spin_unlock(&pjcpu->load_lock); + } - if (--cl->request_active) { - goto done; + up_read(&pcpu->enable_sem); } + return 0; +} + +static struct notifier_block cpufreq_notifier_block = { + .notifier_call = cpufreq_interactive_notifier, +}; + +static ssize_t show_target_loads( + struct kobject *kobj, struct attribute *attr, char *buf) +{ + int i; + ssize_t ret = 0; - pm_qos_update_request(&cl->qos_min_req, - PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); + spin_lock(&target_loads_lock); - pm_qos_update_request(&cl->qos_max_req, - PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE); + for (i = 0; i < ntarget_loads; i++) + ret += sprintf(buf + ret, "%u%s", target_loads[i], + i & 0x1 ? ":" : " "); -done: - mutex_unlock(&cl->mutex); + ret += sprintf(buf + ret, "\n"); + spin_unlock(&target_loads_lock); + return ret; } -/* Lock down to whatever # of cores online - * right now. - * - * A pm_qos request for 1 online CPU results in - * an instant cluster switch. 
- */ -static void cpufreq_interactive_lock_cores(void) +static ssize_t store_target_loads( + struct kobject *kobj, struct attribute *attr, const char *buf, + size_t count) { - unsigned int ncpus; + int ret; + const char *cp; + unsigned int *new_target_loads = NULL; + int ntokens = 1; + int i; - mutex_lock(&core_lock.mutex); + cp = buf; + while ((cp = strpbrk(cp + 1, " :"))) + ntokens++; - if (core_lock.request_active) { - goto arm_timer; + if (!(ntokens & 0x1)) + goto err_inval; + + new_target_loads = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL); + if (!new_target_loads) { + ret = -ENOMEM; + goto err; } - ncpus = num_online_cpus(); - pm_qos_update_request(&core_lock.qos_min_req, ncpus); - pm_qos_update_request(&core_lock.qos_max_req, ncpus); - core_lock.request_active++; + cp = buf; + i = 0; + while (i < ntokens) { + if (sscanf(cp, "%u", &new_target_loads[i++]) != 1) + goto err_inval; -arm_timer: - mod_timer(&core_lock.unlock_timer, - jiffies + usecs_to_jiffies(core_lock.lock_period)); + cp = strpbrk(cp, " :"); + if (!cp) + break; + cp++; + } - mutex_unlock(&core_lock.mutex); -} + if (i != ntokens) + goto err_inval; -static int cpufreq_interactive_lock_cores_task(void *data) -{ - while(1) { - cpufreq_interactive_lock_cores(); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - } - return 0; + spin_lock(&target_loads_lock); + if (target_loads != default_target_loads) + kfree(target_loads); + target_loads = new_target_loads; + ntarget_loads = ntokens; + spin_unlock(&target_loads_lock); + return count; + +err_inval: + ret = -EINVAL; +err: + kfree(new_target_loads); + return ret; } +static struct global_attr target_loads_attr = + __ATTR(target_loads, S_IRUGO | S_IWUSR, + show_target_loads, store_target_loads); + /* * Pulsed boost on input event raises CPUs to hispeed_freq and lets * usual algorithm of min_sample_time decide when to allow speed @@ -645,7 +675,7 @@ static void cpufreq_interactive_input_event(struct input_handle *handle, unsigned int code, int value) { if (input_boost_val && type == EV_SYN && code == SYN_REPORT) { - wake_up_process(core_lock.lock_task); + trace_cpufreq_interactive_boost("input"); cpufreq_interactive_boost(); } } @@ -724,44 +754,6 @@ static struct input_handler cpufreq_interactive_input_handler = { .id_table = cpufreq_interactive_ids, }; -static ssize_t show_input_boost(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - return sprintf(buf, "%u\n", input_boost_val); -} - -static ssize_t store_input_boost(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - input_boost_val = val; - return count; -} - -define_one_global_rw(input_boost); - -static ssize_t show_io_is_busy(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", io_is_busy); -} - -static ssize_t store_io_is_busy(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - if (!strict_strtoul(buf, 0, &io_is_busy)) - return count; - return -EINVAL; -} - -static struct global_attr io_is_busy_attr = __ATTR(io_is_busy, 0644, - show_io_is_busy, store_io_is_busy); - static ssize_t show_hispeed_freq(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -874,6 +866,50 @@ static ssize_t store_timer_rate(struct kobject *kobj, static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644, show_timer_rate, store_timer_rate); +static ssize_t show_timer_slack( + struct kobject *kobj, struct 
attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", timer_slack_val); +} + +static ssize_t store_timer_slack( + struct kobject *kobj, struct attribute *attr, const char *buf, + size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtol(buf, 10, &val); + if (ret < 0) + return ret; + + timer_slack_val = val; + return count; +} + +define_one_global_rw(timer_slack); + +static ssize_t show_input_boost(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", input_boost_val); +} + +static ssize_t store_input_boost(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = strict_strtoul(buf, 0, &val); + if (ret < 0) + return ret; + input_boost_val = val; + return count; +} + +define_one_global_rw(input_boost); + static ssize_t show_boost(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -892,26 +928,72 @@ static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, boost_val = val; - if (boost_val) + if (boost_val) { + trace_cpufreq_interactive_boost("on"); cpufreq_interactive_boost(); - - if (!boost_val) - trace_cpufreq_interactive_unboost(hispeed_freq); + } else { + trace_cpufreq_interactive_unboost("off"); + } return count; } define_one_global_rw(boost); +static ssize_t store_boostpulse(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + boostpulse_endtime = ktime_to_us(ktime_get()) + boostpulse_duration_val; + trace_cpufreq_interactive_boost("pulse"); + cpufreq_interactive_boost(); + return count; +} + +static struct global_attr boostpulse = + __ATTR(boostpulse, 0200, NULL, store_boostpulse); + +static ssize_t show_boostpulse_duration( + struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", boostpulse_duration_val); +} + +static ssize_t store_boostpulse_duration( + struct kobject *kobj, struct attribute *attr, const char *buf, + size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + boostpulse_duration_val = val; + return count; +} + +define_one_global_rw(boostpulse_duration); + static struct attribute *interactive_attributes[] = { - &io_is_busy_attr.attr, + &target_loads_attr.attr, &hispeed_freq_attr.attr, &go_hispeed_load_attr.attr, &above_hispeed_delay.attr, &min_sample_time_attr.attr, &timer_rate_attr.attr, + &timer_slack.attr, &input_boost.attr, &boost.attr, + &boostpulse.attr, + &boostpulse_duration.attr, NULL, }; @@ -921,18 +1003,18 @@ static struct attribute_group interactive_attr_group = { }; static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, - unsigned long val, - void *data) + unsigned long val, + void *data) { switch (val) { - case IDLE_START: - cpufreq_interactive_idle_start(); - break; - case IDLE_END: - cpufreq_interactive_idle_end(); - break; + case IDLE_START: + cpufreq_interactive_idle_start(); + break; + case IDLE_END: + cpufreq_interactive_idle_end(); + break; } - + return 0; } @@ -955,32 +1037,34 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, freq_table = cpufreq_frequency_get_table(policy->cpu); + if (!hispeed_freq) + hispeed_freq = policy->max; for_each_cpu(j, policy->cpus) { + unsigned long expires; + pcpu = &per_cpu(cpuinfo, j); pcpu->policy = policy; pcpu->target_freq = policy->cur; pcpu->freq_table = freq_table; - pcpu->freq_change_time_in_idle = - 
get_cpu_idle_time_us(j, - &pcpu->freq_change_time); - pcpu->time_in_idle = pcpu->freq_change_time_in_idle; - pcpu->idle_exit_time = pcpu->freq_change_time; - pcpu->freq_change_time_in_iowait = - get_cpu_iowait_time(j, NULL); - pcpu->time_in_iowait = pcpu->freq_change_time_in_iowait; - - pcpu->timer_idlecancel = 1; pcpu->floor_freq = pcpu->target_freq; pcpu->floor_validate_time = - pcpu->freq_change_time; + ktime_to_us(ktime_get()); + pcpu->hispeed_validate_time = + pcpu->floor_validate_time; + down_write(&pcpu->enable_sem); + expires = jiffies + usecs_to_jiffies(timer_rate); + pcpu->cpu_timer.expires = expires; + add_timer_on(&pcpu->cpu_timer, j); + if (timer_slack_val >= 0) { + expires += usecs_to_jiffies(timer_slack_val); + pcpu->cpu_slack_timer.expires = expires; + add_timer_on(&pcpu->cpu_slack_timer, j); + } pcpu->governor_enabled = 1; - smp_wmb(); + up_write(&pcpu->enable_sem); } - if (!hispeed_freq) - hispeed_freq = policy->max; - /* * Do not register the idle hook and create sysfs * entries if we have already done so. @@ -996,31 +1080,29 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, rc = input_register_handler(&cpufreq_interactive_input_handler); if (rc) pr_warn("%s: failed to register input handler\n", - __func__); - + __func__); + idle_notifier_register(&cpufreq_interactive_idle_nb); + cpufreq_register_notifier( + &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); break; case CPUFREQ_GOV_STOP: for_each_cpu(j, policy->cpus) { pcpu = &per_cpu(cpuinfo, j); + down_write(&pcpu->enable_sem); pcpu->governor_enabled = 0; - smp_wmb(); del_timer_sync(&pcpu->cpu_timer); - - /* - * Reset idle exit time since we may cancel the timer - * before it can run after the last idle exit time, - * to avoid tripping the check in idle exit for a timer - * that is trying to run. - */ - pcpu->idle_exit_time = 0; + del_timer_sync(&pcpu->cpu_slack_timer); + up_write(&pcpu->enable_sem); } flush_work(&inputopen.inputopen_work); if (atomic_dec_return(&active_count) > 0) return 0; - + + cpufreq_unregister_notifier( + &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); idle_notifier_unregister(&cpufreq_interactive_idle_nb); input_unregister_handler(&cpufreq_interactive_input_handler); sysfs_remove_group(cpufreq_global_kobject, @@ -1040,78 +1122,49 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, return 0; } +static void cpufreq_interactive_nop_timer(unsigned long data) +{ +} + static int __init cpufreq_interactive_init(void) { unsigned int i; struct cpufreq_interactive_cpuinfo *pcpu; - /* - * If MAX_USER_RT_PRIO < MAX_RT_PRIO the kernel thread has higher priority than any user thread - * In this case MAX_USER_RT_PRIO = 99 and MAX_RT_PRIO = 100, therefore boosting the priority of this - * kernel thread above user threads which will, by my reason, increase interactvitiy. 
- */ - struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO-1 }; - - cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); - idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; - - go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; - min_sample_time = DEFAULT_MIN_SAMPLE_TIME; - above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY; - timer_rate = DEFAULT_TIMER_RATE; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; /* Initalize per-cpu timers */ for_each_possible_cpu(i) { pcpu = &per_cpu(cpuinfo, i); - init_timer(&pcpu->cpu_timer); + init_timer_deferrable(&pcpu->cpu_timer); pcpu->cpu_timer.function = cpufreq_interactive_timer; pcpu->cpu_timer.data = i; + init_timer(&pcpu->cpu_slack_timer); + pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer; + spin_lock_init(&pcpu->load_lock); + init_rwsem(&pcpu->enable_sem); } + spin_lock_init(&target_loads_lock); spin_lock_init(&speedchange_cpumask_lock); - speedchange_task = kthread_create(cpufreq_interactive_speedchange_task, NULL, - "cfinteractive"); + "cfinteractive"); if (IS_ERR(speedchange_task)) - return PTR_ERR(speedchange_task); + return PTR_ERR(speedchange_task); sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, ¶m); get_task_struct(speedchange_task); inputopen_wq = create_workqueue("cfinteractive"); - + if (!inputopen_wq) goto err_freetask; - + INIT_WORK(&inputopen.inputopen_work, cpufreq_interactive_input_open); /* NB: wake up so the thread does not look hung to the freezer */ wake_up_process(speedchange_task); - pm_qos_add_request(&core_lock.qos_min_req, PM_QOS_MIN_ONLINE_CPUS, - PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); - - pm_qos_add_request(&core_lock.qos_max_req, PM_QOS_MAX_ONLINE_CPUS, - PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE); - - init_timer(&core_lock.unlock_timer); - core_lock.unlock_timer.function = cpufreq_interactive_core_lock_timer; - core_lock.unlock_timer.data = 0; - - core_lock.request_active = 0; - core_lock.lock_period = DEFAULT_CORE_LOCK_PERIOD; - mutex_init(&core_lock.mutex); - - core_lock.lock_task = kthread_create(cpufreq_interactive_lock_cores_task, NULL, - "kinteractive_lockcores"); - - if (IS_ERR(core_lock.lock_task)) - return PTR_ERR(core_lock.lock_task); - - sched_setscheduler_nocheck(core_lock.lock_task, SCHED_FIFO, ¶m); - get_task_struct(core_lock.lock_task); - - INIT_WORK(&core_lock.unlock_work, cpufreq_interactive_unlock_cores); return cpufreq_register_governor(&cpufreq_gov_interactive); err_freetask: @@ -1131,11 +1184,6 @@ static void __exit cpufreq_interactive_exit(void) kthread_stop(speedchange_task); put_task_struct(speedchange_task); destroy_workqueue(inputopen_wq); - - pm_qos_remove_request(&core_lock.qos_min_req); - pm_qos_remove_request(&core_lock.qos_max_req); - kthread_stop(core_lock.lock_task); - put_task_struct(core_lock.lock_task); } module_exit(cpufreq_interactive_exit); diff --git a/include/trace/events/cpufreq_interactive.h b/include/trace/events/cpufreq_interactive.h index 64c9a825346..951e6ca12da 100644 --- a/include/trace/events/cpufreq_interactive.h +++ b/include/trace/events/cpufreq_interactive.h @@ -8,7 +8,7 @@ DECLARE_EVENT_CLASS(set, TP_PROTO(u32 cpu_id, unsigned long targfreq, - unsigned long actualfreq), + unsigned long actualfreq), TP_ARGS(cpu_id, targfreq, actualfreq), TP_STRUCT__entry( @@ -36,68 +36,74 @@ DEFINE_EVENT(set, cpufreq_interactive_setspeed, DECLARE_EVENT_CLASS(loadeval, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq), + unsigned long curtarg, unsigned 
long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg), TP_STRUCT__entry( __field(unsigned long, cpu_id ) __field(unsigned long, load ) - __field(unsigned long, curfreq ) - __field(unsigned long, targfreq ) + __field(unsigned long, curtarg ) + __field(unsigned long, curactual ) + __field(unsigned long, newtarg ) ), TP_fast_assign( __entry->cpu_id = cpu_id; __entry->load = load; - __entry->curfreq = curfreq; - __entry->targfreq = targfreq; + __entry->curtarg = curtarg; + __entry->curactual = curactual; + __entry->newtarg = newtarg; ), - TP_printk("cpu=%lu load=%lu cur=%lu targ=%lu", - __entry->cpu_id, __entry->load, __entry->curfreq, - __entry->targfreq) + TP_printk("cpu=%lu load=%lu cur=%lu actual=%lu targ=%lu", + __entry->cpu_id, __entry->load, __entry->curtarg, + __entry->curactual, __entry->newtarg) ); DEFINE_EVENT(loadeval, cpufreq_interactive_target, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq) + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); DEFINE_EVENT(loadeval, cpufreq_interactive_already, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq) + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); DEFINE_EVENT(loadeval, cpufreq_interactive_notyet, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq) + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); TRACE_EVENT(cpufreq_interactive_boost, - TP_PROTO(unsigned long freq), - TP_ARGS(freq), + TP_PROTO(const char *s), + TP_ARGS(s), TP_STRUCT__entry( - __field(unsigned long, freq) + __string(s, s) ), TP_fast_assign( - __entry->freq = freq; + __assign_str(s, s); ), - TP_printk("freq=%lu", __entry->freq) + TP_printk("%s", __get_str(s)) ); TRACE_EVENT(cpufreq_interactive_unboost, - TP_PROTO(unsigned long freq), - TP_ARGS(freq), + TP_PROTO(const char *s), + TP_ARGS(s), TP_STRUCT__entry( - __field(unsigned long, freq) + __string(s, s) ), TP_fast_assign( - __entry->freq = freq; + __assign_str(s, s); ), - TP_printk("freq=%lu", __entry->freq) + TP_printk("%s", __get_str(s)) ); #endif /* _TRACE_CPUFREQ_INTERACTIVE_H */ From 85a5f7db9f1a52a459f2d4c175ce6953dc19a23f Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 30 Dec 2012 14:08:44 -0500 Subject: [PATCH 256/678] cpufreq: ondemand.c: better performance --- drivers/cpufreq/cpufreq_ondemand.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index d0407135339..67ef5e9fe7f 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -37,12 +37,12 @@ * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (3) -#define DEF_FREQUENCY_UP_THRESHOLD (85) -#define DEF_SAMPLING_DOWN_FACTOR (1) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_SAMPLING_DOWN_FACTOR (3) #define MAX_SAMPLING_DOWN_FACTOR (100000) -#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) -#define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL 
(10) +#define MICRO_FREQUENCY_UP_THRESHOLD (80) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) @@ -323,7 +323,7 @@ static ssize_t store_two_phase_freq(struct kobject *a, struct attribute *b, } #endif -static unsigned int Touch_poke_attr[4] = {1100000, 860000, 0, 0}; +static unsigned int Touch_poke_attr[4] = {1500000, 0, 0, 0}; static unsigned int Touch_poke_boost_duration_ms = 0; static unsigned long Touch_poke_boost_till_jiffies = 0; From ee72f025eacc1de2a5ec4b3382dcac262a130cdf Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 30 Dec 2012 14:09:20 -0500 Subject: [PATCH 257/678] mach-tegra: cpu-tegra3.c: better performance --- arch/arm/mach-tegra/cpu-tegra3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index cdde7ef7e2b..04feb500386 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -39,9 +39,9 @@ #include "clock.h" #define INITIAL_STATE TEGRA_HP_DISABLED -#define UP2G0_DELAY_MS 1000 +#define UP2G0_DELAY_MS 100 #define UP2Gn_DELAY_MS 200 -#define DOWN_DELAY_MS 1000 +#define DOWN_DELAY_MS 4000 static struct mutex *tegra3_cpu_lock; @@ -66,7 +66,7 @@ module_param(idle_bottom_freq, uint, 0644); static int mp_overhead = 10; module_param(mp_overhead, int, 0644); -static int balance_level = 70; +static int balance_level = 60; module_param(balance_level, int, 0644); static struct clk *cpu_clk; @@ -438,7 +438,7 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) hp_state = TEGRA_HP_DOWN; queue_delayed_work( #ifdef CONFIG_TEGRA_RUNNABLE_THREAD - hotplug_wq, &hotplug_work, up_delay); + hotplug_wq, &hotplug_work, up2gn_delay); #else hotplug_wq, &hotplug_work, down_delay); #endif From 27b00a4d7e8716d8a384696b58ba85f40dc74f8a Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 30 Dec 2012 14:42:04 -0500 Subject: [PATCH 258/678] Revert "video: tegra: nvmap: Add sanity checks for page pools." This reverts commit a6188919776f17e15ff23eb0083e95e47aa32b65. --- drivers/video/tegra/nvmap/nvmap_handle.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/video/tegra/nvmap/nvmap_handle.c b/drivers/video/tegra/nvmap/nvmap_handle.c index eddaef875f3..539b7ce9801 100644 --- a/drivers/video/tegra/nvmap/nvmap_handle.c +++ b/drivers/video/tegra/nvmap/nvmap_handle.c @@ -91,11 +91,8 @@ static struct page *nvmap_page_pool_alloc_locked(struct nvmap_page_pool *pool) { struct page *page = NULL; - if (pool->npages > 0) { + if (pool->npages > 0) page = pool->page_array[--pool->npages]; - atomic_dec(&page->_count); - BUG_ON(atomic_read(&page->_count) != 1); - } return page; } @@ -116,9 +113,7 @@ static bool nvmap_page_pool_release_locked(struct nvmap_page_pool *pool, { int ret = false; - BUG_ON(atomic_read(&page->_count) != 1); if (enable_pp && pool->npages < pool->max_pages) { - atomic_inc(&page->_count); pool->page_array[pool->npages++] = page; ret = true; } @@ -145,7 +140,6 @@ static int nvmap_page_pool_get_available_count(struct nvmap_page_pool *pool) static int nvmap_page_pool_free(struct nvmap_page_pool *pool, int nr_free) { - int err; int i = nr_free; int idx = 0; struct page *page; @@ -161,12 +155,8 @@ static int nvmap_page_pool_free(struct nvmap_page_pool *pool, int nr_free) i--; } - if (idx) { - /* This op should never fail. 
*/ - err = set_pages_array_wb(pool->shrink_array, idx); - BUG_ON(err); - } - + if (idx) + set_pages_array_wb(pool->shrink_array, idx); while (idx--) __free_page(pool->shrink_array[idx]); nvmap_page_pool_unlock(pool); From 1e44421d0f508d161ed37396d7034b1de8ecf070 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 2 Jan 2013 16:05:37 -0500 Subject: [PATCH 259/678] Revert "cpufreq: interactive.c: kang newest interactive" This reverts commit 2a6c6b42be0784584751e43dca310d2a14c62ccf. --- drivers/cpufreq/cpufreq_interactive.c | 966 ++++++++++----------- include/trace/events/cpufreq_interactive.h | 58 +- 2 files changed, 485 insertions(+), 539 deletions(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 1ac148b6929..19daca8c5e4 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -19,39 +19,48 @@ #include #include #include -#include -#include -#include +#include #include #include #include #include #include #include +#include #include #include #include +#include +#include + +#include "../../arch/arm/mach-tegra/clock.h" +#include "../../arch/arm/mach-tegra/pm.h" #define CREATE_TRACE_POINTS #include +/* lpcpu variables */ +static struct clk *cpu_lp_clk; +static unsigned int idle_top_freq; + static atomic_t active_count = ATOMIC_INIT(0); struct cpufreq_interactive_cpuinfo { struct timer_list cpu_timer; - struct timer_list cpu_slack_timer; - spinlock_t load_lock; /* protects the next 4 fields */ + int timer_idlecancel; u64 time_in_idle; - u64 time_in_idle_timestamp; - u64 cputime_speedadj; - u64 cputime_speedadj_timestamp; + u64 time_in_iowait; + u64 idle_exit_time; + u64 timer_run_time; + int idling; + u64 freq_change_time; + u64 freq_change_time_in_idle; + u64 freq_change_time_in_iowait; struct cpufreq_policy *policy; struct cpufreq_frequency_table *freq_table; unsigned int target_freq; unsigned int floor_freq; u64 floor_validate_time; - u64 hispeed_validate_time; - struct rw_semaphore enable_sem; int governor_enabled; }; @@ -62,44 +71,55 @@ static struct task_struct *speedchange_task; static cpumask_t speedchange_cpumask; static spinlock_t speedchange_cpumask_lock; +struct cpufreq_interactive_core_lock { + struct pm_qos_request_list qos_min_req; + struct pm_qos_request_list qos_max_req; + struct task_struct *lock_task; + struct work_struct unlock_work; + struct timer_list unlock_timer; + int request_active; + unsigned long lock_period; + struct mutex mutex; +}; + +/* default timeout for core lock down */ +#define DEFAULT_CORE_LOCK_PERIOD 200000 /* 200 ms */ + +static struct cpufreq_interactive_core_lock core_lock; + /* Hi speed to bump to from lo speed when load burst (default max) */ -static unsigned int hispeed_freq; +static unsigned int hispeed_freq = 1300000; -/* Go to hi speed when CPU load at or above this value. */ -#define DEFAULT_GO_HISPEED_LOAD 99 -static unsigned long go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; +/* Go to hispeed_freq when CPU load at or above this value. */ +#define DEFAULT_GO_HISPEED_LOAD 85 +static unsigned long go_hispeed_load; -/* Target load. Lower values result in higher CPU speeds. 
*/ -#define DEFAULT_TARGET_LOAD 90 -static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; -static spinlock_t target_loads_lock; -static unsigned int *target_loads = default_target_loads; -static int ntarget_loads = ARRAY_SIZE(default_target_loads); +/* Consider IO as busy */ +static unsigned long io_is_busy; /* * The minimum amount of time to spend at a frequency before we can ramp down. */ -#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC) -static unsigned long min_sample_time = DEFAULT_MIN_SAMPLE_TIME; +#define DEFAULT_MIN_SAMPLE_TIME 60000; +static unsigned long min_sample_time; /* * The sample rate of the timer used to increase frequency */ -#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC) -static unsigned long timer_rate = DEFAULT_TIMER_RATE; +#define DEFAULT_TIMER_RATE 40000; +static unsigned long timer_rate; /* * Wait this long before raising speed above hispeed, by default a single * timer interval. */ #define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE -static unsigned long above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY; +static unsigned long above_hispeed_delay_val; /* * Boost pulse to hispeed on touchscreen input. */ - -static int input_boost_val; +static int input_boost_val = 1; struct cpufreq_interactive_inputopen { struct input_handle *handle; @@ -109,19 +129,10 @@ struct cpufreq_interactive_inputopen { static struct cpufreq_interactive_inputopen inputopen; static struct workqueue_struct *inputopen_wq; -/* Non-zero means indefinite speed boost active */ -static int boost_val; -/* Duration of a boot pulse in usecs */ -static int boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME; -/* End time of boost pulse in ktime converted to usecs */ -static u64 boostpulse_endtime; - /* - * Max additional time to wait in idle, beyond timer_rate, at speeds above - * minimum before wakeup to reduce speed, or -1 if unnecessary. + * Non-zero means longer-term speed boost active. */ -#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE) -static int timer_slack_val = DEFAULT_TIMER_SLACK; +static int boost_val; static int cpufreq_governor_interactive(struct cpufreq_policy *policy, unsigned int event); @@ -136,208 +147,156 @@ struct cpufreq_governor cpufreq_gov_interactive = { .owner = THIS_MODULE, }; -static void cpufreq_interactive_timer_resched( +static unsigned int cpufreq_interactive_get_target( + int cpu_load, int load_since_change, struct cpufreq_interactive_cpuinfo *pcpu) { - unsigned long expires = jiffies + usecs_to_jiffies(timer_rate); - - mod_timer_pinned(&pcpu->cpu_timer, expires); - if (timer_slack_val >= 0 && pcpu->target_freq > pcpu->policy->min) { - expires += usecs_to_jiffies(timer_slack_val); - mod_timer_pinned(&pcpu->cpu_slack_timer, expires); - } - - spin_lock(&pcpu->load_lock); - pcpu->time_in_idle = - get_cpu_idle_time_us(smp_processor_id(), - &pcpu->time_in_idle_timestamp); - pcpu->cputime_speedadj = 0; - pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; - spin_unlock(&pcpu->load_lock); -} - -static unsigned int freq_to_targetload(unsigned int freq) -{ - int i; - unsigned int ret; - - spin_lock(&target_loads_lock); - - for (i = 0; i < ntarget_loads - 1 && freq >= target_loads[i+1]; i += 2) - ; - - ret = target_loads[i]; - spin_unlock(&target_loads_lock); - return ret; -} - -/* - * If increasing frequencies never map to a lower target load then - * choose_freq() will find the minimum frequency that does not exceed its - * target load given the current load. 
- */ - -static unsigned int choose_freq( - struct cpufreq_interactive_cpuinfo *pcpu, unsigned int loadadjfreq) -{ - unsigned int freq = pcpu->policy->cur; - unsigned int prevfreq, freqmin, freqmax; - unsigned int tl; - int index; - - freqmin = 0; - freqmax = UINT_MAX; - - do { - prevfreq = freq; - tl = freq_to_targetload(freq); - - /* - * Find the lowest frequency where the computed load is less - * than or equal to the target load. - */ + unsigned int target_freq; - cpufreq_frequency_table_target( - pcpu->policy, pcpu->freq_table, loadadjfreq / tl, - CPUFREQ_RELATION_L, &index); - freq = pcpu->freq_table[index].frequency; - - if (freq > prevfreq) { - /* The previous frequency is too low. */ - freqmin = prevfreq; - - if (freq >= freqmax) { - /* - * Find the highest frequency that is less - * than freqmax. - */ - cpufreq_frequency_table_target( - pcpu->policy, pcpu->freq_table, - freqmax - 1, CPUFREQ_RELATION_H, - &index); - freq = pcpu->freq_table[index].frequency; - - if (freq == freqmin) { - /* - * The first frequency below freqmax - * has already been found to be too - * low. freqmax is the lowest speed - * we found that is fast enough. - */ - freq = freqmax; - break; - } - } - } else if (freq < prevfreq) { - /* The previous frequency is high enough. */ - freqmax = prevfreq; - - if (freq <= freqmin) { - /* - * Find the lowest frequency that is higher - * than freqmin. - */ - cpufreq_frequency_table_target( - pcpu->policy, pcpu->freq_table, - freqmin + 1, CPUFREQ_RELATION_L, - &index); - freq = pcpu->freq_table[index].frequency; - - /* - * If freqmax is the first frequency above - * freqmin then we have already found that - * this speed is fast enough. - */ - if (freq == freqmax) - break; + /* + * Choose greater of short-term load (since last idle timer + * started or timer function re-armed itself) or long-term load + * (since last frequency change). + */ + if (load_since_change > cpu_load) + cpu_load = load_since_change; + + /* Jump boost policy */ + if (cpu_load >= go_hispeed_load || boost_val) { + if (pcpu->target_freq < hispeed_freq && + hispeed_freq < pcpu->policy->max) { + target_freq = hispeed_freq; + } else { + target_freq = pcpu->policy->max * cpu_load / 100; + + if (target_freq < hispeed_freq) + target_freq = hispeed_freq; + + if (pcpu->target_freq == hispeed_freq && + target_freq > hispeed_freq && + cputime64_sub(pcpu->timer_run_time, + pcpu->freq_change_time) + < above_hispeed_delay_val) { + + target_freq = pcpu->target_freq; + trace_cpufreq_interactive_notyet( + smp_processor_id(), + cpu_load, + pcpu->target_freq, + target_freq); } } + } else { + target_freq = idle_top_freq * cpu_load / 100; + } - /* If same frequency chosen as previous then done. 
*/ - } while (freq != prevfreq); - - return freq; + target_freq = min(target_freq, pcpu->policy->max); + return target_freq; } -static u64 update_load(int cpu) +static inline cputime64_t get_cpu_iowait_time( + unsigned int cpu, cputime64_t *wall) { - struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); - u64 now; - u64 now_idle; - unsigned int delta_idle; - unsigned int delta_time; - u64 active_time; + u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); - now_idle = get_cpu_idle_time_us(cpu, &now); - delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle); - delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp); - active_time = delta_time - delta_idle; - pcpu->cputime_speedadj += active_time * pcpu->policy->cur; + if (iowait_time == -1ULL) + return 0; - pcpu->time_in_idle = now_idle; - pcpu->time_in_idle_timestamp = now; - return now; + return iowait_time; } static void cpufreq_interactive_timer(unsigned long data) { - u64 now; + unsigned int delta_idle; + unsigned int delta_iowait; unsigned int delta_time; - u64 cputime_speedadj; int cpu_load; + int load_since_change; + u64 time_in_idle; + u64 time_in_iowait; + u64 idle_exit_time; struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, data); + u64 now_idle; + u64 now_iowait; unsigned int new_freq; - unsigned int loadadjfreq; unsigned int index; unsigned long flags; - bool boosted; - if (!down_read_trylock(&pcpu->enable_sem)) - return; + smp_rmb(); + if (!pcpu->governor_enabled) goto exit; - spin_lock(&pcpu->load_lock); - now = update_load(data); - delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp); - cputime_speedadj = pcpu->cputime_speedadj; - spin_unlock(&pcpu->load_lock); + /* + * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time, + * this lets idle exit know the current idle time sample has + * been processed, and idle exit can generate a new sample and + * re-arm the timer. This prevents a concurrent idle + * exit on that CPU from writing a new set of info at the same time + * the timer function runs (the timer function can't use that info + * until more time passes). + */ + time_in_idle = pcpu->time_in_idle; + time_in_iowait = pcpu->time_in_iowait; + idle_exit_time = pcpu->idle_exit_time; + now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time); + now_iowait = get_cpu_iowait_time(data, NULL); + smp_wmb(); + + /* If we raced with cancelling a timer, skip. */ + if (!idle_exit_time) + goto exit; + + delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle); + delta_iowait = (unsigned int) cputime64_sub(now_iowait, time_in_iowait); + delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, + idle_exit_time); - if (WARN_ON_ONCE(!delta_time)) + /* + * If timer ran less than 1ms after short-term sample started, retry. 
+ */ + if (delta_time < 1000) goto rearm; - do_div(cputime_speedadj, delta_time); - loadadjfreq = (unsigned int)cputime_speedadj * 100; - cpu_load = loadadjfreq / pcpu->target_freq; - boosted = boost_val || now < boostpulse_endtime; + if (delta_idle > delta_time) + cpu_load = 0; + else { + if (io_is_busy && delta_idle >= delta_iowait) + delta_idle -= delta_iowait; - if (cpu_load >= go_hispeed_load || boosted) { - if (pcpu->target_freq < hispeed_freq) { - new_freq = hispeed_freq; - } else { - new_freq = choose_freq(pcpu, loadadjfreq); - - if (new_freq < hispeed_freq) - new_freq = hispeed_freq; - } - } else { - new_freq = choose_freq(pcpu, loadadjfreq); + cpu_load = 100 * (delta_time - delta_idle) / delta_time; } - if (pcpu->target_freq >= hispeed_freq && - new_freq > pcpu->target_freq && - now - pcpu->hispeed_validate_time < above_hispeed_delay_val) { - trace_cpufreq_interactive_notyet( - data, cpu_load, pcpu->target_freq, - pcpu->policy->cur, new_freq); - goto rearm; + delta_idle = (unsigned int) cputime64_sub(now_idle, + pcpu->freq_change_time_in_idle); + delta_iowait = (unsigned int) cputime64_sub(now_iowait, + pcpu->freq_change_time_in_iowait); + delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, + pcpu->freq_change_time); + + if ((delta_time == 0) || (delta_idle > delta_time)) + load_since_change = 0; + else { + if (io_is_busy && delta_idle >= delta_iowait) + delta_idle -= delta_iowait; + + load_since_change = + 100 * (delta_time - delta_idle) / delta_time; } - - pcpu->hispeed_validate_time = now; + + /* + * Combine short-term load (since last idle timer started or timer + * function re-armed itself) and long-term load (since last frequency + * change) to determine new target frequency. + * + * This function implements the cpufreq scaling policy + */ + new_freq = cpufreq_interactive_get_target(cpu_load, load_since_change, + pcpu); if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table, - new_freq, CPUFREQ_RELATION_L, + new_freq, CPUFREQ_RELATION_H, &index)) { pr_warn_once("timer %d: cpufreq_frequency_table_target error\n", (int) data); @@ -351,37 +310,28 @@ static void cpufreq_interactive_timer(unsigned long data) * floor frequency for the minimum sample time since last validated. */ if (new_freq < pcpu->floor_freq) { - if (now - pcpu->floor_validate_time < min_sample_time) { - trace_cpufreq_interactive_notyet( - data, cpu_load, pcpu->target_freq, - pcpu->policy->cur, new_freq); + if (cputime64_sub(pcpu->timer_run_time, + pcpu->floor_validate_time) + < min_sample_time) { + + trace_cpufreq_interactive_notyet(data, cpu_load, + pcpu->target_freq, new_freq); goto rearm; } } - /* - * Update the timestamp for checking whether speed has been held at - * or above the selected frequency for a minimum of min_sample_time, - * if not boosted to hispeed_freq. If boosted to hispeed_freq then we - * allow the speed to drop as soon as the boostpulse duration expires - * (or the indefinite boost is turned off). 
- */ - - if (!boosted || new_freq > hispeed_freq) { - pcpu->floor_freq = new_freq; - pcpu->floor_validate_time = now; - } + pcpu->floor_freq = new_freq; + pcpu->floor_validate_time = pcpu->timer_run_time; if (pcpu->target_freq == new_freq) { - trace_cpufreq_interactive_already( - data, cpu_load, pcpu->target_freq, - pcpu->policy->cur, new_freq); + trace_cpufreq_interactive_already(data, cpu_load, + pcpu->target_freq, new_freq); goto rearm_if_notmax; } trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq, - pcpu->policy->cur, new_freq); - + new_freq); + pcpu->target_freq = new_freq; spin_lock_irqsave(&speedchange_cpumask_lock, flags); cpumask_set_cpu(data, &speedchange_cpumask); @@ -397,11 +347,31 @@ static void cpufreq_interactive_timer(unsigned long data) goto exit; rearm: - if (!timer_pending(&pcpu->cpu_timer)) - cpufreq_interactive_timer_resched(pcpu); + if (!timer_pending(&pcpu->cpu_timer)) { + /* + * If already at min: if that CPU is idle, don't set timer. + * Else cancel the timer if that CPU goes idle. We don't + * need to re-evaluate speed until the next idle exit. + */ + if (pcpu->target_freq == pcpu->policy->min) { + smp_rmb(); + + if (pcpu->idling) + goto exit; + + pcpu->timer_idlecancel = 1; + } + + pcpu->time_in_idle = get_cpu_idle_time_us( + data, &pcpu->idle_exit_time); + pcpu->time_in_iowait = get_cpu_iowait_time( + data, NULL); + + mod_timer(&pcpu->cpu_timer, + jiffies + usecs_to_jiffies(timer_rate)); + } exit: - up_read(&pcpu->enable_sem); return; } @@ -411,16 +381,15 @@ static void cpufreq_interactive_idle_start(void) &per_cpu(cpuinfo, smp_processor_id()); int pending; - if (!down_read_trylock(&pcpu->enable_sem)) - return; - if (!pcpu->governor_enabled) { - up_read(&pcpu->enable_sem); + if (!pcpu->governor_enabled) return; - } + pcpu->idling = 1; + smp_wmb(); pending = timer_pending(&pcpu->cpu_timer); if (pcpu->target_freq != pcpu->policy->min) { +#ifdef CONFIG_SMP /* * Entering idle while not at lowest speed. On some * platforms this can hold the other CPU(s) at that speed @@ -429,11 +398,35 @@ static void cpufreq_interactive_idle_start(void) * min indefinitely. This should probably be a quirk of * the CPUFreq driver. */ - if (!pending) - cpufreq_interactive_timer_resched(pcpu); + if (!pending) { + pcpu->time_in_idle = get_cpu_idle_time_us( + smp_processor_id(), &pcpu->idle_exit_time); + pcpu->time_in_iowait = get_cpu_iowait_time( + smp_processor_id(), NULL); + pcpu->timer_idlecancel = 0; + mod_timer(&pcpu->cpu_timer, + jiffies + usecs_to_jiffies(timer_rate)); + } +#endif + } else { + /* + * If at min speed and entering idle after load has + * already been evaluated, and a timer has been set just in + * case the CPU suddenly goes busy, cancel that timer. The + * CPU didn't go busy; we'll recheck things upon idle exit. + */ + if (pending && pcpu->timer_idlecancel) { + del_timer(&pcpu->cpu_timer); + /* + * Ensure last timer run time is after current idle + * sample start time, so next idle exit will always + * start a new idle sampling period. + */ + pcpu->idle_exit_time = 0; + pcpu->timer_idlecancel = 0; + } } - up_read(&pcpu->enable_sem); } static void cpufreq_interactive_idle_end(void) @@ -441,23 +434,34 @@ static void cpufreq_interactive_idle_end(void) struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, smp_processor_id()); - if (!down_read_trylock(&pcpu->enable_sem)) - return; - if (!pcpu->governor_enabled) { - up_read(&pcpu->enable_sem); - return; - } + pcpu->idling = 0; + smp_wmb(); - /* Arm the timer for 1-2 ticks later if not already. 
*/ - if (!timer_pending(&pcpu->cpu_timer)) { - cpufreq_interactive_timer_resched(pcpu); - } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) { - del_timer(&pcpu->cpu_timer); - del_timer(&pcpu->cpu_slack_timer); - cpufreq_interactive_timer(smp_processor_id()); + /* + * Arm the timer for 1-2 ticks later if not already, and if the timer + * function has already processed the previous load sampling + * interval. (If the timer is not pending but has not processed + * the previous interval, it is probably racing with us on another + * CPU. Let it compute load based on the previous sample and then + * re-arm the timer for another interval when it's done, rather + * than updating the interval start time to be "now", which doesn't + * give the timer function enough time to make a decision on this + * run.) + */ + if (timer_pending(&pcpu->cpu_timer) == 0 && + pcpu->timer_run_time >= pcpu->idle_exit_time && + pcpu->governor_enabled) { + pcpu->time_in_idle = + get_cpu_idle_time_us(smp_processor_id(), + &pcpu->idle_exit_time); + pcpu->time_in_iowait = + get_cpu_iowait_time(smp_processor_id(), + NULL); + pcpu->timer_idlecancel = 0; + mod_timer(&pcpu->cpu_timer, + jiffies + usecs_to_jiffies(timer_rate)); } - up_read(&pcpu->enable_sem); } static int cpufreq_interactive_speedchange_task(void *data) @@ -472,8 +476,7 @@ static int cpufreq_interactive_speedchange_task(void *data) spin_lock_irqsave(&speedchange_cpumask_lock, flags); if (cpumask_empty(&speedchange_cpumask)) { - spin_unlock_irqrestore(&speedchange_cpumask_lock, - flags); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); schedule(); if (kthread_should_stop()) @@ -492,12 +495,10 @@ static int cpufreq_interactive_speedchange_task(void *data) unsigned int max_freq = 0; pcpu = &per_cpu(cpuinfo, cpu); - if (!down_read_trylock(&pcpu->enable_sem)) - continue; - if (!pcpu->governor_enabled) { - up_read(&pcpu->enable_sem); + smp_rmb(); + + if (!pcpu->governor_enabled) continue; - } for_each_cpu(j, pcpu->policy->cpus) { struct cpufreq_interactive_cpuinfo *pjcpu = @@ -507,25 +508,32 @@ static int cpufreq_interactive_speedchange_task(void *data) max_freq = pjcpu->target_freq; } - if (max_freq != pcpu->policy->cur) - __cpufreq_driver_target(pcpu->policy, - max_freq, - CPUFREQ_RELATION_H); - trace_cpufreq_interactive_setspeed(cpu, - pcpu->target_freq, - pcpu->policy->cur); + __cpufreq_driver_target(pcpu->policy, + max_freq, + CPUFREQ_RELATION_H); - up_read(&pcpu->enable_sem); + trace_cpufreq_interactive_setspeed(cpu, pcpu->target_freq, + pcpu->policy->cur); + + pcpu->freq_change_time_in_idle = + get_cpu_idle_time_us(cpu, + &pcpu->freq_change_time); + pcpu->freq_change_time_in_iowait = + get_cpu_iowait_time(cpu, NULL); } } return 0; } +static unsigned int Touch_poke_attr[4] = {1100000, 860000, 0, 0}; + static void cpufreq_interactive_boost(void) { int i; int anyboost = 0; + unsigned int nr_cpus; + unsigned int input_boost_freq; unsigned long flags; struct cpufreq_interactive_cpuinfo *pcpu; @@ -534,20 +542,24 @@ static void cpufreq_interactive_boost(void) for_each_online_cpu(i) { pcpu = &per_cpu(cpuinfo, i); - if (pcpu->target_freq < hispeed_freq) { - pcpu->target_freq = hispeed_freq; + nr_cpus = num_online_cpus(); + + if (!is_lp_cluster()) { + input_boost_freq = Touch_poke_attr[nr_cpus-1]; + } else { + input_boost_freq = idle_top_freq; + } + if (pcpu->target_freq < input_boost_freq) { + pcpu->target_freq = input_boost_freq; cpumask_set_cpu(i, &speedchange_cpumask); - pcpu->hispeed_validate_time = - ktime_to_us(ktime_get()); anyboost = 1; } - 
/* - * Set floor freq and (re)start timer for when last + /* Set floor freq and (re)start timer for when last * validated. */ - pcpu->floor_freq = hispeed_freq; + pcpu->floor_freq = input_boost_freq; pcpu->floor_validate_time = ktime_to_us(ktime_get()); } @@ -557,112 +569,70 @@ static void cpufreq_interactive_boost(void) wake_up_process(speedchange_task); } -static int cpufreq_interactive_notifier( - struct notifier_block *nb, unsigned long val, void *data) +static void cpufreq_interactive_core_lock_timer(unsigned long data) { - struct cpufreq_freqs *freq = data; - struct cpufreq_interactive_cpuinfo *pcpu; - int cpu; + queue_work(inputopen_wq, &core_lock.unlock_work); +} - if (val == CPUFREQ_POSTCHANGE) { - pcpu = &per_cpu(cpuinfo, freq->cpu); - if (!down_read_trylock(&pcpu->enable_sem)) - return 0; - if (!pcpu->governor_enabled) { - up_read(&pcpu->enable_sem); - return 0; - } +static void cpufreq_interactive_unlock_cores(struct work_struct *wq) +{ + struct cpufreq_interactive_core_lock *cl = + container_of(wq, struct cpufreq_interactive_core_lock, + unlock_work); - for_each_cpu(cpu, pcpu->policy->cpus) { - struct cpufreq_interactive_cpuinfo *pjcpu = - &per_cpu(cpuinfo, cpu); - spin_lock(&pjcpu->load_lock); - update_load(cpu); - spin_unlock(&pjcpu->load_lock); - } + mutex_lock(&cl->mutex); - up_read(&pcpu->enable_sem); + if (--cl->request_active) { + goto done; } - return 0; -} - -static struct notifier_block cpufreq_notifier_block = { - .notifier_call = cpufreq_interactive_notifier, -}; - -static ssize_t show_target_loads( - struct kobject *kobj, struct attribute *attr, char *buf) -{ - int i; - ssize_t ret = 0; - spin_lock(&target_loads_lock); + pm_qos_update_request(&cl->qos_min_req, + PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); - for (i = 0; i < ntarget_loads; i++) - ret += sprintf(buf + ret, "%u%s", target_loads[i], - i & 0x1 ? ":" : " "); + pm_qos_update_request(&cl->qos_max_req, + PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE); - ret += sprintf(buf + ret, "\n"); - spin_unlock(&target_loads_lock); - return ret; +done: + mutex_unlock(&cl->mutex); } -static ssize_t store_target_loads( - struct kobject *kobj, struct attribute *attr, const char *buf, - size_t count) +/* Lock down to whatever # of cores online + * right now. + * + * A pm_qos request for 1 online CPU results in + * an instant cluster switch. 
+ */ +static void cpufreq_interactive_lock_cores(void) { - int ret; - const char *cp; - unsigned int *new_target_loads = NULL; - int ntokens = 1; - int i; - - cp = buf; - while ((cp = strpbrk(cp + 1, " :"))) - ntokens++; - - if (!(ntokens & 0x1)) - goto err_inval; + unsigned int ncpus; - new_target_loads = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL); - if (!new_target_loads) { - ret = -ENOMEM; - goto err; - } + mutex_lock(&core_lock.mutex); - cp = buf; - i = 0; - while (i < ntokens) { - if (sscanf(cp, "%u", &new_target_loads[i++]) != 1) - goto err_inval; - - cp = strpbrk(cp, " :"); - if (!cp) - break; - cp++; + if (core_lock.request_active) { + goto arm_timer; } - if (i != ntokens) - goto err_inval; + ncpus = num_online_cpus(); + pm_qos_update_request(&core_lock.qos_min_req, ncpus); + pm_qos_update_request(&core_lock.qos_max_req, ncpus); + core_lock.request_active++; - spin_lock(&target_loads_lock); - if (target_loads != default_target_loads) - kfree(target_loads); - target_loads = new_target_loads; - ntarget_loads = ntokens; - spin_unlock(&target_loads_lock); - return count; +arm_timer: + mod_timer(&core_lock.unlock_timer, + jiffies + usecs_to_jiffies(core_lock.lock_period)); -err_inval: - ret = -EINVAL; -err: - kfree(new_target_loads); - return ret; + mutex_unlock(&core_lock.mutex); } -static struct global_attr target_loads_attr = - __ATTR(target_loads, S_IRUGO | S_IWUSR, - show_target_loads, store_target_loads); +static int cpufreq_interactive_lock_cores_task(void *data) +{ + while(1) { + cpufreq_interactive_lock_cores(); + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + return 0; +} /* * Pulsed boost on input event raises CPUs to hispeed_freq and lets @@ -675,7 +645,7 @@ static void cpufreq_interactive_input_event(struct input_handle *handle, unsigned int code, int value) { if (input_boost_val && type == EV_SYN && code == SYN_REPORT) { - trace_cpufreq_interactive_boost("input"); + wake_up_process(core_lock.lock_task); cpufreq_interactive_boost(); } } @@ -754,6 +724,44 @@ static struct input_handler cpufreq_interactive_input_handler = { .id_table = cpufreq_interactive_ids, }; +static ssize_t show_input_boost(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", input_boost_val); +} + +static ssize_t store_input_boost(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = strict_strtoul(buf, 0, &val); + if (ret < 0) + return ret; + input_boost_val = val; + return count; +} + +define_one_global_rw(input_boost); + +static ssize_t show_io_is_busy(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", io_is_busy); +} + +static ssize_t store_io_is_busy(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + if (!strict_strtoul(buf, 0, &io_is_busy)) + return count; + return -EINVAL; +} + +static struct global_attr io_is_busy_attr = __ATTR(io_is_busy, 0644, + show_io_is_busy, store_io_is_busy); + static ssize_t show_hispeed_freq(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -866,50 +874,6 @@ static ssize_t store_timer_rate(struct kobject *kobj, static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644, show_timer_rate, store_timer_rate); -static ssize_t show_timer_slack( - struct kobject *kobj, struct attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", timer_slack_val); -} - -static ssize_t store_timer_slack( - struct kobject *kobj, struct attribute *attr, const char 
*buf, - size_t count) -{ - int ret; - unsigned long val; - - ret = kstrtol(buf, 10, &val); - if (ret < 0) - return ret; - - timer_slack_val = val; - return count; -} - -define_one_global_rw(timer_slack); - -static ssize_t show_input_boost(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - return sprintf(buf, "%u\n", input_boost_val); -} - -static ssize_t store_input_boost(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - input_boost_val = val; - return count; -} - -define_one_global_rw(input_boost); - static ssize_t show_boost(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -928,72 +892,26 @@ static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, boost_val = val; - if (boost_val) { - trace_cpufreq_interactive_boost("on"); + if (boost_val) cpufreq_interactive_boost(); - } else { - trace_cpufreq_interactive_unboost("off"); - } - - return count; -} - -define_one_global_rw(boost); - -static ssize_t store_boostpulse(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = kstrtoul(buf, 0, &val); - if (ret < 0) - return ret; - - boostpulse_endtime = ktime_to_us(ktime_get()) + boostpulse_duration_val; - trace_cpufreq_interactive_boost("pulse"); - cpufreq_interactive_boost(); - return count; -} - -static struct global_attr boostpulse = - __ATTR(boostpulse, 0200, NULL, store_boostpulse); - -static ssize_t show_boostpulse_duration( - struct kobject *kobj, struct attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", boostpulse_duration_val); -} - -static ssize_t store_boostpulse_duration( - struct kobject *kobj, struct attribute *attr, const char *buf, - size_t count) -{ - int ret; - unsigned long val; - ret = kstrtoul(buf, 0, &val); - if (ret < 0) - return ret; + if (!boost_val) + trace_cpufreq_interactive_unboost(hispeed_freq); - boostpulse_duration_val = val; return count; } -define_one_global_rw(boostpulse_duration); +define_one_global_rw(boost); static struct attribute *interactive_attributes[] = { - &target_loads_attr.attr, + &io_is_busy_attr.attr, &hispeed_freq_attr.attr, &go_hispeed_load_attr.attr, &above_hispeed_delay.attr, &min_sample_time_attr.attr, &timer_rate_attr.attr, - &timer_slack.attr, &input_boost.attr, &boost.attr, - &boostpulse.attr, - &boostpulse_duration.attr, NULL, }; @@ -1003,18 +921,18 @@ static struct attribute_group interactive_attr_group = { }; static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, - unsigned long val, - void *data) + unsigned long val, + void *data) { switch (val) { - case IDLE_START: - cpufreq_interactive_idle_start(); - break; - case IDLE_END: - cpufreq_interactive_idle_end(); - break; + case IDLE_START: + cpufreq_interactive_idle_start(); + break; + case IDLE_END: + cpufreq_interactive_idle_end(); + break; } - + return 0; } @@ -1037,34 +955,32 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, freq_table = cpufreq_frequency_get_table(policy->cpu); - if (!hispeed_freq) - hispeed_freq = policy->max; for_each_cpu(j, policy->cpus) { - unsigned long expires; - pcpu = &per_cpu(cpuinfo, j); pcpu->policy = policy; pcpu->target_freq = policy->cur; pcpu->freq_table = freq_table; + pcpu->freq_change_time_in_idle = + get_cpu_idle_time_us(j, + &pcpu->freq_change_time); + pcpu->time_in_idle = pcpu->freq_change_time_in_idle; + pcpu->idle_exit_time = pcpu->freq_change_time; + 
pcpu->freq_change_time_in_iowait = + get_cpu_iowait_time(j, NULL); + pcpu->time_in_iowait = pcpu->freq_change_time_in_iowait; + + pcpu->timer_idlecancel = 1; pcpu->floor_freq = pcpu->target_freq; pcpu->floor_validate_time = - ktime_to_us(ktime_get()); - pcpu->hispeed_validate_time = - pcpu->floor_validate_time; - down_write(&pcpu->enable_sem); - expires = jiffies + usecs_to_jiffies(timer_rate); - pcpu->cpu_timer.expires = expires; - add_timer_on(&pcpu->cpu_timer, j); - if (timer_slack_val >= 0) { - expires += usecs_to_jiffies(timer_slack_val); - pcpu->cpu_slack_timer.expires = expires; - add_timer_on(&pcpu->cpu_slack_timer, j); - } + pcpu->freq_change_time; pcpu->governor_enabled = 1; - up_write(&pcpu->enable_sem); + smp_wmb(); } + if (!hispeed_freq) + hispeed_freq = policy->max; + /* * Do not register the idle hook and create sysfs * entries if we have already done so. @@ -1080,29 +996,31 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, rc = input_register_handler(&cpufreq_interactive_input_handler); if (rc) pr_warn("%s: failed to register input handler\n", - __func__); - + __func__); + idle_notifier_register(&cpufreq_interactive_idle_nb); - cpufreq_register_notifier( - &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); break; case CPUFREQ_GOV_STOP: for_each_cpu(j, policy->cpus) { pcpu = &per_cpu(cpuinfo, j); - down_write(&pcpu->enable_sem); pcpu->governor_enabled = 0; + smp_wmb(); del_timer_sync(&pcpu->cpu_timer); - del_timer_sync(&pcpu->cpu_slack_timer); - up_write(&pcpu->enable_sem); + + /* + * Reset idle exit time since we may cancel the timer + * before it can run after the last idle exit time, + * to avoid tripping the check in idle exit for a timer + * that is trying to run. + */ + pcpu->idle_exit_time = 0; } flush_work(&inputopen.inputopen_work); if (atomic_dec_return(&active_count) > 0) return 0; - - cpufreq_unregister_notifier( - &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); + idle_notifier_unregister(&cpufreq_interactive_idle_nb); input_unregister_handler(&cpufreq_interactive_input_handler); sysfs_remove_group(cpufreq_global_kobject, @@ -1122,49 +1040,78 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, return 0; } -static void cpufreq_interactive_nop_timer(unsigned long data) -{ -} - static int __init cpufreq_interactive_init(void) { unsigned int i; struct cpufreq_interactive_cpuinfo *pcpu; - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + /* + * If MAX_USER_RT_PRIO < MAX_RT_PRIO the kernel thread has higher priority than any user thread + * In this case MAX_USER_RT_PRIO = 99 and MAX_RT_PRIO = 100, therefore boosting the priority of this + * kernel thread above user threads which will, by my reason, increase interactvitiy. 
+ */ + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO-1 }; + + cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); + idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; + + go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; + min_sample_time = DEFAULT_MIN_SAMPLE_TIME; + above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY; + timer_rate = DEFAULT_TIMER_RATE; /* Initalize per-cpu timers */ for_each_possible_cpu(i) { pcpu = &per_cpu(cpuinfo, i); - init_timer_deferrable(&pcpu->cpu_timer); + init_timer(&pcpu->cpu_timer); pcpu->cpu_timer.function = cpufreq_interactive_timer; pcpu->cpu_timer.data = i; - init_timer(&pcpu->cpu_slack_timer); - pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer; - spin_lock_init(&pcpu->load_lock); - init_rwsem(&pcpu->enable_sem); } - spin_lock_init(&target_loads_lock); spin_lock_init(&speedchange_cpumask_lock); + speedchange_task = kthread_create(cpufreq_interactive_speedchange_task, NULL, - "cfinteractive"); + "cfinteractive"); if (IS_ERR(speedchange_task)) - return PTR_ERR(speedchange_task); + return PTR_ERR(speedchange_task); sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, ¶m); get_task_struct(speedchange_task); inputopen_wq = create_workqueue("cfinteractive"); - + if (!inputopen_wq) goto err_freetask; - + INIT_WORK(&inputopen.inputopen_work, cpufreq_interactive_input_open); /* NB: wake up so the thread does not look hung to the freezer */ wake_up_process(speedchange_task); + pm_qos_add_request(&core_lock.qos_min_req, PM_QOS_MIN_ONLINE_CPUS, + PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); + + pm_qos_add_request(&core_lock.qos_max_req, PM_QOS_MAX_ONLINE_CPUS, + PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE); + + init_timer(&core_lock.unlock_timer); + core_lock.unlock_timer.function = cpufreq_interactive_core_lock_timer; + core_lock.unlock_timer.data = 0; + + core_lock.request_active = 0; + core_lock.lock_period = DEFAULT_CORE_LOCK_PERIOD; + mutex_init(&core_lock.mutex); + + core_lock.lock_task = kthread_create(cpufreq_interactive_lock_cores_task, NULL, + "kinteractive_lockcores"); + + if (IS_ERR(core_lock.lock_task)) + return PTR_ERR(core_lock.lock_task); + + sched_setscheduler_nocheck(core_lock.lock_task, SCHED_FIFO, ¶m); + get_task_struct(core_lock.lock_task); + + INIT_WORK(&core_lock.unlock_work, cpufreq_interactive_unlock_cores); return cpufreq_register_governor(&cpufreq_gov_interactive); err_freetask: @@ -1184,6 +1131,11 @@ static void __exit cpufreq_interactive_exit(void) kthread_stop(speedchange_task); put_task_struct(speedchange_task); destroy_workqueue(inputopen_wq); + + pm_qos_remove_request(&core_lock.qos_min_req); + pm_qos_remove_request(&core_lock.qos_max_req); + kthread_stop(core_lock.lock_task); + put_task_struct(core_lock.lock_task); } module_exit(cpufreq_interactive_exit); diff --git a/include/trace/events/cpufreq_interactive.h b/include/trace/events/cpufreq_interactive.h index 951e6ca12da..64c9a825346 100644 --- a/include/trace/events/cpufreq_interactive.h +++ b/include/trace/events/cpufreq_interactive.h @@ -8,7 +8,7 @@ DECLARE_EVENT_CLASS(set, TP_PROTO(u32 cpu_id, unsigned long targfreq, - unsigned long actualfreq), + unsigned long actualfreq), TP_ARGS(cpu_id, targfreq, actualfreq), TP_STRUCT__entry( @@ -36,74 +36,68 @@ DEFINE_EVENT(set, cpufreq_interactive_setspeed, DECLARE_EVENT_CLASS(loadeval, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curtarg, unsigned long curactual, - unsigned long newtarg), - TP_ARGS(cpu_id, load, curtarg, curactual, newtarg), + unsigned long curfreq, unsigned long targfreq), + TP_ARGS(cpu_id, 
load, curfreq, targfreq), TP_STRUCT__entry( __field(unsigned long, cpu_id ) __field(unsigned long, load ) - __field(unsigned long, curtarg ) - __field(unsigned long, curactual ) - __field(unsigned long, newtarg ) + __field(unsigned long, curfreq ) + __field(unsigned long, targfreq ) ), TP_fast_assign( __entry->cpu_id = cpu_id; __entry->load = load; - __entry->curtarg = curtarg; - __entry->curactual = curactual; - __entry->newtarg = newtarg; + __entry->curfreq = curfreq; + __entry->targfreq = targfreq; ), - TP_printk("cpu=%lu load=%lu cur=%lu actual=%lu targ=%lu", - __entry->cpu_id, __entry->load, __entry->curtarg, - __entry->curactual, __entry->newtarg) + TP_printk("cpu=%lu load=%lu cur=%lu targ=%lu", + __entry->cpu_id, __entry->load, __entry->curfreq, + __entry->targfreq) ); DEFINE_EVENT(loadeval, cpufreq_interactive_target, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curtarg, unsigned long curactual, - unsigned long newtarg), - TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) + unsigned long curfreq, unsigned long targfreq), + TP_ARGS(cpu_id, load, curfreq, targfreq) ); DEFINE_EVENT(loadeval, cpufreq_interactive_already, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curtarg, unsigned long curactual, - unsigned long newtarg), - TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) + unsigned long curfreq, unsigned long targfreq), + TP_ARGS(cpu_id, load, curfreq, targfreq) ); DEFINE_EVENT(loadeval, cpufreq_interactive_notyet, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curtarg, unsigned long curactual, - unsigned long newtarg), - TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) + unsigned long curfreq, unsigned long targfreq), + TP_ARGS(cpu_id, load, curfreq, targfreq) ); TRACE_EVENT(cpufreq_interactive_boost, - TP_PROTO(const char *s), - TP_ARGS(s), + TP_PROTO(unsigned long freq), + TP_ARGS(freq), TP_STRUCT__entry( - __string(s, s) + __field(unsigned long, freq) ), TP_fast_assign( - __assign_str(s, s); + __entry->freq = freq; ), - TP_printk("%s", __get_str(s)) + TP_printk("freq=%lu", __entry->freq) ); TRACE_EVENT(cpufreq_interactive_unboost, - TP_PROTO(const char *s), - TP_ARGS(s), + TP_PROTO(unsigned long freq), + TP_ARGS(freq), TP_STRUCT__entry( - __string(s, s) + __field(unsigned long, freq) ), TP_fast_assign( - __assign_str(s, s); + __entry->freq = freq; ), - TP_printk("%s", __get_str(s)) + TP_printk("freq=%lu", __entry->freq) ); #endif /* _TRACE_CPUFREQ_INTERACTIVE_H */ From 4bfc7f5f56e0cba4a14c4f77430a21bcc81da192 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 13:31:24 -0500 Subject: [PATCH 260/678] mach-tegra: tegra3_dvfs.c: workaround trickstermod merging bug --- arch/arm/mach-tegra/tegra3_dvfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index cdaeddb502c..01cb3583121 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -242,7 +242,7 @@ static struct dvfs core_dvfs_table[] = { CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 294000, 342000, 475000, 620000, 620000, 620000, 620000, 620000), #endif #ifdef CONFIG_LP_OC_666 - CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 342000, 475000, 550000, 666000, 666000, 666000, 666000, 666000), + CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 342000, 475000, 555000, 666000, 666000, 666000, 666000, 666000), #endif #ifdef CONFIG_LP_OC_700 CORE_DVFS("cpu_lp", lp_cpu_millivolts, 1, 1, KHZ, 204000, 342000, 
475000, 620000, 700000, 700000, 700000, 700000, 700000), From 56eab8d551f93f5f123d44a49ba0050ec5f5a191 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 13:59:58 -0500 Subject: [PATCH 261/678] mach-tegra: cpuquiet.c: split G/LP delays --- arch/arm/mach-tegra/cpuquiet.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index c4c75b42ea3..5d6cbf45cc3 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -38,7 +38,9 @@ #include "clock.h" #define INITIAL_STATE TEGRA_CPQ_IDLE +#define UP2G_DELAY_MS 70 #define UP_DELAY_MS 70 +#define DOWN2LP_DELAY_MS 500 #define DOWN_DELAY_MS 500 static struct mutex *tegra3_cpu_lock; @@ -51,7 +53,9 @@ static struct kobject *tegra_auto_sysfs_kobject; static bool no_lp; static bool enable; static unsigned long up_delay; +static unsigned long up2g_delay; static unsigned long down_delay; +static unsigned long down2lp_delay; static int mp_overhead = 10; static unsigned int idle_top_freq; static unsigned int idle_bottom_freq; @@ -273,18 +277,18 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) /* Force switch */ cpq_state = TEGRA_CPQ_SWITCH_TO_G; queue_delayed_work( - cpuquiet_wq, &cpuquiet_work, up_delay); + cpuquiet_wq, &cpuquiet_work, up2g_delay); } return; } if (is_lp_cluster() && (cpu_freq >= idle_top_freq || no_lp)) { cpq_state = TEGRA_CPQ_SWITCH_TO_G; - queue_delayed_work(cpuquiet_wq, &cpuquiet_work, up_delay); + queue_delayed_work(cpuquiet_wq, &cpuquiet_work, up2g_delay); } else if (!is_lp_cluster() && !no_lp && cpu_freq <= idle_bottom_freq) { cpq_state = TEGRA_CPQ_SWITCH_TO_LP; - queue_delayed_work(cpuquiet_wq, &cpuquiet_work, down_delay); + queue_delayed_work(cpuquiet_wq, &cpuquiet_work, down2lp_delay); } else { cpq_state = TEGRA_CPQ_IDLE; } @@ -343,13 +347,17 @@ CPQ_BASIC_ATTRIBUTE(idle_top_freq, 0644, uint); CPQ_BASIC_ATTRIBUTE(idle_bottom_freq, 0644, uint); CPQ_BASIC_ATTRIBUTE(mp_overhead, 0644, int); CPQ_ATTRIBUTE(up_delay, 0644, ulong, delay_callback); +CPQ_ATTRIBUTE(up2g_delay, 0644, ulong, delay_callback); CPQ_ATTRIBUTE(down_delay, 0644, ulong, delay_callback); +CPQ_ATTRIBUTE(down2lp_delay, 0644, ulong, delay_callback); CPQ_ATTRIBUTE(enable, 0644, bool, enable_callback); static struct attribute *tegra_auto_attributes[] = { &no_lp_attr.attr, &up_delay_attr.attr, + &up2g_delay_attr.attr, &down_delay_attr.attr, + &down2lp_delay_attr.attr, &idle_top_freq_attr.attr, &idle_bottom_freq_attr.attr, &mp_overhead_attr.attr, @@ -414,7 +422,9 @@ int tegra_auto_hotplug_init(struct mutex *cpu_lock) idle_bottom_freq = clk_get_min_rate(cpu_g_clk) / 1000; up_delay = msecs_to_jiffies(UP_DELAY_MS); + up2g_delay = msecs_to_jiffies(UP2G_DELAY_MS); down_delay = msecs_to_jiffies(DOWN_DELAY_MS); + down2lp_delay = msecs_to_jiffies(DOWN2LP_DELAY_MS); cpumask_clear(&cr_online_requests); tegra3_cpu_lock = cpu_lock; From c43e23f707c2100ab3c739a12aa66ce4dc36baca Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 14:03:48 -0500 Subject: [PATCH 262/678] mach-tegra: cpuquiet.c: don't let lp delay slow transition to multiple cores --- arch/arm/mach-tegra/cpuquiet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index 5d6cbf45cc3..52dc96be4be 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -277,7 +277,7 @@ void tegra_auto_hotplug_governor(unsigned int cpu_freq, bool suspend) /* Force switch */ 
cpq_state = TEGRA_CPQ_SWITCH_TO_G; queue_delayed_work( - cpuquiet_wq, &cpuquiet_work, up2g_delay); + cpuquiet_wq, &cpuquiet_work, up_delay); } return; } From eab2aea166f361de898ed9c712ac36e15b8da8cc Mon Sep 17 00:00:00 2001 From: Tatyana Brokhman Date: Thu, 20 Sep 2012 10:46:10 +0300 Subject: [PATCH 263/678] block: Adding ROW scheduling algorithm This patch adds the implementation of a new scheduling algorithm - ROW. The policy of this algorithm is to prioritize READ requests over WRITE as much as possible without starving the WRITE requests. Change-Id: I4ed52ea21d43b0e7c0769b2599779a3d3869c519 Signed-off-by: Tatyana Brokhman *Squashed commit from CodeAurora for ROW scheduler. **Backported from 3.4 version. --- Documentation/block/row-iosched.txt | 117 +++++ block/Kconfig.iosched | 22 + block/Makefile | 1 + block/row-iosched.c | 694 ++++++++++++++++++++++++++++ 4 files changed, 834 insertions(+) create mode 100644 Documentation/block/row-iosched.txt create mode 100644 block/row-iosched.c diff --git a/Documentation/block/row-iosched.txt b/Documentation/block/row-iosched.txt new file mode 100644 index 00000000000..987bd883444 --- /dev/null +++ b/Documentation/block/row-iosched.txt @@ -0,0 +1,117 @@ +Introduction +============ + +The ROW scheduling algorithm will be used in mobile devices as default +block layer IO scheduling algorithm. ROW stands for "READ Over WRITE" +which is the main requests dispatch policy of this algorithm. + +The ROW IO scheduler was developed with the mobile devices needs in +mind. In mobile devices we favor user experience upon everything else, +thus we want to give READ IO requests as much priority as possible. +The main idea of the ROW scheduling policy is: +If there are READ requests in pipe - dispatch them but don't starve +the WRITE requests too much. + +Software description +==================== +The requests are kept in queues according to their priority. The +dispatching of requests is done in a Round Robin manner with a +different slice for each queue. The dispatch quantum for a specific +queue is defined according to the queues priority. READ queues are +given bigger dispatch quantum than the WRITE queues, within a dispatch +cycle. + +At the moment there are 6 types of queues the requests are +distributed to: +- High priority READ queue +- High priority Synchronous WRITE queue +- Regular priority READ queue +- Regular priority Synchronous WRITE queue +- Regular priority WRITE queue +- Low priority READ queue + +If in a certain dispatch cycle one of the queues was empty and didn't +use its quantum that queue will be marked as "un-served". If we're in a +middle of a dispatch cycle dispatching from queue Y and a request +arrives for queue X that was un-served in the previous cycle, if X's +priority is higher than Y's, queue X will be preempted in the favor of +queue Y. This won't mean that cycle is restarted. The "dispatched" +counter of queue X will remain unchanged. Once queue Y uses up it's quantum +(or there will be no more requests left on it) we'll switch back to queue X +and allow it to finish it's quantum. + +For READ requests queues we allow idling in within a dispatch quantum in +order to give the application a chance to insert more requests. Idling +means adding some extra time for serving a certain queue even if the +queue is empty. The idling is enabled if we identify the application is +inserting requests in a high frequency. + +For idling on READ queues we use timer mechanism. 
When the timer expires, +if there are requests in the scheduler we will signal the underlying driver +(for example the MMC driver) to fetch another request for dispatch. + +The ROW algorithm takes the scheduling policy one step further, making +it a bit more "user-needs oriented", by allowing the application to +hint on the urgency of its requests. For example: even among the READ +requests several requests may be more urgent for completion then others. +The former will go to the High priority READ queue, that is given the +bigger dispatch quantum than any other queue. + +ROW scheduler will support special services for block devices that +supports High Priority Requests. That is, the scheduler may inform the +device upon urgent requests using new callback make_urgent_request. +In addition it will support rescheduling of requests that were +interrupted. For example, if the device issues a long write request and +a sudden high priority read interrupt pops in, the scheduler will +inform the device about the urgent request, so the device can stop the +current write request and serve the high priority read request. In such +a case the device may also send back to the scheduler the reminder of +the interrupted write request, such that the scheduler may continue +sending high priority requests without the need to interrupt the +ongoing write again and again. The write remainder will be sent later on +according to the scheduler policy. + +Design +====== +Existing algorithms (cfq, deadline) sort the io requests according LBA. +When deciding on the next request to dispatch they choose the closest +request to the current disk head position (from handling last +dispatched request). This is done in order to reduce the disk head +movement to a minimum. +We feel that this functionality isn't really needed in mobile devices. +Usually applications that write/read large chunks of data insert the +requests in already sorted LBA order. Thus dealing with sort trees adds +unnecessary complexity. + +We're planing to try this enhancement in the future to check if the +performance is influenced by it. + +SMP/multi-core +============== +At the moment the code is acceded from 2 contexts: +- Application context (from block/elevator layer): adding the requests. +- Underlying driver context (for example the mmc driver thread): dispatching + the requests and notifying on completion. + +One lock is used to synchronize between the two. This lock is provided +by the underlying driver along with the dispatch queue. + +Config options +============== +1. hp_read_quantum: dispatch quantum for the high priority READ queue +2. rp_read_quantum: dispatch quantum for the regular priority READ queue +3. hp_swrite_quantum: dispatch quantum for the high priority Synchronous + WRITE queue +4. rp_swrite_quantum: dispatch quantum for the regular priority + Synchronous WRITE queue +5. rp_write_quantum: dispatch quantum for the regular priority WRITE + queue +6. lp_read_quantum: dispatch quantum for the low priority READ queue +7. lp_swrite_quantum: dispatch quantum for the low priority Synchronous + WRITE queue +8. read_idle: how long to idle on read queue in Msec (in case idling + is enabled on that queue). +9. read_idle_freq: frequency of inserting READ requests that will + trigger idling. 
This is the time in Msec between inserting two READ + requests + diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 411f6b59700..8201a45cd26 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -21,6 +21,17 @@ config IOSCHED_DEADLINE a new point in the service tree and doing a batch of IO from there in case of expiry. +config IOSCHED_ROW + tristate "ROW I/O scheduler" + default y + ---help--- + The ROW I/O scheduler gives priority to READ requests over the + WRITE requests when dispatching, without starving WRITE requests. + Requests are kept in priority queues. Dispatching is done in a RR + manner when the dispatch quantum for each queue is calculated + according to queue priority. + Most suitable for mobile devices. + config IOSCHED_CFQ tristate "CFQ I/O scheduler" # If BLK_CGROUP is a module, CFQ has to be built as module. @@ -70,6 +81,16 @@ choice config DEFAULT_DEADLINE bool "Deadline" if IOSCHED_DEADLINE=y + config DEFAULT_ROW + bool "ROW" if IOSCHED_ROW=y + help + The ROW I/O scheduler gives priority to READ requests + over the WRITE requests when dispatching, without starving + WRITE requests. Requests are kept in priority queues. + Dispatching is done in a RR manner when the dispatch quantum + for each queue is defined according to queue priority. + Most suitable for mobile devices. + config DEFAULT_CFQ bool "CFQ" if IOSCHED_CFQ=y @@ -87,6 +108,7 @@ endchoice config DEFAULT_IOSCHED string default "deadline" if DEFAULT_DEADLINE + default "row" if DEFAULT_ROW default "cfq" if DEFAULT_CFQ default "noop" if DEFAULT_NOOP default "sio" if DEFAULT_SIO diff --git a/block/Makefile b/block/Makefile index 2f8fb116ac3..eb332a2d98c 100644 --- a/block/Makefile +++ b/block/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o +obj-$(CONFIG_IOSCHED_ROW) += row-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o diff --git a/block/row-iosched.c b/block/row-iosched.c new file mode 100644 index 00000000000..a76cad30084 --- /dev/null +++ b/block/row-iosched.c @@ -0,0 +1,694 @@ +/* + * ROW (Read Over Write) I/O scheduler. + * + * Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* See Documentation/block/row-iosched.txt */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * enum row_queue_prio - Priorities of the ROW queues + * + * This enum defines the priorities (and the number of queues) + * the requests will be disptributed to. The higher priority - + * the bigger is the dispatch quantum given to that queue. + * ROWQ_PRIO_HIGH_READ - is the higher priority queue. 
+ * + */ +enum row_queue_prio { + ROWQ_PRIO_HIGH_READ = 0, + ROWQ_PRIO_REG_READ, + ROWQ_PRIO_HIGH_SWRITE, + ROWQ_PRIO_REG_SWRITE, + ROWQ_PRIO_REG_WRITE, + ROWQ_PRIO_LOW_READ, + ROWQ_PRIO_LOW_SWRITE, + ROWQ_MAX_PRIO, +}; + +/* Flags indicating whether idling is enabled on the queue */ +static const bool queue_idling_enabled[] = { + true, /* ROWQ_PRIO_HIGH_READ */ + true, /* ROWQ_PRIO_REG_READ */ + false, /* ROWQ_PRIO_HIGH_SWRITE */ + false, /* ROWQ_PRIO_REG_SWRITE */ + false, /* ROWQ_PRIO_REG_WRITE */ + false, /* ROWQ_PRIO_LOW_READ */ + false, /* ROWQ_PRIO_LOW_SWRITE */ +}; + +/* Default values for row queues quantums in each dispatch cycle */ +static const int queue_quantum[] = { + 100, /* ROWQ_PRIO_HIGH_READ */ + 100, /* ROWQ_PRIO_REG_READ */ + 2, /* ROWQ_PRIO_HIGH_SWRITE */ + 1, /* ROWQ_PRIO_REG_SWRITE */ + 1, /* ROWQ_PRIO_REG_WRITE */ + 1, /* ROWQ_PRIO_LOW_READ */ + 1 /* ROWQ_PRIO_LOW_SWRITE */ +}; + +/* Default values for idling on read queues (in msec) */ +#define ROW_IDLE_TIME_MSEC 5 +#define ROW_READ_FREQ_MSEC 20 + +/** + * struct rowq_idling_data - parameters for idling on the queue + * @last_insert_time: time the last request was inserted + * to the queue + * @begin_idling: flag indicating wether we should idle + * + */ +struct rowq_idling_data { + ktime_t last_insert_time; + bool begin_idling; +}; + +/** + * struct row_queue - requests grouping structure + * @rdata: parent row_data structure + * @fifo: fifo of requests + * @prio: queue priority (enum row_queue_prio) + * @nr_dispatched: number of requests already dispatched in + * the current dispatch cycle + * @slice: number of requests to dispatch in a cycle + * @idle_data: data for idling on queues + * + */ +struct row_queue { + struct row_data *rdata; + struct list_head fifo; + enum row_queue_prio prio; + + unsigned int nr_dispatched; + unsigned int slice; + + /* used only for READ queues */ + struct rowq_idling_data idle_data; +}; + +/** + * struct idling_data - data for idling on empty rqueue + * @idle_time: idling duration (jiffies) + * @freq: min time between two requests that + * triger idling (msec) + * @idle_work: pointer to struct delayed_work + * + */ +struct idling_data { + unsigned long idle_time; + u32 freq; + + struct workqueue_struct *idle_workqueue; + struct delayed_work idle_work; +}; + +/** + * struct row_queue - Per block device rqueue structure + * @dispatch_queue: dispatch rqueue + * @row_queues: array of priority request queues with + * dispatch quantum per rqueue + * @curr_queue: index in the row_queues array of the + * currently serviced rqueue + * @read_idle: data for idling after READ request + * @nr_reqs: nr_reqs[0] holds the number of all READ requests in + * scheduler, nr_reqs[1] holds the number of all WRITE + * requests in scheduler + * @cycle_flags: used for marking unserved queueus + * + */ +struct row_data { + struct request_queue *dispatch_queue; + + struct { + struct row_queue rqueue; + int disp_quantum; + } row_queues[ROWQ_MAX_PRIO]; + + enum row_queue_prio curr_queue; + + struct idling_data read_idle; + unsigned int nr_reqs[2]; + + unsigned int cycle_flags; +}; + +#define RQ_ROWQ(rq) ((struct row_queue *) ((rq)->elevator_private[0])) + +#define row_log(q, fmt, args...) \ + blk_add_trace_msg(q, "%s():" fmt , __func__, ##args) +#define row_log_rowq(rdata, rowq_id, fmt, args...) 
\ + blk_add_trace_msg(rdata->dispatch_queue, "rowq%d " fmt, \ + rowq_id, ##args) + +static inline void row_mark_rowq_unserved(struct row_data *rd, + enum row_queue_prio qnum) +{ + rd->cycle_flags |= (1 << qnum); +} + +static inline void row_clear_rowq_unserved(struct row_data *rd, + enum row_queue_prio qnum) +{ + rd->cycle_flags &= ~(1 << qnum); +} + +static inline int row_rowq_unserved(struct row_data *rd, + enum row_queue_prio qnum) +{ + return rd->cycle_flags & (1 << qnum); +} + +/******************** Static helper functions ***********************/ +/* + * kick_queue() - Wake up device driver queue thread + * @work: pointer to struct work_struct + * + * This is a idling delayed work function. It's purpose is to wake up the + * device driver in order for it to start fetching requests. + * + */ +static void kick_queue(struct work_struct *work) +{ + struct delayed_work *idle_work = to_delayed_work(work); + struct idling_data *read_data = + container_of(idle_work, struct idling_data, idle_work); + struct row_data *rd = + container_of(read_data, struct row_data, read_idle); + + row_log_rowq(rd, rd->curr_queue, "Performing delayed work"); + /* Mark idling process as done */ + rd->row_queues[rd->curr_queue].rqueue.idle_data.begin_idling = false; + + if (!(rd->nr_reqs[0] + rd->nr_reqs[1])) + row_log(rd->dispatch_queue, "No requests in scheduler"); + else { + spin_lock_irq(rd->dispatch_queue->queue_lock); + __blk_run_queue(rd->dispatch_queue); + spin_unlock_irq(rd->dispatch_queue->queue_lock); + } +} + +/* + * row_restart_disp_cycle() - Restart the dispatch cycle + * @rd: pointer to struct row_data + * + * This function restarts the dispatch cycle by: + * - Setting current queue to ROWQ_PRIO_HIGH_READ + * - For each queue: reset the number of requests dispatched in + * the cycle + */ +static inline void row_restart_disp_cycle(struct row_data *rd) +{ + int i; + + for (i = 0; i < ROWQ_MAX_PRIO; i++) + rd->row_queues[i].rqueue.nr_dispatched = 0; + + rd->curr_queue = ROWQ_PRIO_HIGH_READ; + row_log(rd->dispatch_queue, "Restarting cycle"); +} + +static inline void row_get_next_queue(struct row_data *rd) +{ + rd->curr_queue++; + if (rd->curr_queue == ROWQ_MAX_PRIO) + row_restart_disp_cycle(rd); +} + +/******************* Elevator callback functions *********************/ + +/* + * row_add_request() - Add request to the scheduler + * @q: requests queue + * @rq: request to add + * + */ +static void row_add_request(struct request_queue *q, + struct request *rq) +{ + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + struct row_queue *rqueue = RQ_ROWQ(rq); + + list_add_tail(&rq->queuelist, &rqueue->fifo); + rd->nr_reqs[rq_data_dir(rq)]++; + rq_set_fifo_time(rq, jiffies); /* for statistics*/ + + if (queue_idling_enabled[rqueue->prio]) { + if (delayed_work_pending(&rd->read_idle.idle_work)) + (void)cancel_delayed_work( + &rd->read_idle.idle_work); + if (ktime_to_ms(ktime_sub(ktime_get(), + rqueue->idle_data.last_insert_time)) < + rd->read_idle.freq) { + rqueue->idle_data.begin_idling = true; + row_log_rowq(rd, rqueue->prio, "Enable idling"); + } else { + rqueue->idle_data.begin_idling = false; + row_log_rowq(rd, rqueue->prio, "Disable idling"); + } + + rqueue->idle_data.last_insert_time = ktime_get(); + } + row_log_rowq(rd, rqueue->prio, "added request"); +} + +/* + * row_remove_request() - Remove given request from scheduler + * @q: requests queue + * @rq: request to remove + * + */ +static void row_remove_request(struct request_queue *q, + struct request *rq) +{ + struct row_data *rd = 
(struct row_data *)q->elevator->elevator_data; + + rq_fifo_clear(rq); + rd->nr_reqs[rq_data_dir(rq)]--; +} + +/* + * row_dispatch_insert() - move request to dispatch queue + * @rd: pointer to struct row_data + * + * This function moves the next request to dispatch from + * rd->curr_queue to the dispatch queue + * + */ +static void row_dispatch_insert(struct row_data *rd) +{ + struct request *rq; + + rq = rq_entry_fifo(rd->row_queues[rd->curr_queue].rqueue.fifo.next); + row_remove_request(rd->dispatch_queue, rq); + elv_dispatch_add_tail(rd->dispatch_queue, rq); + rd->row_queues[rd->curr_queue].rqueue.nr_dispatched++; + row_clear_rowq_unserved(rd, rd->curr_queue); + row_log_rowq(rd, rd->curr_queue, " Dispatched request nr_disp = %d", + rd->row_queues[rd->curr_queue].rqueue.nr_dispatched); +} + +/* + * row_choose_queue() - choose the next queue to dispatch from + * @rd: pointer to struct row_data + * + * Updates rd->curr_queue. Returns 1 if there are requests to + * dispatch, 0 if there are no requests in scheduler + * + */ +static int row_choose_queue(struct row_data *rd) +{ + int prev_curr_queue = rd->curr_queue; + + if (!(rd->nr_reqs[0] + rd->nr_reqs[1])) { + row_log(rd->dispatch_queue, "No more requests in scheduler"); + return 0; + } + + row_get_next_queue(rd); + + /* + * Loop over all queues to find the next queue that is not empty. + * Stop when you get back to curr_queue + */ + while (list_empty(&rd->row_queues[rd->curr_queue].rqueue.fifo) + && rd->curr_queue != prev_curr_queue) { + /* Mark rqueue as unserved */ + row_mark_rowq_unserved(rd, rd->curr_queue); + row_get_next_queue(rd); + } + + return 1; +} + +/* + * row_dispatch_requests() - selects the next request to dispatch + * @q: requests queue + * @force: ignored + * + * Return 0 if no requests were moved to the dispatch queue. + * 1 otherwise + * + */ +static int row_dispatch_requests(struct request_queue *q, int force) +{ + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + int ret = 0, currq, i; + + currq = rd->curr_queue; + + /* + * Find the first unserved queue (with higher priority then currq) + * that is not empty + */ + for (i = 0; i < currq; i++) { + if (row_rowq_unserved(rd, i) && + !list_empty(&rd->row_queues[i].rqueue.fifo)) { + row_log_rowq(rd, currq, + " Preemting for unserved rowq%d", i); + rd->curr_queue = i; + row_dispatch_insert(rd); + ret = 1; + goto done; + } + } + + if (rd->row_queues[currq].rqueue.nr_dispatched >= + rd->row_queues[currq].disp_quantum) { + rd->row_queues[currq].rqueue.nr_dispatched = 0; + row_log_rowq(rd, currq, "Expiring rqueue"); + ret = row_choose_queue(rd); + if (ret) + row_dispatch_insert(rd); + goto done; + } + + /* Dispatch from curr_queue */ + if (list_empty(&rd->row_queues[currq].rqueue.fifo)) { + /* check idling */ + if (delayed_work_pending(&rd->read_idle.idle_work)) { + if (force) { + (void)cancel_delayed_work( + &rd->read_idle.idle_work); + row_log_rowq(rd, currq, + "Canceled delayed work - forced dispatch"); + } else { + row_log_rowq(rd, currq, + "Delayed work pending. Exiting"); + goto done; + } + } + + if (!force && queue_idling_enabled[currq] && + rd->row_queues[currq].rqueue.idle_data.begin_idling) { + if (!queue_delayed_work(rd->read_idle.idle_workqueue, + &rd->read_idle.idle_work, + rd->read_idle.idle_time)) { + row_log_rowq(rd, currq, + "Work already on queue!"); + pr_err("ROW_BUG: Work already on queue!"); + } else + row_log_rowq(rd, currq, + "Scheduled delayed work. exiting"); + goto done; + } else { + row_log_rowq(rd, currq, + "Currq empty. 
Choose next queue"); + ret = row_choose_queue(rd); + if (!ret) + goto done; + } + } + + ret = 1; + row_dispatch_insert(rd); + +done: + return ret; +} + +/* + * row_init_queue() - Init scheduler data structures + * @q: requests queue + * + * Return pointer to struct row_data to be saved in elevator for + * this dispatch queue + * + */ +static void *row_init_queue(struct request_queue *q) +{ + + struct row_data *rdata; + int i; + + rdata = kmalloc_node(sizeof(*rdata), + GFP_KERNEL | __GFP_ZERO, q->node); + if (!rdata) + return NULL; + + for (i = 0; i < ROWQ_MAX_PRIO; i++) { + INIT_LIST_HEAD(&rdata->row_queues[i].rqueue.fifo); + rdata->row_queues[i].disp_quantum = queue_quantum[i]; + rdata->row_queues[i].rqueue.rdata = rdata; + rdata->row_queues[i].rqueue.prio = i; + rdata->row_queues[i].rqueue.idle_data.begin_idling = false; + rdata->row_queues[i].rqueue.idle_data.last_insert_time = + ktime_set(0, 0); + } + + /* + * Currently idling is enabled only for READ queues. If we want to + * enable it for write queues also, note that idling frequency will + * be the same in both cases + */ + rdata->read_idle.idle_time = msecs_to_jiffies(ROW_IDLE_TIME_MSEC); + /* Maybe 0 on some platforms */ + if (!rdata->read_idle.idle_time) + rdata->read_idle.idle_time = 1; + rdata->read_idle.freq = ROW_READ_FREQ_MSEC; + rdata->read_idle.idle_workqueue = alloc_workqueue("row_idle_work", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (!rdata->read_idle.idle_workqueue) + panic("Failed to create idle workqueue\n"); + INIT_DELAYED_WORK(&rdata->read_idle.idle_work, kick_queue); + + rdata->curr_queue = ROWQ_PRIO_HIGH_READ; + rdata->dispatch_queue = q; + + rdata->nr_reqs[READ] = rdata->nr_reqs[WRITE] = 0; + + return rdata; +} + +/* + * row_exit_queue() - called on unloading the RAW scheduler + * @e: poiner to struct elevator_queue + * + */ +static void row_exit_queue(struct elevator_queue *e) +{ + struct row_data *rd = (struct row_data *)e->elevator_data; + int i; + + for (i = 0; i < ROWQ_MAX_PRIO; i++) + BUG_ON(!list_empty(&rd->row_queues[i].rqueue.fifo)); + (void)cancel_delayed_work_sync(&rd->read_idle.idle_work); + BUG_ON(delayed_work_pending(&rd->read_idle.idle_work)); + destroy_workqueue(rd->read_idle.idle_workqueue); + kfree(rd); +} + +/* + * row_merged_requests() - Called when 2 requests are merged + * @q: requests queue + * @rq: request the two requests were merged into + * @next: request that was merged + */ +static void row_merged_requests(struct request_queue *q, struct request *rq, + struct request *next) +{ + struct row_queue *rqueue = RQ_ROWQ(next); + + list_del_init(&next->queuelist); + + rqueue->rdata->nr_reqs[rq_data_dir(rq)]--; +} + +/* + * get_queue_type() - Get queue type for a given request + * + * This is a helping function which purpose is to determine what + * ROW queue the given request should be added to (and + * dispatched from leter on) + * + * TODO: Right now only 3 queues are used REG_READ, REG_WRITE + * and REG_SWRITE + */ +static enum row_queue_prio get_queue_type(struct request *rq) +{ + const int data_dir = rq_data_dir(rq); + const bool is_sync = rq_is_sync(rq); + + if (data_dir == READ) + return ROWQ_PRIO_REG_READ; + else if (is_sync) + return ROWQ_PRIO_REG_SWRITE; + else + return ROWQ_PRIO_REG_WRITE; +} + +/* + * row_set_request() - Set ROW data structures associated with this request. 
+ * @q: requests queue + * @rq: pointer to the request + * @gfp_mask: ignored + * + */ +static int +row_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) +{ + struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + rq->elevator_private[0] = + (void *)(&rd->row_queues[get_queue_type(rq)]); + spin_unlock_irqrestore(q->queue_lock, flags); + + return 0; +} + +/********** Helping sysfs functions/defenitions for ROW attributes ******/ +static ssize_t row_var_show(int var, char *page) +{ + return snprintf(page, 100, "%d\n", var); +} + +static ssize_t row_var_store(int *var, const char *page, size_t count) +{ + int err; + err = kstrtoul(page, 10, (unsigned long *)var); + + return count; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct row_data *rowd = e->elevator_data; \ + int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return row_var_show(__data, (page)); \ +} +SHOW_FUNCTION(row_hp_read_quantum_show, + rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum, 0); +SHOW_FUNCTION(row_rp_read_quantum_show, + rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum, 0); +SHOW_FUNCTION(row_hp_swrite_quantum_show, + rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum, 0); +SHOW_FUNCTION(row_rp_swrite_quantum_show, + rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum, 0); +SHOW_FUNCTION(row_rp_write_quantum_show, + rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum, 0); +SHOW_FUNCTION(row_lp_read_quantum_show, + rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum, 0); +SHOW_FUNCTION(row_lp_swrite_quantum_show, + rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum, 0); +SHOW_FUNCTION(row_read_idle_show, rowd->read_idle.idle_time, 1); +SHOW_FUNCTION(row_read_idle_freq_show, rowd->read_idle.freq, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, \ + const char *page, size_t count) \ +{ \ + struct row_data *rowd = e->elevator_data; \ + int __data; \ + int ret = row_var_store(&__data, (page), count); \ + if (__CONV) \ + __data = (int)msecs_to_jiffies(__data); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + *(__PTR) = __data; \ + return ret; \ +} +STORE_FUNCTION(row_hp_read_quantum_store, +&rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum, 1, INT_MAX, 0); +STORE_FUNCTION(row_rp_read_quantum_store, + &rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum, + 1, INT_MAX, 0); +STORE_FUNCTION(row_hp_swrite_quantum_store, + &rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum, + 1, INT_MAX, 0); +STORE_FUNCTION(row_rp_swrite_quantum_store, + &rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum, + 1, INT_MAX, 0); +STORE_FUNCTION(row_rp_write_quantum_store, + &rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum, + 1, INT_MAX, 0); +STORE_FUNCTION(row_lp_read_quantum_store, + &rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum, + 1, INT_MAX, 0); +STORE_FUNCTION(row_lp_swrite_quantum_store, + &rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum, + 1, INT_MAX, 1); +STORE_FUNCTION(row_read_idle_store, &rowd->read_idle.idle_time, 1, INT_MAX, 1); +STORE_FUNCTION(row_read_idle_freq_store, &rowd->read_idle.freq, 1, INT_MAX, 0); + +#undef STORE_FUNCTION + +#define ROW_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, row_##name##_show, \ + row_##name##_store) + +static struct elv_fs_entry row_attrs[] = 
{ + ROW_ATTR(hp_read_quantum), + ROW_ATTR(rp_read_quantum), + ROW_ATTR(hp_swrite_quantum), + ROW_ATTR(rp_swrite_quantum), + ROW_ATTR(rp_write_quantum), + ROW_ATTR(lp_read_quantum), + ROW_ATTR(lp_swrite_quantum), + ROW_ATTR(read_idle), + ROW_ATTR(read_idle_freq), + __ATTR_NULL +}; + +static struct elevator_type iosched_row = { + .ops = { + .elevator_merge_req_fn = row_merged_requests, + .elevator_dispatch_fn = row_dispatch_requests, + .elevator_add_req_fn = row_add_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, + .elevator_set_req_fn = row_set_request, + .elevator_init_fn = row_init_queue, + .elevator_exit_fn = row_exit_queue, + }, + + .elevator_attrs = row_attrs, + .elevator_name = "row", + .elevator_owner = THIS_MODULE, +}; + +static int __init row_init(void) +{ + elv_register(&iosched_row); + return 0; +} + +static void __exit row_exit(void) +{ + elv_unregister(&iosched_row); +} + +module_init(row_init); +module_exit(row_exit); + +MODULE_LICENSE("GPLv2"); +MODULE_DESCRIPTION("Read Over Write IO scheduler"); From a9ec61e526360517686e0b71cb1e3f43e18e4b80 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Apr 2012 23:52:50 -0400 Subject: [PATCH 264/678] switch do_fsync() to fget_light() Signed-off-by: Al Viro Conflicts: fs/sync.c --- fs/sync.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/sync.c b/fs/sync.c index b918fb9649e..a10e39a4b6a 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -209,16 +209,17 @@ static int do_fsync(unsigned int fd, int datasync) { struct file *file; int ret = -EBADF; + int fput_needed; #ifdef CONFIG_FSYNC_CONTROL if (!fsynccontrol_fsync_enabled()) return 0; #endif - file = fget(fd); + file = fget_light(fd, &fput_needed); if (file) { ret = vfs_fsync(file, datasync); - fput(file); + fput_light(file, fput_needed); } return ret; } From 93e4c245e362b3dd7faa9935172bba216551b31f Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 15:17:59 -0500 Subject: [PATCH 265/678] mach-tegra: cpu-tegra3.c: tweak delays --- arch/arm/mach-tegra/cpu-tegra3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index 04feb500386..98ab950d70e 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -39,9 +39,9 @@ #include "clock.h" #define INITIAL_STATE TEGRA_HP_DISABLED -#define UP2G0_DELAY_MS 100 -#define UP2Gn_DELAY_MS 200 -#define DOWN_DELAY_MS 4000 +#define UP2G0_DELAY_MS 384 +#define UP2Gn_DELAY_MS 128 +#define DOWN_DELAY_MS 2112 static struct mutex *tegra3_cpu_lock; From 544eeb7be1e03f5e1e52c9a4cdb7deed7b8a190a Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 15:18:43 -0500 Subject: [PATCH 266/678] cpufreq: cpufreq_ondemand.c: tweak --- drivers/cpufreq/cpufreq_ondemand.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 67ef5e9fe7f..64975af087d 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -37,12 +37,12 @@ * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) -#define DEF_FREQUENCY_UP_THRESHOLD (80) -#define DEF_SAMPLING_DOWN_FACTOR (3) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5) +#define DEF_FREQUENCY_UP_THRESHOLD (88) +#define DEF_SAMPLING_DOWN_FACTOR (2) #define MAX_SAMPLING_DOWN_FACTOR (100000) -#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (10) -#define 
MICRO_FREQUENCY_UP_THRESHOLD (80) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (5) +#define MICRO_FREQUENCY_UP_THRESHOLD (88) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) From 9bcbe8d639653552880c40b8b4544b4129cd9cbe Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 15:19:11 -0500 Subject: [PATCH 267/678] mach-tegra: cpuquiet.c: tweak delays --- arch/arm/mach-tegra/cpuquiet.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index 52dc96be4be..467320e9ec4 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -38,10 +38,10 @@ #include "clock.h" #define INITIAL_STATE TEGRA_CPQ_IDLE -#define UP2G_DELAY_MS 70 -#define UP_DELAY_MS 70 -#define DOWN2LP_DELAY_MS 500 -#define DOWN_DELAY_MS 500 +#define UP2G_DELAY_MS 384 +#define UP_DELAY_MS 128 +#define DOWN2LP_DELAY_MS 4224 +#define DOWN_DELAY_MS 2112 static struct mutex *tegra3_cpu_lock; static struct workqueue_struct *cpuquiet_wq; From f74713e5470889c50d8b0759c08502618d538498 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 15:19:32 -0500 Subject: [PATCH 268/678] makefile: minor changeup --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 708bac93604..eafc8ed4e13 100644 --- a/Makefile +++ b/Makefile @@ -371,7 +371,7 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -Wno-format-security \ -fno-delete-null-pointer-checks -mno-unaligned-access \ -mtune=cortex-a8 -march=armv7-a -mfpu=neon \ - -fpredictive-commoning -fgcse-after-reload -ftree-vectorize \ + -fpredictive-commoning -fgcse-after-reload -ftree-vectorize -mvectorize-with-neon-quad \ -fipa-cp-clone -fsingle-precision-constant \ -funswitch-loops -floop-interchange \ -floop-strip-mine -floop-block From a782e79326763d38231aa688f93c2d67e5f8a20b Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 15:19:59 -0500 Subject: [PATCH 269/678] defconfig: update --- arch/arm/configs/metallice_grouper_defconfig | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 2beb22e4f03..7e3b9e1ebce 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -353,7 +353,7 @@ CONFIG_TEGRA_EDP_EXACT_FREQ=y CONFIG_TEGRA_BB_XMM_POWER=y # CONFIG_TEGRA_BB_XMM_POWER2 is not set # CONFIG_TEGRA_THERMAL_SYSFS is not set -# CONFIG_TEGRA_PLLM_RESTRICTED is not set +CONFIG_TEGRA_PLLM_RESTRICTED=y # CONFIG_TEGRA_WDT_RECOVERY is not set CONFIG_TEGRA_LP2_ARM_TWD=y # CONFIG_TEGRA_RAIL_OFF_MULTIPLE_CPUS is not set @@ -502,7 +502,9 @@ CONFIG_CMDLINE="tegra_wdt.heartbeat=30" CONFIG_CMDLINE_EXTEND=y # CONFIG_CMDLINE_FORCE is not set # CONFIG_XIP_KERNEL is not set -# CONFIG_KEXEC is not set +CONFIG_KEXEC=y +CONFIG_ATAGS_PROC=y +CONFIG_KEXEC_HARDBOOT=y # CONFIG_CRASH_DUMP is not set # CONFIG_AUTO_ZRELADDR is not set @@ -520,9 +522,9 @@ CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y # CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE is not set 
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_GOV_USERSPACE is not set From 364dcd13849acaa7f04cca6fbb992c0cd3961388 Mon Sep 17 00:00:00 2001 From: Paul Beeler Date: Thu, 4 Oct 2012 07:01:04 -0600 Subject: [PATCH 270/678] Block: Add the BFQ-v5 I/O scheduler to 3.1 The general structure is borrowed from CFQ, as much code. A (bfq_)queue is associated to each task doing I/O on a device, and each time a scheduling decision has to be taken a queue is selected and it is served until it expires. - Slices are given in the service domain: tasks are assigned budgets, measured in number of sectors. Once got the disk, a task must however consume its assigned budget within a configurable maximum time (by default, the maximum possible value of the budgets is automatically computed to comply with this timeout). This allows the desired latency vs "throughput boosting" tradeoff to be set. - Budgets are scheduled according to a variant of WF2Q+, implemented using an augmented rb-tree to take eligibility into account while preserving an O(log N) overall complexity. - A low-latency tunable is provided; if enabled, both interactive and soft real-time applications are guaranteed very low latency. - Latency guarantees are preserved also in presence of NCQ. - High throughput with flash-based devices, while still preserving latency guarantees. - Useful features borrowed from CFQ: cooperating-queues merging (with some additional optimizations with respect to the original CFQ version), static fallback queue for OOM. - BFQ supports full hierarchical scheduling, exporting a cgroups interface. Each node has a full scheduler, so each group can be assigned its own ioprio and an ioprio_class. - If the cgroups interface is used, weights can be explictly assigned, otherwise ioprio values are mapped to weights using the relation weight = IOPRIO_BE_NR - ioprio. - ioprio classes are served in strict priority order, i.e., lower priority queues are not served as long as there are higher priority queues. Among queues in the same class the bandwidth is distributed in proportion to the weights of each queue. A very thin extra bandwidth is however guaranteed to the Idle class, to prevent it from starving. Signed-off-by: Paul Beeler Conflicts: block/Kconfig.iosched block/Makefile --- block/Kconfig.iosched | 26 + block/Makefile | 1 + block/bfq-cgroup.c | 831 +++++++++ block/bfq-ioc.c | 380 +++++ block/bfq-iosched.c | 3021 +++++++++++++++++++++++++++++++++ block/bfq-sched.c | 1066 ++++++++++++ block/bfq.h | 593 +++++++ block/blk-ioc.c | 30 +- block/cfq-iosched.c | 10 +- fs/ioprio.c | 9 +- include/linux/cgroup_subsys.h | 6 + include/linux/iocontext.h | 18 +- 12 files changed, 5970 insertions(+), 21 deletions(-) create mode 100644 block/bfq-cgroup.c create mode 100644 block/bfq-ioc.c create mode 100644 block/bfq-iosched.c create mode 100644 block/bfq-sched.c create mode 100644 block/bfq.h diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 8201a45cd26..06ec27e59a0 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -71,6 +71,28 @@ config IOSCHED_VR Requests are chosen according to SSTF with a penalty of rev_penalty for switching head direction. +config IOSCHED_BFQ + tristate "BFQ I/O scheduler" + depends on EXPERIMENTAL + default n + ---help--- + The BFQ I/O scheduler tries to distribute bandwidth among + all processes according to their weights. + It aims at distributing the bandwidth as desired, independently of + the disk parameters and with any workload. 
It also tries to + guarantee low latency to interactive and soft real-time + applications. If compiled built-in (saying Y here), BFQ can + be configured to support hierarchical scheduling. + +config CGROUP_BFQIO + bool "BFQ hierarchical scheduling support" + depends on CGROUPS && IOSCHED_BFQ=y + default n + ---help--- + Enable hierarchical scheduling in BFQ, using the cgroups + filesystem interface. The name of the subsystem will be + bfqio. + choice prompt "Default I/O scheduler" default DEFAULT_CFQ @@ -94,6 +116,9 @@ choice config DEFAULT_CFQ bool "CFQ" if IOSCHED_CFQ=y + config DEFAULT_BFQ + bool "BFQ" if IOSCHED_BFQ=y + config DEFAULT_NOOP bool "No-op" @@ -110,6 +135,7 @@ config DEFAULT_IOSCHED default "deadline" if DEFAULT_DEADLINE default "row" if DEFAULT_ROW default "cfq" if DEFAULT_CFQ + default "bfq" if DEFAULT_BFQ default "noop" if DEFAULT_NOOP default "sio" if DEFAULT_SIO default "vr" if DEFAULT_VR diff --git a/block/Makefile b/block/Makefile index eb332a2d98c..760d8f3ff2e 100644 --- a/block/Makefile +++ b/block/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_ROW) += row-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o +obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c new file mode 100644 index 00000000000..74ae73b91e1 --- /dev/null +++ b/block/bfq-cgroup.c @@ -0,0 +1,831 @@ +/* + * BFQ: CGROUPS support. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. + */ + +#ifdef CONFIG_CGROUP_BFQIO +static struct bfqio_cgroup bfqio_root_cgroup = { + .weight = BFQ_DEFAULT_GRP_WEIGHT, + .ioprio = BFQ_DEFAULT_GRP_IOPRIO, + .ioprio_class = BFQ_DEFAULT_GRP_CLASS, +}; + +static inline void bfq_init_entity(struct bfq_entity *entity, + struct bfq_group *bfqg) +{ + entity->weight = entity->new_weight; + entity->orig_weight = entity->new_weight; + entity->ioprio = entity->new_ioprio; + entity->ioprio_class = entity->new_ioprio_class; + entity->parent = bfqg->my_entity; + entity->sched_data = &bfqg->sched_data; +} + +static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup) +{ + return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id), + struct bfqio_cgroup, css); +} + +/* + * Search the bfq_group for bfqd into the hash table (by now only a list) + * of bgrp. Must be called under rcu_read_lock(). 
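+ * Returns the bfq_group associated to @bfqd in @bgrp, or NULL if none exists.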
+ */ +static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, + struct bfq_data *bfqd) +{ + struct bfq_group *bfqg; + struct hlist_node *n; + void *key; + + hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) { + key = rcu_dereference(bfqg->bfqd); + if (key == bfqd) + return bfqg; + } + + return NULL; +} + +static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, + struct bfq_group *bfqg) +{ + struct bfq_entity *entity = &bfqg->entity; + + entity->weight = entity->new_weight = bgrp->weight; + entity->orig_weight = entity->new_weight; + entity->ioprio = entity->new_ioprio = bgrp->ioprio; + entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; + entity->ioprio_changed = 1; + entity->my_sched_data = &bfqg->sched_data; +} + +static inline void bfq_group_set_parent(struct bfq_group *bfqg, + struct bfq_group *parent) +{ + struct bfq_entity *entity; + + BUG_ON(parent == NULL); + BUG_ON(bfqg == NULL); + + entity = &bfqg->entity; + entity->parent = parent->my_entity; + entity->sched_data = &parent->sched_data; +} + +/** + * bfq_group_chain_alloc - allocate a chain of groups. + * @bfqd: queue descriptor. + * @cgroup: the leaf cgroup this chain starts from. + * + * Allocate a chain of groups starting from the one belonging to + * @cgroup up to the root cgroup. Stop if a cgroup on the chain + * to the root has already an allocated group on @bfqd. + */ +static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp; + struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; + + for (; cgroup != NULL; cgroup = cgroup->parent) { + bgrp = cgroup_to_bfqio(cgroup); + + bfqg = bfqio_lookup_group(bgrp, bfqd); + if (bfqg != NULL) { + /* + * All the cgroups in the path from there to the + * root must have a bfq_group for bfqd, so we don't + * need any more allocations. + */ + break; + } + + bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); + if (bfqg == NULL) + goto cleanup; + + bfq_group_init_entity(bgrp, bfqg); + bfqg->my_entity = &bfqg->entity; + + if (leaf == NULL) { + leaf = bfqg; + prev = leaf; + } else { + bfq_group_set_parent(prev, bfqg); + /* + * Build a list of allocated nodes using the bfqd + * filed, that is still unused and will be initialized + * only after the node will be connected. + */ + prev->bfqd = bfqg; + prev = bfqg; + } + } + + return leaf; + +cleanup: + while (leaf != NULL) { + prev = leaf; + leaf = leaf->bfqd; + kfree(prev); + } + + return NULL; +} + +/** + * bfq_group_chain_link - link an allocatd group chain to a cgroup hierarchy. + * @bfqd: the queue descriptor. + * @cgroup: the leaf cgroup to start from. + * @leaf: the leaf group (to be associated to @cgroup). + * + * Try to link a chain of groups to a cgroup hierarchy, connecting the + * nodes bottom-up, so we can be sure that when we find a cgroup in the + * hierarchy that already as a group associated to @bfqd all the nodes + * in the path to the root cgroup have one too. + * + * On locking: the queue lock protects the hierarchy (there is a hierarchy + * per device) while the bfqio_cgroup lock protects the list of groups + * belonging to the same cgroup. 
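+ * Until a node is linked, its bfqd field threads the chain built by
+ * bfq_group_chain_alloc(); it is re-pointed to the real @bfqd here.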
+ */ +static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup, + struct bfq_group *leaf) +{ + struct bfqio_cgroup *bgrp; + struct bfq_group *bfqg, *next, *prev = NULL; + unsigned long flags; + + assert_spin_locked(bfqd->queue->queue_lock); + + for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) { + bgrp = cgroup_to_bfqio(cgroup); + next = leaf->bfqd; + + bfqg = bfqio_lookup_group(bgrp, bfqd); + BUG_ON(bfqg != NULL); + + spin_lock_irqsave(&bgrp->lock, flags); + + rcu_assign_pointer(leaf->bfqd, bfqd); + hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); + hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); + + spin_unlock_irqrestore(&bgrp->lock, flags); + + prev = leaf; + leaf = next; + } + + BUG_ON(cgroup == NULL && leaf != NULL); + if (cgroup != NULL && prev != NULL) { + bgrp = cgroup_to_bfqio(cgroup); + bfqg = bfqio_lookup_group(bgrp, bfqd); + bfq_group_set_parent(prev, bfqg); + } +} + +/** + * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. + * @bfqd: queue descriptor. + * @cgroup: cgroup being searched for. + * + * Return a group associated to @bfqd in @cgroup, allocating one if + * necessary. When a group is returned all the cgroups in the path + * to the root have a group associated to @bfqd. + * + * If the allocation fails, return the root group: this breaks guarantees + * but is a safe fallbak. If this loss becames a problem it can be + * mitigated using the equivalent weight (given by the product of the + * weights of the groups in the path from @group to the root) in the + * root scheduler. + * + * We allocate all the missing nodes in the path from the leaf cgroup + * to the root and we connect the nodes only after all the allocations + * have been successful. + */ +static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); + struct bfq_group *bfqg; + + bfqg = bfqio_lookup_group(bgrp, bfqd); + if (bfqg != NULL) + return bfqg; + + bfqg = bfq_group_chain_alloc(bfqd, cgroup); + if (bfqg != NULL) + bfq_group_chain_link(bfqd, cgroup, bfqg); + else + bfqg = bfqd->root_group; + + return bfqg; +} + +/** + * bfq_bfqq_move - migrate @bfqq to @bfqg. + * @bfqd: queue descriptor. + * @bfqq: the queue to move. + * @entity: @bfqq's entity. + * @bfqg: the group to move to. + * + * Move @bfqq to @bfqg, deactivating it from its old group and reactivating + * it on the new one. Avoid putting the entity on the old group idle tree. + * + * Must be called under the queue lock; the cgroup owning @bfqg must + * not disappear (by now this just means that we are called under + * rcu_read_lock()). + */ +static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, + struct bfq_entity *entity, struct bfq_group *bfqg) +{ + int busy, resume; + + busy = bfq_bfqq_busy(bfqq); + resume = !RB_EMPTY_ROOT(&bfqq->sort_list); + + BUG_ON(resume && !entity->on_st); + BUG_ON(busy && !resume && entity->on_st && bfqq != bfqd->active_queue); + + if (busy) { + BUG_ON(atomic_read(&bfqq->ref) < 2); + + if (!resume) + bfq_del_bfqq_busy(bfqd, bfqq, 0); + else + bfq_deactivate_bfqq(bfqd, bfqq, 0); + } else if (entity->on_st) + bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); + + /* + * Here we use a reference to bfqg. We don't need a refcounter + * as the cgroup reference will not be dropped, so that its + * destroy() callback will not be invoked. 
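+ * The queue is re-activated on the new group below only if it was
+ * busy and still has queued requests to resume.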
+ */ + entity->parent = bfqg->my_entity; + entity->sched_data = &bfqg->sched_data; + + if (busy && resume) + bfq_activate_bfqq(bfqd, bfqq); +} + +/** + * __bfq_cic_change_cgroup - move @cic to @cgroup. + * @bfqd: the queue descriptor. + * @cic: the cic to move. + * @cgroup: the cgroup to move to. + * + * Move cic to cgroup, assuming that bfqd->queue is locked; the caller + * has to make sure that the reference to cgroup is valid across the call. + * + * NOTE: an alternative approach might have been to store the current + * cgroup in bfqq and getting a reference to it, reducing the lookup + * time here, at the price of slightly more complex code. + */ +static struct bfq_group *__bfq_cic_change_cgroup(struct bfq_data *bfqd, + struct cfq_io_context *cic, + struct cgroup *cgroup) +{ + struct bfq_queue *async_bfqq = cic_to_bfqq(cic, 0); + struct bfq_queue *sync_bfqq = cic_to_bfqq(cic, 1); + struct bfq_entity *entity; + struct bfq_group *bfqg; + + bfqg = bfq_find_alloc_group(bfqd, cgroup); + if (async_bfqq != NULL) { + entity = &async_bfqq->entity; + + if (entity->sched_data != &bfqg->sched_data) { + cic_set_bfqq(cic, NULL, 0); + bfq_log_bfqq(bfqd, async_bfqq, + "cic_change_group: %p %d", + async_bfqq, atomic_read(&async_bfqq->ref)); + bfq_put_queue(async_bfqq); + } + } + + if (sync_bfqq != NULL) { + entity = &sync_bfqq->entity; + if (entity->sched_data != &bfqg->sched_data) + bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); + } + + return bfqg; +} + +/** + * bfq_cic_change_cgroup - move @cic to @cgroup. + * @cic: the cic being migrated. + * @cgroup: the destination cgroup. + * + * When the task owning @cic is moved to @cgroup, @cic is immediately + * moved into its new parent group. + */ +static void bfq_cic_change_cgroup(struct cfq_io_context *cic, + struct cgroup *cgroup) +{ + struct bfq_data *bfqd; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (bfqd != NULL && + !strncmp(bfqd->queue->elevator->elevator_type->elevator_name, + "bfq", ELV_NAME_MAX)) { + __bfq_cic_change_cgroup(bfqd, cic, cgroup); + bfq_put_bfqd_unlock(bfqd, &flags); + } +} + +/** + * bfq_cic_update_cgroup - update the cgroup of @cic. + * @cic: the @cic to update. + * + * Make sure that @cic is enqueued in the cgroup of the current task. + * We need this in addition to moving cics during the cgroup attach + * phase because the task owning @cic could be at its first disk + * access or we may end up in the root cgroup as the result of a + * memory allocation failure and here we try to move to the right + * group. + * + * Must be called under the queue lock. It is safe to use the returned + * value even after the rcu_read_unlock() as the migration/destruction + * paths act under the queue lock too. IOW it is impossible to race with + * group migration/destruction and end up with an invalid group as: + * a) here cgroup has not yet been destroyed, nor its destroy callback + * has started execution, as current holds a reference to it, + * b) if it is destroyed after rcu_read_unlock() [after current is + * migrated to a different cgroup] its attach() callback will have + * taken care of remove all the references to the old cgroup data. 
+ */ +static struct bfq_group *bfq_cic_update_cgroup(struct cfq_io_context *cic) +{ + struct bfq_data *bfqd = cic->key; + struct bfq_group *bfqg; + struct cgroup *cgroup; + + BUG_ON(bfqd == NULL); + + rcu_read_lock(); + cgroup = task_cgroup(current, bfqio_subsys_id); + bfqg = __bfq_cic_change_cgroup(bfqd, cic, cgroup); + rcu_read_unlock(); + + return bfqg; +} + +/** + * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. + * @st: the service tree being flushed. + */ +static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) +{ + struct bfq_entity *entity = st->first_idle; + + for (; entity != NULL; entity = st->first_idle) + __bfq_deactivate_entity(entity, 0); +} + +/** + * bfq_reparent_leaf_entity - move leaf entity to the root_group. + * @bfqd: the device data structure with the root group. + * @entity: the entity to move. + */ +static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(bfqq == NULL); + bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); + return; +} + +/** + * bfq_reparent_active_entities - move to the root group all active entities. + * @bfqd: the device data structure with the root group. + * @bfqg: the group to move from. + * @st: the service tree with the entities. + * + * Needs queue_lock to be taken and reference to be valid over the call. + */ +static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, + struct bfq_group *bfqg, + struct bfq_service_tree *st) +{ + struct rb_root *active = &st->active; + struct bfq_entity *entity = NULL; + + if (!RB_EMPTY_ROOT(&st->active)) + entity = bfq_entity_of(rb_first(active)); + + for (; entity != NULL ; entity = bfq_entity_of(rb_first(active))) + bfq_reparent_leaf_entity(bfqd, entity); + + if (bfqg->sched_data.active_entity != NULL) + bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.active_entity); + + return; +} + +/** + * bfq_destroy_group - destroy @bfqg. + * @bgrp: the bfqio_cgroup containing @bfqg. + * @bfqg: the group being destroyed. + * + * Destroy @bfqg, making sure that it is not referenced from its parent. + */ +static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) +{ + struct bfq_data *bfqd; + struct bfq_service_tree *st; + struct bfq_entity *entity = bfqg->my_entity; + unsigned long uninitialized_var(flags); + int i; + + hlist_del(&bfqg->group_node); + + /* + * Empty all service_trees belonging to this group before deactivating + * the group itself. + */ + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { + st = bfqg->sched_data.service_tree + i; + + /* + * The idle tree may still contain bfq_queues belonging + * to exited task because they never migrated to a different + * cgroup from the one being destroyed now. Noone else + * can access them so it's safe to act without any lock. + */ + bfq_flush_idle_tree(st); + + /* + * It may happen that some queues are still active + * (busy) upon group destruction (if the corresponding + * processes have been forced to terminate). We move + * all the leaf entities corresponding to these queues + * to the root_group. + * Also, it may happen that the group has an entity + * under service, which is disconnected from the active + * tree: it must be moved, too. + * There is no need to put the sync queues, as the + * scheduler has taken no reference. 
+ */ + bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); + if (bfqd != NULL) { + bfq_reparent_active_entities(bfqd, bfqg, st); + bfq_put_bfqd_unlock(bfqd, &flags); + } + BUG_ON(!RB_EMPTY_ROOT(&st->active)); + BUG_ON(!RB_EMPTY_ROOT(&st->idle)); + } + BUG_ON(bfqg->sched_data.next_active != NULL); + BUG_ON(bfqg->sched_data.active_entity != NULL); + + /* + * We may race with device destruction, take extra care when + * dereferencing bfqg->bfqd. + */ + bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); + if (bfqd != NULL) { + hlist_del(&bfqg->bfqd_node); + __bfq_deactivate_entity(entity, 0); + bfq_put_async_queues(bfqd, bfqg); + bfq_put_bfqd_unlock(bfqd, &flags); + } + BUG_ON(entity->tree != NULL); + + /* + * No need to defer the kfree() to the end of the RCU grace + * period: we are called from the destroy() callback of our + * cgroup, so we can be sure that noone is a) still using + * this cgroup or b) doing lookups in it. + */ + kfree(bfqg); +} + +/** + * bfq_disconnect_groups - diconnect @bfqd from all its groups. + * @bfqd: the device descriptor being exited. + * + * When the device exits we just make sure that no lookup can return + * the now unused group structures. They will be deallocated on cgroup + * destruction. + */ +static void bfq_disconnect_groups(struct bfq_data *bfqd) +{ + struct hlist_node *pos, *n; + struct bfq_group *bfqg; + + bfq_log(bfqd, "disconnect_groups beginning") ; + hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) { + hlist_del(&bfqg->bfqd_node); + + __bfq_deactivate_entity(bfqg->my_entity, 0); + + /* + * Don't remove from the group hash, just set an + * invalid key. No lookups can race with the + * assignment as bfqd is being destroyed; this + * implies also that new elements cannot be added + * to the list. + */ + rcu_assign_pointer(bfqg->bfqd, NULL); + + bfq_log(bfqd, "disconnect_groups: put async for group %p", + bfqg) ; + bfq_put_async_queues(bfqd, bfqg); + } +} + +static inline void bfq_free_root_group(struct bfq_data *bfqd) +{ + struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; + struct bfq_group *bfqg = bfqd->root_group; + + bfq_put_async_queues(bfqd, bfqg); + + spin_lock_irq(&bgrp->lock); + hlist_del_rcu(&bfqg->group_node); + spin_unlock_irq(&bgrp->lock); + + /* + * No need to synchronize_rcu() here: since the device is gone + * there cannot be any read-side access to its root_group. 
+ */ + kfree(bfqg); +} + +static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) +{ + struct bfq_group *bfqg; + struct bfqio_cgroup *bgrp; + int i; + + bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); + if (bfqg == NULL) + return NULL; + + bfqg->entity.parent = NULL; + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) + bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; + + bgrp = &bfqio_root_cgroup; + spin_lock_irq(&bgrp->lock); + rcu_assign_pointer(bfqg->bfqd, bfqd); + hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); + spin_unlock_irq(&bgrp->lock); + + return bfqg; +} + +#define SHOW_FUNCTION(__VAR) \ +static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \ + struct cftype *cftype) \ +{ \ + struct bfqio_cgroup *bgrp; \ + u64 ret; \ + \ + if (!cgroup_lock_live_group(cgroup)) \ + return -ENODEV; \ + \ + bgrp = cgroup_to_bfqio(cgroup); \ + spin_lock_irq(&bgrp->lock); \ + ret = bgrp->__VAR; \ + spin_unlock_irq(&bgrp->lock); \ + \ + cgroup_unlock(); \ + \ + return ret; \ +} + +SHOW_FUNCTION(weight); +SHOW_FUNCTION(ioprio); +SHOW_FUNCTION(ioprio_class); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ +static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \ + struct cftype *cftype, \ + u64 val) \ +{ \ + struct bfqio_cgroup *bgrp; \ + struct bfq_group *bfqg; \ + struct hlist_node *n; \ + \ + if (val < (__MIN) || val > (__MAX)) \ + return -EINVAL; \ + \ + if (!cgroup_lock_live_group(cgroup)) \ + return -ENODEV; \ + \ + bgrp = cgroup_to_bfqio(cgroup); \ + \ + spin_lock_irq(&bgrp->lock); \ + bgrp->__VAR = (unsigned short)val; \ + hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \ + bfqg->entity.new_##__VAR = (unsigned short)val; \ + smp_wmb(); \ + bfqg->entity.ioprio_changed = 1; \ + } \ + spin_unlock_irq(&bgrp->lock); \ + \ + cgroup_unlock(); \ + \ + return 0; \ +} + +STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); +STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); +STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); +#undef STORE_FUNCTION + +static struct cftype bfqio_files[] = { + { + .name = "weight", + .read_u64 = bfqio_cgroup_weight_read, + .write_u64 = bfqio_cgroup_weight_write, + }, + { + .name = "ioprio", + .read_u64 = bfqio_cgroup_ioprio_read, + .write_u64 = bfqio_cgroup_ioprio_write, + }, + { + .name = "ioprio_class", + .read_u64 = bfqio_cgroup_ioprio_class_read, + .write_u64 = bfqio_cgroup_ioprio_class_write, + }, +}; + +static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + return cgroup_add_files(cgroup, subsys, bfqio_files, + ARRAY_SIZE(bfqio_files)); +} + +static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys *subsys, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp; + + if (cgroup->parent != NULL) { + bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); + if (bgrp == NULL) + return ERR_PTR(-ENOMEM); + } else + bgrp = &bfqio_root_cgroup; + + spin_lock_init(&bgrp->lock); + INIT_HLIST_HEAD(&bgrp->group_data); + bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; + bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; + + return &bgrp->css; +} + +/* + * We cannot support shared io contexts, as we have no mean to support + * two tasks with the same ioc in two different groups without major rework + * of the main cic/bfqq data structures. 
By now we allow a task to change + * its cgroup only if it's the only owner of its ioc; the drawback of this + * behavior is that a group containing a task that forked using CLONE_IO + * will not be destroyed until the tasks sharing the ioc die. + */ +static int bfqio_can_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, + struct task_struct *tsk) +{ + struct io_context *ioc; + int ret = 0; + + /* task_lock() is needed to avoid races with exit_io_context() */ + task_lock(tsk); + ioc = tsk->io_context; + if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) + /* + * ioc == NULL means that the task is either too young or + * exiting: if it has still no ioc the ioc can't be shared, + * if the task is exiting the attach will fail anyway, no + * matter what we return here. + */ + ret = -EINVAL; + task_unlock(tsk); + + return ret; +} + +static void bfqio_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, + struct cgroup *prev, struct task_struct *tsk) +{ + struct io_context *ioc; + struct cfq_io_context *cic; + struct hlist_node *n; + + task_lock(tsk); + ioc = tsk->io_context; + if (ioc != NULL) { + BUG_ON(atomic_long_read(&ioc->refcount) == 0); + atomic_long_inc(&ioc->refcount); + } + task_unlock(tsk); + + if (ioc == NULL) + return; + + rcu_read_lock(); + hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) + bfq_cic_change_cgroup(cic, cgroup); + rcu_read_unlock(); + + put_io_context(ioc); +} + +static void bfqio_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); + struct hlist_node *n, *tmp; + struct bfq_group *bfqg; + + /* + * Since we are destroying the cgroup, there are no more tasks + * referencing it, and all the RCU grace periods that may have + * referenced it are ended (as the destruction of the parent + * cgroup is RCU-safe); bgrp->group_data will not be accessed by + * anything else and we don't need any synchronization. 
+ */ + hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node) + bfq_destroy_group(bgrp, bfqg); + + BUG_ON(!hlist_empty(&bgrp->group_data)); + + kfree(bgrp); +} + +struct cgroup_subsys bfqio_subsys = { + .name = "bfqio", + .create = bfqio_create, + .can_attach = bfqio_can_attach, + .attach = bfqio_attach, + .destroy = bfqio_destroy, + .populate = bfqio_populate, + .subsys_id = bfqio_subsys_id, +}; +#else +static inline void bfq_init_entity(struct bfq_entity *entity, + struct bfq_group *bfqg) +{ + entity->weight = entity->new_weight; + entity->orig_weight = entity->new_weight; + entity->ioprio = entity->new_ioprio; + entity->ioprio_class = entity->new_ioprio_class; + entity->sched_data = &bfqg->sched_data; +} + +static inline struct bfq_group * +bfq_cic_update_cgroup(struct cfq_io_context *cic) +{ + struct bfq_data *bfqd = cic->key; + return bfqd->root_group; +} + +static inline void bfq_bfqq_move(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct bfq_entity *entity, + struct bfq_group *bfqg) +{ +} + +static inline void bfq_disconnect_groups(struct bfq_data *bfqd) +{ + bfq_put_async_queues(bfqd, bfqd->root_group); +} + +static inline void bfq_free_root_group(struct bfq_data *bfqd) +{ + kfree(bfqd->root_group); +} + +static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) +{ + struct bfq_group *bfqg; + int i; + + bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); + if (bfqg == NULL) + return NULL; + + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) + bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; + + return bfqg; +} +#endif diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c new file mode 100644 index 00000000000..8f2b6c61d3f --- /dev/null +++ b/block/bfq-ioc.c @@ -0,0 +1,380 @@ +/* + * BFQ: I/O context handling. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + */ + +/** + * bfq_cic_free_rcu - deferred cic freeing. + * @head: RCU head of the cic to free. + * + * Free the cic containing @head and, if it was the last one and + * the module is exiting wake up anyone waiting for its deallocation + * (see bfq_exit()). + */ +static void bfq_cic_free_rcu(struct rcu_head *head) +{ + struct cfq_io_context *cic; + + cic = container_of(head, struct cfq_io_context, rcu_head); + + kmem_cache_free(bfq_ioc_pool, cic); + elv_ioc_count_dec(bfq_ioc_count); + + if (bfq_ioc_gone != NULL) { + spin_lock(&bfq_ioc_gone_lock); + if (bfq_ioc_gone != NULL && + !elv_ioc_count_read(bfq_ioc_count)) { + complete(bfq_ioc_gone); + bfq_ioc_gone = NULL; + } + spin_unlock(&bfq_ioc_gone_lock); + } +} + +static void bfq_cic_free(struct cfq_io_context *cic) +{ + call_rcu(&cic->rcu_head, bfq_cic_free_rcu); +} + +/** + * cic_free_func - disconnect a cic ready to be freed. + * @ioc: the io_context @cic belongs to. + * @cic: the cic to be freed. + * + * Remove @cic from the @ioc radix tree hash and from its cic list, + * deferring the deallocation of @cic to the end of the current RCU + * grace period. This assumes that __bfq_exit_single_io_context() + * has already been called for @cic. 
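+ *
+ * Editor's note (an assumption inferred from the checks below, not taken
+ * from the original patch): bfqd_dead_key() presumably packs the device's
+ * cic_index above CIC_DEAD_INDEX_SHIFT and sets the CIC_DEAD_KEY flag in
+ * the low bits, roughly
+ *
+ *   dead_key = (bfqd->cic_index << CIC_DEAD_INDEX_SHIFT) | CIC_DEAD_KEY;
+ *
+ * which is why the radix tree slot is recovered below with
+ * dead_key >> CIC_DEAD_INDEX_SHIFT.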
+ */ +static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) +{ + unsigned long flags; + unsigned long dead_key = (unsigned long) cic->key; + + BUG_ON(!(dead_key & CIC_DEAD_KEY)); + + spin_lock_irqsave(&ioc->lock, flags); + radix_tree_delete(&ioc->bfq_radix_root, + dead_key >> CIC_DEAD_INDEX_SHIFT); + hlist_del_init_rcu(&cic->cic_list); + spin_unlock_irqrestore(&ioc->lock, flags); + + bfq_cic_free(cic); +} + +static void bfq_free_io_context(struct io_context *ioc) +{ + /* + * ioc->refcount is zero here, or we are called from elv_unregister(), + * so no more cic's are allowed to be linked into this ioc. So it + * should be ok to iterate over the known list, we will see all cic's + * since no new ones are added. + */ + call_for_each_cic(ioc, cic_free_func); +} + +/** + * __bfq_exit_single_io_context - deassociate @cic from any running task. + * @bfqd: bfq_data on which @cic is valid. + * @cic: the cic being exited. + * + * Whenever no more tasks are using @cic or @bfqd is deallocated we + * need to invalidate its entry in the radix tree hash table and to + * release the queues it refers to. + * + * Called under the queue lock. + */ +static void __bfq_exit_single_io_context(struct bfq_data *bfqd, + struct cfq_io_context *cic) +{ + struct io_context *ioc = cic->ioc; + + list_del_init(&cic->queue_list); + + /* + * Make sure dead mark is seen for dead queues + */ + smp_wmb(); + rcu_assign_pointer(cic->key, bfqd_dead_key(bfqd)); + + /* + * No write-side locking as no task is using @ioc (they're exited + * or bfqd is being deallocated. + */ + rcu_read_lock(); + if (rcu_dereference(ioc->ioc_data) == cic) { + rcu_read_unlock(); + spin_lock(&ioc->lock); + rcu_assign_pointer(ioc->ioc_data, NULL); + spin_unlock(&ioc->lock); + } else + rcu_read_unlock(); + + if (cic->cfqq[BLK_RW_ASYNC] != NULL) { + bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_ASYNC]); + cic->cfqq[BLK_RW_ASYNC] = NULL; + } + + if (cic->cfqq[BLK_RW_SYNC] != NULL) { + bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_SYNC]); + cic->cfqq[BLK_RW_SYNC] = NULL; + } +} + +/** + * bfq_exit_single_io_context - deassociate @cic from @ioc (unlocked version). + * @ioc: the io_context @cic belongs to. + * @cic: the cic being exited. + * + * Take the queue lock and call __bfq_exit_single_io_context() to do the + * rest of the work. We take care of possible races with bfq_exit_queue() + * using bfq_get_bfqd_locked() (and abusing a little bit the RCU mechanism). + */ +static void bfq_exit_single_io_context(struct io_context *ioc, + struct cfq_io_context *cic) +{ + struct bfq_data *bfqd; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (bfqd != NULL) { + __bfq_exit_single_io_context(bfqd, cic); + bfq_put_bfqd_unlock(bfqd, &flags); + } +} + +/** + * bfq_exit_io_context - deassociate @ioc from all cics it owns. + * @ioc: the @ioc being exited. + * + * No more processes are using @ioc we need to clean up and put the + * internal structures we have that belongs to that process. Loop + * through all its cics, locking their queues and exiting them. 
+ */ +static void bfq_exit_io_context(struct io_context *ioc) +{ + call_for_each_cic(ioc, bfq_exit_single_io_context); +} + +static struct cfq_io_context *bfq_alloc_io_context(struct bfq_data *bfqd, + gfp_t gfp_mask) +{ + struct cfq_io_context *cic; + + cic = kmem_cache_alloc_node(bfq_ioc_pool, gfp_mask | __GFP_ZERO, + bfqd->queue->node); + if (cic != NULL) { + cic->ttime.last_end_request = jiffies; + INIT_LIST_HEAD(&cic->queue_list); + INIT_HLIST_NODE(&cic->cic_list); + cic->dtor = bfq_free_io_context; + cic->exit = bfq_exit_io_context; + elv_ioc_count_inc(bfq_ioc_count); + } + + return cic; +} + +/** + * bfq_drop_dead_cic - free an exited cic. + * @bfqd: bfq data for the device in use. + * @ioc: io_context owning @cic. + * @cic: the @cic to free. + * + * We drop cfq io contexts lazily, so we may find a dead one. + */ +static void bfq_drop_dead_cic(struct bfq_data *bfqd, struct io_context *ioc, + struct cfq_io_context *cic) +{ + unsigned long flags; + + WARN_ON(!list_empty(&cic->queue_list)); + BUG_ON(cic->key != bfqd_dead_key(bfqd)); + + spin_lock_irqsave(&ioc->lock, flags); + + BUG_ON(ioc->ioc_data == cic); + + /* + * With shared I/O contexts two lookups may race and drop the + * same cic more than one time: RCU guarantees that the storage + * will not be freed too early, here we make sure that we do + * not try to remove the cic from the hashing structures multiple + * times. + */ + if (!hlist_unhashed(&cic->cic_list)) { + radix_tree_delete(&ioc->bfq_radix_root, bfqd->cic_index); + hlist_del_init_rcu(&cic->cic_list); + bfq_cic_free(cic); + } + + spin_unlock_irqrestore(&ioc->lock, flags); +} + +/** + * bfq_cic_lookup - search into @ioc a cic associated to @bfqd. + * @bfqd: the lookup key. + * @ioc: the io_context of the process doing I/O. + * + * If @ioc already has a cic associated to @bfqd return it, return %NULL + * otherwise. + */ +static struct cfq_io_context *bfq_cic_lookup(struct bfq_data *bfqd, + struct io_context *ioc) +{ + struct cfq_io_context *cic; + unsigned long flags; + void *k; + + if (unlikely(ioc == NULL)) + return NULL; + + rcu_read_lock(); + + /* We maintain a last-hit cache, to avoid browsing over the tree. */ + cic = rcu_dereference(ioc->ioc_data); + if (cic != NULL) { + k = rcu_dereference(cic->key); + if (k == bfqd) + goto out; + } + + do { + cic = radix_tree_lookup(&ioc->bfq_radix_root, + bfqd->cic_index); + if (cic == NULL) + goto out; + + k = rcu_dereference(cic->key); + if (unlikely(k != bfqd)) { + rcu_read_unlock(); + bfq_drop_dead_cic(bfqd, ioc, cic); + rcu_read_lock(); + continue; + } + + spin_lock_irqsave(&ioc->lock, flags); + rcu_assign_pointer(ioc->ioc_data, cic); + spin_unlock_irqrestore(&ioc->lock, flags); + break; + } while (1); + +out: + rcu_read_unlock(); + + return cic; +} + +/** + * bfq_cic_link - add @cic to @ioc. + * @bfqd: bfq_data @cic refers to. + * @ioc: io_context @cic belongs to. + * @cic: the cic to link. + * @gfp_mask: the mask to use for radix tree preallocations. + * + * Add @cic to @ioc, using @bfqd as the search key. This enables us to + * lookup the process specific cfq io context when entered from the block + * layer. Also adds @cic to a per-bfqd list, used when this queue is + * removed. + */ +static int bfq_cic_link(struct bfq_data *bfqd, struct io_context *ioc, + struct cfq_io_context *cic, gfp_t gfp_mask) +{ + unsigned long flags; + int ret; + + ret = radix_tree_preload(gfp_mask); + if (ret == 0) { + cic->ioc = ioc; + + /* No write-side locking, cic is not published yet. 
*/ + rcu_assign_pointer(cic->key, bfqd); + + spin_lock_irqsave(&ioc->lock, flags); + ret = radix_tree_insert(&ioc->bfq_radix_root, + bfqd->cic_index, cic); + if (ret == 0) + hlist_add_head_rcu(&cic->cic_list, &ioc->bfq_cic_list); + spin_unlock_irqrestore(&ioc->lock, flags); + + radix_tree_preload_end(); + + if (ret == 0) { + spin_lock_irqsave(bfqd->queue->queue_lock, flags); + list_add(&cic->queue_list, &bfqd->cic_list); + spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); + } + } + + if (ret != 0) + printk(KERN_ERR "bfq: cic link failed!\n"); + + return ret; +} + +/** + * bfq_ioc_set_ioprio - signal a priority change to the cics belonging to @ioc. + * @ioc: the io_context changing its priority. + */ +static inline void bfq_ioc_set_ioprio(struct io_context *ioc) +{ + call_for_each_cic(ioc, bfq_changed_ioprio); +} + +/** + * bfq_get_io_context - return the @cic associated to @bfqd in @ioc. + * @bfqd: the search key. + * @gfp_mask: the mask to use for cic allocation. + * + * Setup general io context and cfq io context. There can be several cfq + * io contexts per general io context, if this process is doing io to more + * than one device managed by cfq. + */ +static struct cfq_io_context *bfq_get_io_context(struct bfq_data *bfqd, + gfp_t gfp_mask) +{ + struct io_context *ioc = NULL; + struct cfq_io_context *cic; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + ioc = get_io_context(gfp_mask, bfqd->queue->node); + if (ioc == NULL) + return NULL; + + /* Lookup for an existing cic. */ + cic = bfq_cic_lookup(bfqd, ioc); + if (cic != NULL) + goto out; + + /* Alloc one if needed. */ + cic = bfq_alloc_io_context(bfqd, gfp_mask); + if (cic == NULL) + goto err; + + /* Link it into the ioc's radix tree and cic list. */ + if (bfq_cic_link(bfqd, ioc, cic, gfp_mask) != 0) + goto err_free; + +out: + /* + * test_and_clear_bit() implies a memory barrier, paired with + * the wmb() in fs/ioprio.c, so the value seen for ioprio is the + * new one. + */ + if (unlikely(test_and_clear_bit(IOC_BFQ_IOPRIO_CHANGED, + ioc->ioprio_changed))) + bfq_ioc_set_ioprio(ioc); + + return cic; +err_free: + bfq_cic_free(cic); +err: + put_io_context(ioc); + return NULL; +} diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c new file mode 100644 index 00000000000..576cd03a28c --- /dev/null +++ b/block/bfq-iosched.c @@ -0,0 +1,3021 @@ +/* + * BFQ, or Budget Fair Queueing, disk scheduler. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. + * + * BFQ is a proportional share disk scheduling algorithm based on the + * slice-by-slice service scheme of CFQ. But BFQ assigns budgets, + * measured in number of sectors, to tasks instead of time slices. + * The disk is not granted to the active task for a given time slice, + * but until it has exahusted its assigned budget. This change from + * the time to the service domain allows BFQ to distribute the disk + * bandwidth among tasks as desired, without any distortion due to + * ZBR, workload fluctuations or other factors. BFQ uses an ad hoc + * internal scheduler, called B-WF2Q+, to schedule tasks according to + * their budgets. Thanks to this accurate scheduler, BFQ can afford + * to assign high budgets to disk-bound non-seeky tasks (to boost the + * throughput), and yet guarantee low latencies to interactive and + * soft real-time applications. 
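+ *
+ * A small illustration (editor's note, not part of the original
+ * description): with two queues of equal weight, a sequential reader and
+ * a very seeky reader are each granted the same number of sectors per
+ * scheduling round; under a pure time-slice scheme they would instead get
+ * the same disk time, and the seeky reader would end up with far less
+ * bandwidth.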
+ * + * BFQ has been introduced in [1], where the interested reader can + * find an accurate description of the algorithm, the bandwidth + * distribution and latency guarantees it provides, plus formal proofs + * of all the properties. With respect to the algorithm presented in + * the paper, this implementation adds several little heuristics, and + * a hierarchical extension, based on H-WF2Q+. + * + * B-WF2Q+ is based on WF2Q+, that is described in [2], together with + * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) + * complexity derives from the one introduced with EEVDF in [3]. + * + * [1] P. Valente and F. Checconi, ``High Throughput Disk Scheduling + * with Deterministic Guarantees on Bandwidth Distribution,'', + * IEEE Transactions on Computer, May 2010. + * + * http://algo.ing.unimo.it/people/paolo/disk_sched/bfq-techreport.pdf + * + * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing + * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, + * Oct 1997. + * + * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz + * + * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline + * First: A Flexible and Accurate Mechanism for Proportional Share + * Resource Allocation,'' technical report. + * + * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "bfq.h" + +/* Max number of dispatches in one round of service. */ +static const int bfq_quantum = 4; + +/* Expiration time of sync (0) and async (1) requests, in jiffies. */ +static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; + +/* Maximum backwards seek, in KiB. */ +static const int bfq_back_max = 16 * 1024; + +/* Penalty of a backwards seek, in number of sectors. */ +static const int bfq_back_penalty = 2; + +/* Idling period duration, in jiffies. */ +static int bfq_slice_idle = HZ / 125; + +/* Default maximum budget values, in sectors and number of requests. */ +static const int bfq_default_max_budget = 16 * 1024; +static const int bfq_max_budget_async_rq = 4; + +/* + * Async to sync throughput distribution is controlled as follows: + * when an async request is served, the entity is charged the number + * of sectors of the request, multipled by the factor below + */ +static const int bfq_async_charge_factor = 10; + +/* Default timeout values, in jiffies, approximating CFQ defaults. */ +static const int bfq_timeout_sync = HZ / 8; +static int bfq_timeout_async = HZ / 25; + +struct kmem_cache *bfq_pool; +struct kmem_cache *bfq_ioc_pool; + +static DEFINE_PER_CPU(unsigned long, bfq_ioc_count); +static struct completion *bfq_ioc_gone; +static DEFINE_SPINLOCK(bfq_ioc_gone_lock); + +static DEFINE_SPINLOCK(cic_index_lock); +static DEFINE_IDA(cic_index_ida); + +/* Below this threshold (in ms), we consider thinktime immediate. */ +#define BFQ_MIN_TT 2 + +/* hw_tag detection: parallel requests threshold and min samples needed. */ +#define BFQ_HW_QUEUE_THRESHOLD 4 +#define BFQ_HW_QUEUE_SAMPLES 32 + +#define BFQQ_SEEK_THR (sector_t)(8 * 1024) +#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) + +/* Min samples used for peak rate estimation (for autotuning). */ +#define BFQ_PEAK_RATE_SAMPLES 32 + +/* Shift used for peak rate fixed precision calculations. 
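+ *
+ * Worked example (editor's note): rates are stored as
+ * (sectors/usec) << BFQ_RATE_SHIFT, so with BFQ_RATE_SHIFT = 16 a
+ * reference value such as R_rot = 17415 (defined below) decodes to
+ * 17415 / 65536, i.e. about 0.27 sectors/usec, or roughly 136 MB/s
+ * with 512-byte sectors.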
*/ +#define BFQ_RATE_SHIFT 16 + +/* + * The duration of the weight raising for interactive applications is + * computed automatically (as default behaviour), using the following + * formula: duration = (R / r) * T, where r is the peak rate of the + * disk, and R and T are two reference parameters. In particular, R is + * the peak rate of a reference disk, and T is about the maximum time + * for starting popular large applications on that disk, under BFQ and + * while reading two files in parallel. Finally, BFQ uses two + * different pairs (R, T) depending on whether the disk is rotational + * or non-rotational. + */ +#define T_rot (msecs_to_jiffies(5500)) +#define T_nonrot (msecs_to_jiffies(2000)) +/* Next two quantities are in sectors/usec, left-shifted by BFQ_RATE_SHIFT */ +#define R_rot 17415 +#define R_nonrot 34791 + +#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ + { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) + +#define RQ_CIC(rq) \ + ((struct cfq_io_context *) (rq)->elevator_private[0]) +#define RQ_BFQQ(rq) ((rq)->elevator_private[1]) + +#include "bfq-ioc.c" +#include "bfq-sched.c" +#include "bfq-cgroup.c" + +#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ + IOPRIO_CLASS_IDLE) +#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ + IOPRIO_CLASS_RT) + +#define bfq_sample_valid(samples) ((samples) > 80) + +/* + * We regard a request as SYNC, if either it's a read or has the SYNC bit + * set (in which case it could also be a direct WRITE). + */ +static inline int bfq_bio_sync(struct bio *bio) +{ + if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) + return 1; + + return 0; +} + +/* + * Scheduler run of queue, if there are requests pending and no one in the + * driver that will restart queueing. + */ +static inline void bfq_schedule_dispatch(struct bfq_data *bfqd) +{ + if (bfqd->queued != 0) { + bfq_log(bfqd, "schedule dispatch"); + kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work); + } +} + +/* + * Lifted from AS - choose which of rq1 and rq2 that is best served now. + * We choose the request that is closesr to the head right now. Distance + * behind the head is penalized and only allowed to a certain extent. + */ +static struct request *bfq_choose_req(struct bfq_data *bfqd, + struct request *rq1, + struct request *rq2, + sector_t last) +{ + sector_t s1, s2, d1 = 0, d2 = 0; + unsigned long back_max; +#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ +#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ + unsigned wrap = 0; /* bit mask: requests behind the disk head? */ + + if (rq1 == NULL || rq1 == rq2) + return rq2; + if (rq2 == NULL) + return rq1; + + if (rq_is_sync(rq1) && !rq_is_sync(rq2)) + return rq1; + else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) + return rq2; + if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) + return rq1; + else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) + return rq2; + + s1 = blk_rq_pos(rq1); + s2 = blk_rq_pos(rq2); + + /* + * By definition, 1KiB is 2 sectors. + */ + back_max = bfqd->bfq_back_max * 2; + + /* + * Strict one way elevator _except_ in the case where we allow + * short backward seeks which are biased as twice the cost of a + * similar forward seek. 
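+	 *
+	 * Worked example (editor's note, using the default bfq_back_max of
+	 * 16*1024 KiB, i.e. back_max = 32768 sectors, and bfq_back_penalty
+	 * of 2): with the head at sector 1000000, a request at 1000300 gets
+	 * d = 300, while a request at 999800 lies 200 sectors behind and
+	 * gets d = 200 * 2 = 400, so the forward request is preferred even
+	 * though the backward one is physically closer.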
+ */ + if (s1 >= last) + d1 = s1 - last; + else if (s1 + back_max >= last) + d1 = (last - s1) * bfqd->bfq_back_penalty; + else + wrap |= BFQ_RQ1_WRAP; + + if (s2 >= last) + d2 = s2 - last; + else if (s2 + back_max >= last) + d2 = (last - s2) * bfqd->bfq_back_penalty; + else + wrap |= BFQ_RQ2_WRAP; + + /* Found required data */ + + /* + * By doing switch() on the bit mask "wrap" we avoid having to + * check two variables for all permutations: --> faster! + */ + switch (wrap) { + case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ + if (d1 < d2) + return rq1; + else if (d2 < d1) + return rq2; + else { + if (s1 >= s2) + return rq1; + else + return rq2; + } + + case BFQ_RQ2_WRAP: + return rq1; + case BFQ_RQ1_WRAP: + return rq2; + case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ + default: + /* + * Since both rqs are wrapped, + * start with the one that's further behind head + * (--> only *one* back seek required), + * since back seek takes more time than forward. + */ + if (s1 <= s2) + return rq1; + else + return rq2; + } +} + +static struct bfq_queue * +bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, + sector_t sector, struct rb_node **ret_parent, + struct rb_node ***rb_link) +{ + struct rb_node **p, *parent; + struct bfq_queue *bfqq = NULL; + + parent = NULL; + p = &root->rb_node; + while (*p) { + struct rb_node **n; + + parent = *p; + bfqq = rb_entry(parent, struct bfq_queue, pos_node); + + /* + * Sort strictly based on sector. Smallest to the left, + * largest to the right. + */ + if (sector > blk_rq_pos(bfqq->next_rq)) + n = &(*p)->rb_right; + else if (sector < blk_rq_pos(bfqq->next_rq)) + n = &(*p)->rb_left; + else + break; + p = n; + bfqq = NULL; + } + + *ret_parent = parent; + if (rb_link) + *rb_link = p; + + bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", + (long long unsigned)sector, + bfqq != NULL ? 
bfqq->pid : 0); + + return bfqq; +} + +static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct rb_node **p, *parent; + struct bfq_queue *__bfqq; + + if (bfqq->pos_root != NULL) { + rb_erase(&bfqq->pos_node, bfqq->pos_root); + bfqq->pos_root = NULL; + } + + if (bfq_class_idle(bfqq)) + return; + if (!bfqq->next_rq) + return; + + bfqq->pos_root = &bfqd->rq_pos_tree; + __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, + blk_rq_pos(bfqq->next_rq), &parent, &p); + if (__bfqq == NULL) { + rb_link_node(&bfqq->pos_node, parent, p); + rb_insert_color(&bfqq->pos_node, bfqq->pos_root); + } else + bfqq->pos_root = NULL; +} + +static struct request *bfq_find_next_rq(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct request *last) +{ + struct rb_node *rbnext = rb_next(&last->rb_node); + struct rb_node *rbprev = rb_prev(&last->rb_node); + struct request *next = NULL, *prev = NULL; + + BUG_ON(RB_EMPTY_NODE(&last->rb_node)); + + if (rbprev != NULL) + prev = rb_entry_rq(rbprev); + + if (rbnext != NULL) + next = rb_entry_rq(rbnext); + else { + rbnext = rb_first(&bfqq->sort_list); + if (rbnext && rbnext != &last->rb_node) + next = rb_entry_rq(rbnext); + } + + return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); +} + +static void bfq_del_rq_rb(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_data *bfqd = bfqq->bfqd; + const int sync = rq_is_sync(rq); + + BUG_ON(bfqq->queued[sync] == 0); + bfqq->queued[sync]--; + bfqd->queued--; + + elv_rb_del(&bfqq->sort_list, rq); + + if (RB_EMPTY_ROOT(&bfqq->sort_list)) { + if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->active_queue) + bfq_del_bfqq_busy(bfqd, bfqq, 1); + /* + * Remove queue from request-position tree as it is empty. + */ + if (bfqq->pos_root != NULL) { + rb_erase(&bfqq->pos_node, bfqq->pos_root); + bfqq->pos_root = NULL; + } + } +} + +/* see the definition of bfq_async_charge_factor for details */ +static inline unsigned long bfq_serv_to_charge(struct request *rq, + struct bfq_queue *bfqq) +{ + return blk_rq_sectors(rq) * + (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->raising_coeff == 1) * + bfq_async_charge_factor)); +} + +/** + * bfq_updated_next_req - update the queue after a new next_rq selection. + * @bfqd: the device data the queue belongs to. + * @bfqq: the queue to update. + * + * If the first request of a queue changes we make sure that the queue + * has enough budget to serve at least its first request (if the + * request has grown). We do this because if the queue has not enough + * budget for its first request, it has to go through two dispatch + * rounds to actually get it dispatched. + */ +static void bfq_updated_next_req(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + struct request *next_rq = bfqq->next_rq; + unsigned long new_budget; + + if (next_rq == NULL) + return; + + if (bfqq == bfqd->active_queue) + /* + * In order not to break guarantees, budgets cannot be + * changed after an entity has been selected. 
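+		 *
+		 * (Editor's note, added for clarity: the budget contributes to
+		 * the entity's virtual finish time in B-WF2Q+, and that
+		 * timestamp has already been used to select the entity, so
+		 * changing it now would invalidate the scheduling decision.)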
+ */ + return; + + BUG_ON(entity->tree != &st->active); + BUG_ON(entity == entity->sched_data->active_entity); + + new_budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + entity->budget = new_budget; + bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", new_budget); + bfq_activate_bfqq(bfqd, bfqq); +} + +static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd) +{ + u64 dur; + + if (bfqd->bfq_raising_max_time > 0) + return bfqd->bfq_raising_max_time; + + dur = bfqd->RT_prod; + do_div(dur, bfqd->peak_rate); + + return dur; +} + +static void bfq_add_rq_rb(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_entity *entity = &bfqq->entity; + struct bfq_data *bfqd = bfqq->bfqd; + struct request *next_rq, *prev; + unsigned long old_raising_coeff = bfqq->raising_coeff; + int idle_for_long_time = bfqq->budget_timeout + + bfqd->bfq_raising_min_idle_time < jiffies; + + bfq_log_bfqq(bfqd, bfqq, "add_rq_rb %d", rq_is_sync(rq)); + bfqq->queued[rq_is_sync(rq)]++; + bfqd->queued++; + + elv_rb_add(&bfqq->sort_list, rq); + + /* + * Check if this request is a better next-serve candidate. + */ + prev = bfqq->next_rq; + next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); + BUG_ON(next_rq == NULL); + bfqq->next_rq = next_rq; + + /* + * Adjust priority tree position, if next_rq changes. + */ + if (prev != bfqq->next_rq) + bfq_rq_pos_tree_add(bfqd, bfqq); + + if (!bfq_bfqq_busy(bfqq)) { + int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && + bfqq->soft_rt_next_start < jiffies; + entity->budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + + if (! bfqd->low_latency) + goto add_bfqq_busy; + + /* + * If the queue is not being boosted and has been idle + * for enough time, start a weight-raising period + */ + if(old_raising_coeff == 1 && (idle_for_long_time || soft_rt)) { + bfqq->raising_coeff = bfqd->bfq_raising_coeff; + if (idle_for_long_time) + bfqq->raising_cur_max_time = + bfq_wrais_duration(bfqd); + else + bfqq->raising_cur_max_time = + bfqd->bfq_raising_rt_max_time; + bfq_log_bfqq(bfqd, bfqq, + "wrais starting at %llu msec," + "rais_max_time %u", + bfqq->last_rais_start_finish, + jiffies_to_msecs(bfqq-> + raising_cur_max_time)); + } else if (old_raising_coeff > 1) { + if (idle_for_long_time) + bfqq->raising_cur_max_time = + bfq_wrais_duration(bfqd); + else if (bfqq->raising_cur_max_time == + bfqd->bfq_raising_rt_max_time && + !soft_rt) { + bfqq->raising_coeff = 1; + bfq_log_bfqq(bfqd, bfqq, + "wrais ending at %llu msec," + "rais_max_time %u", + bfqq->last_rais_start_finish, + jiffies_to_msecs(bfqq-> + raising_cur_max_time)); + } + } + if (old_raising_coeff != bfqq->raising_coeff) + entity->ioprio_changed = 1; +add_bfqq_busy: + bfq_add_bfqq_busy(bfqd, bfqq); + } else { + if(bfqd->low_latency && old_raising_coeff == 1 && + !rq_is_sync(rq) && + bfqq->last_rais_start_finish + + bfqd->bfq_raising_min_inter_arr_async < jiffies) { + bfqq->raising_coeff = bfqd->bfq_raising_coeff; + bfqq->raising_cur_max_time = bfq_wrais_duration(bfqd); + + entity->ioprio_changed = 1; + bfq_log_bfqq(bfqd, bfqq, + "non-idle wrais starting at %llu msec," + "rais_max_time %u", + bfqq->last_rais_start_finish, + jiffies_to_msecs(bfqq-> + raising_cur_max_time)); + } + bfq_updated_next_req(bfqd, bfqq); + } + + if(bfqd->low_latency && + (old_raising_coeff == 1 || bfqq->raising_coeff == 1 || + idle_for_long_time)) + bfqq->last_rais_start_finish = jiffies; +} + +static void bfq_reposition_rq_rb(struct 
bfq_queue *bfqq, struct request *rq) +{ + elv_rb_del(&bfqq->sort_list, rq); + bfqq->queued[rq_is_sync(rq)]--; + bfqq->bfqd->queued--; + bfq_add_rq_rb(rq); +} + +static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, + struct bio *bio) +{ + struct task_struct *tsk = current; + struct cfq_io_context *cic; + struct bfq_queue *bfqq; + + cic = bfq_cic_lookup(bfqd, tsk->io_context); + if (cic == NULL) + return NULL; + + bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); + if (bfqq != NULL) { + sector_t sector = bio->bi_sector + bio_sectors(bio); + + return elv_rb_find(&bfqq->sort_list, sector); + } + + return NULL; +} + +static void bfq_activate_request(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + + bfqd->rq_in_driver++; + bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); + bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", + (long long unsigned)bfqd->last_position); +} + +static void bfq_deactivate_request(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + + WARN_ON(bfqd->rq_in_driver == 0); + bfqd->rq_in_driver--; +} + +static void bfq_remove_request(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_data *bfqd = bfqq->bfqd; + + if (bfqq->next_rq == rq) { + bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); + bfq_updated_next_req(bfqd, bfqq); + } + + list_del_init(&rq->queuelist); + bfq_del_rq_rb(rq); + + if (rq->cmd_flags & REQ_META) { + WARN_ON(bfqq->meta_pending == 0); + bfqq->meta_pending--; + } +} + +static int bfq_merge(struct request_queue *q, struct request **req, + struct bio *bio) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct request *__rq; + + __rq = bfq_find_rq_fmerge(bfqd, bio); + if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } + + return ELEVATOR_NO_MERGE; +} + +static void bfq_merged_request(struct request_queue *q, struct request *req, + int type) +{ + if (type == ELEVATOR_FRONT_MERGE) { + struct bfq_queue *bfqq = RQ_BFQQ(req); + + bfq_reposition_rq_rb(bfqq, req); + } +} + +static void bfq_merged_requests(struct request_queue *q, struct request *rq, + struct request *next) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + /* + * Reposition in fifo if next is older than rq. + */ + if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && + time_before(rq_fifo_time(next), rq_fifo_time(rq))) { + list_move(&rq->queuelist, &next->queuelist); + rq_set_fifo_time(rq, rq_fifo_time(next)); + } + + if (bfqq->next_rq == next) + bfqq->next_rq = rq; + + bfq_remove_request(next); +} + +static int bfq_allow_merge(struct request_queue *q, struct request *rq, + struct bio *bio) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct cfq_io_context *cic; + struct bfq_queue *bfqq; + + /* Disallow merge of a sync bio into an async request. */ + if (bfq_bio_sync(bio) && !rq_is_sync(rq)) + return 0; + + /* + * Lookup the bfqq that this bio will be queued with. Allow + * merge only if rq is queued there. 
+ */ + cic = bfq_cic_lookup(bfqd, current->io_context); + if (cic == NULL) + return 0; + + bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); + return bfqq == RQ_BFQQ(rq); +} + +static void __bfq_set_active_queue(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + if (bfqq != NULL) { + bfq_mark_bfqq_must_alloc(bfqq); + bfq_mark_bfqq_budget_new(bfqq); + bfq_clear_bfqq_fifo_expire(bfqq); + + bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; + + bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu", + bfqq->entity.budget); + } + + bfqd->active_queue = bfqq; +} + +/* + * Get and set a new active queue for service. + */ +static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + if (!bfqq) + bfqq = bfq_get_next_queue(bfqd); + else + bfq_get_next_queue_forced(bfqd, bfqq); + + __bfq_set_active_queue(bfqd, bfqq); + return bfqq; +} + +static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd, + struct request *rq) +{ + if (blk_rq_pos(rq) >= bfqd->last_position) + return blk_rq_pos(rq) - bfqd->last_position; + else + return bfqd->last_position - blk_rq_pos(rq); +} + +/* + * Return true if bfqq has no request pending and rq is close enough to + * bfqd->last_position, or if rq is closer to bfqd->last_position than + * bfqq->next_rq + */ +static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq) +{ + return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR; +} + +static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) +{ + struct rb_root *root = &bfqd->rq_pos_tree; + struct rb_node *parent, *node; + struct bfq_queue *__bfqq; + sector_t sector = bfqd->last_position; + + if (RB_EMPTY_ROOT(root)) + return NULL; + + /* + * First, if we find a request starting at the end of the last + * request, choose it. + */ + __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); + if (__bfqq != NULL) + return __bfqq; + + /* + * If the exact sector wasn't found, the parent of the NULL leaf + * will contain the closest sector (rq_pos_tree sorted by next_request + * position). + */ + __bfqq = rb_entry(parent, struct bfq_queue, pos_node); + if (bfq_rq_close(bfqd, __bfqq->next_rq)) + return __bfqq; + + if (blk_rq_pos(__bfqq->next_rq) < sector) + node = rb_next(&__bfqq->pos_node); + else + node = rb_prev(&__bfqq->pos_node); + if (node == NULL) + return NULL; + + __bfqq = rb_entry(node, struct bfq_queue, pos_node); + if (bfq_rq_close(bfqd, __bfqq->next_rq)) + return __bfqq; + + return NULL; +} + +/* + * bfqd - obvious + * cur_bfqq - passed in so that we don't decide that the current queue + * is closely cooperating with itself. + * + * We are assuming that cur_bfqq has dispatched at least one request, + * and that bfqd->last_position reflects a position on the disk associated + * with the I/O issued by cur_bfqq. + */ +static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, + struct bfq_queue *cur_bfqq) +{ + struct bfq_queue *bfqq; + + if (bfq_class_idle(cur_bfqq)) + return NULL; + if (!bfq_bfqq_sync(cur_bfqq)) + return NULL; + if (BFQQ_SEEKY(cur_bfqq)) + return NULL; + + /* If device has only one backlogged bfq_queue, don't search. */ + if (bfqd->busy_queues == 1) + return NULL; + + /* + * We should notice if some of the queues are cooperating, e.g. + * working closely on the same area of the disk. In that case, + * we can group them together and don't waste time idling. + */ + bfqq = bfqq_close(bfqd); + if (bfqq == NULL || bfqq == cur_bfqq) + return NULL; + + /* + * Do not merge queues from different bfq_groups. 
+ */ + if (bfqq->entity.parent != cur_bfqq->entity.parent) + return NULL; + + /* + * It only makes sense to merge sync queues. + */ + if (!bfq_bfqq_sync(bfqq)) + return NULL; + if (BFQQ_SEEKY(bfqq)) + return NULL; + + /* + * Do not merge queues of different priority classes. + */ + if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) + return NULL; + + return bfqq; +} + +/* + * If enough samples have been computed, return the current max budget + * stored in bfqd, which is dynamically updated according to the + * estimated disk peak rate; otherwise return the default max budget + */ +static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) +{ + if (bfqd->budgets_assigned < 194) + return bfq_default_max_budget; + else + return bfqd->bfq_max_budget; +} + +/* + * Return min budget, which is a fraction of the current or default + * max budget (trying with 1/32) + */ +static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) +{ + if (bfqd->budgets_assigned < 194) + return bfq_default_max_budget; + else + return bfqd->bfq_max_budget / 32; +} + +/* + * Decides whether idling should be done for given device and + * given active queue. + */ +static inline bool bfq_queue_nonrot_noidle(struct bfq_data *bfqd, + struct bfq_queue *active_bfqq) +{ + if (active_bfqq == NULL) + return false; + /* + * If device is SSD it has no seek penalty, disable idling; but + * do so only if: + * - device does not support queuing, otherwise we still have + * a problem with sync vs async workloads; + * - the queue is not weight-raised, to preserve guarantees. + */ + return (blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag && + active_bfqq->raising_coeff == 1); +} + +static void bfq_arm_slice_timer(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq = bfqd->active_queue; + struct cfq_io_context *cic; + unsigned long sl; + + WARN_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + + if (bfq_queue_nonrot_noidle(bfqd, bfqq)) + return; + + /* Idling is disabled, either manually or by past process history. */ + if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_idle_window(bfqq)) + return; + + /* Tasks have exited, don't wait. */ + cic = bfqd->active_cic; + if (cic == NULL || atomic_read(&cic->ioc->nr_tasks) == 0) + return; + + bfq_mark_bfqq_wait_request(bfqq); + + /* + * We don't want to idle for seeks, but we do want to allow + * fair distribution of slice time for a process doing back-to-back + * seeks. So allow a little bit of time for him to submit a new rq. + * + * To prevent processes with (partly) seeky workloads from + * being too ill-treated, grant them a small fraction of the + * assigned budget before reducing the waiting time to + * BFQ_MIN_TT. This happened to help reduce latency. + */ + sl = bfqd->bfq_slice_idle; + if (bfq_sample_valid(bfqq->seek_samples) && BFQQ_SEEKY(bfqq) && + bfqq->entity.service > bfq_max_budget(bfqd) / 8 && + bfqq->raising_coeff == 1) + sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); + else if (bfqq->raising_coeff > 1) + sl = sl * 3; + bfqd->last_idling_start = ktime_get(); + mod_timer(&bfqd->idle_slice_timer, jiffies + sl); + bfq_log(bfqd, "arm idle: %u/%u ms", + jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); +} + +/* + * Set the maximum time for the active queue to consume its + * budget. This prevents seeky processes from lowering the disk + * throughput (always guaranteed with a time slice scheme as in CFQ). 
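+ *
+ * Illustration of the code below (editor's note): a sync queue that is
+ * not weight-raised has weight == orig_weight, so timeout_coeff = 1 and
+ * its budget timeout is jiffies + bfq_timeout[sync], about 125 ms with
+ * the default HZ/8 value; a queue whose weight has been raised by a
+ * factor c gets a timeout c times as long, except during soft real-time
+ * raising, where the coefficient is forced back to 1.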
+ */ +static void bfq_set_budget_timeout(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq = bfqd->active_queue; + unsigned int timeout_coeff; + if (bfqq->raising_cur_max_time == bfqd->bfq_raising_rt_max_time) + timeout_coeff = 1; + else + timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; + + bfqd->last_budget_start = ktime_get(); + + bfq_clear_bfqq_budget_new(bfqq); + bfqq->budget_timeout = jiffies + + bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; + + bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", + jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * + timeout_coeff)); +} + +/* + * Move request from internal lists to the request queue dispatch list. + */ +static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + bfq_remove_request(rq); + bfqq->dispatched++; + elv_dispatch_sort(q, rq); + + if (bfq_bfqq_sync(bfqq)) + bfqd->sync_flight++; +} + +/* + * Return expired entry, or NULL to just start from scratch in rbtree. + */ +static struct request *bfq_check_fifo(struct bfq_queue *bfqq) +{ + struct request *rq = NULL; + + if (bfq_bfqq_fifo_expire(bfqq)) + return NULL; + + bfq_mark_bfqq_fifo_expire(bfqq); + + if (list_empty(&bfqq->fifo)) + return NULL; + + rq = rq_entry_fifo(bfqq->fifo.next); + + if (time_before(jiffies, rq_fifo_time(rq))) + return NULL; + + return rq; +} + +/* + * Must be called with the queue_lock held. + */ +static int bfqq_process_refs(struct bfq_queue *bfqq) +{ + int process_refs, io_refs; + + io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; + process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; + BUG_ON(process_refs < 0); + return process_refs; +} + +static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +{ + int process_refs, new_process_refs; + struct bfq_queue *__bfqq; + + /* + * If there are no process references on the new_bfqq, then it is + * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain + * may have dropped their last reference (not just their last process + * reference). + */ + if (!bfqq_process_refs(new_bfqq)) + return; + + /* Avoid a circular list and skip interim queue merges. */ + while ((__bfqq = new_bfqq->new_bfqq)) { + if (__bfqq == bfqq) + return; + new_bfqq = __bfqq; + } + + process_refs = bfqq_process_refs(bfqq); + new_process_refs = bfqq_process_refs(new_bfqq); + /* + * If the process for the bfqq has gone away, there is no + * sense in merging the queues. + */ + if (process_refs == 0 || new_process_refs == 0) + return; + + /* + * Merge in the direction of the lesser amount of work. 
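+	 *
+	 * Example (editor's note): if bfqq has 1 process reference and
+	 * new_bfqq has 3, then new_process_refs >= process_refs, so bfqq is
+	 * redirected (bfqq->new_bfqq = new_bfqq) and new_bfqq's refcount
+	 * grows by bfqq's single process reference; the side with fewer
+	 * attached processes is always the one that gets redirected.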
+ */ + if (new_process_refs >= process_refs) { + bfqq->new_bfqq = new_bfqq; + atomic_add(process_refs, &new_bfqq->ref); + } else { + new_bfqq->new_bfqq = bfqq; + atomic_add(new_process_refs, &bfqq->ref); + } + bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", + new_bfqq->pid); +} + +static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + return entity->budget - entity->service; +} + +static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + BUG_ON(bfqq != bfqd->active_queue); + + __bfq_bfqd_reset_active(bfqd); + + if (RB_EMPTY_ROOT(&bfqq->sort_list)) { + bfq_del_bfqq_busy(bfqd, bfqq, 1); + /* + * overloading budget_timeout field to store when + * the queue remains with no backlog, used by + * the weight-raising mechanism + */ + bfqq->budget_timeout = jiffies ; + } + else { + bfq_activate_bfqq(bfqd, bfqq); + /* + * Resort priority tree of potential close cooperators. + */ + bfq_rq_pos_tree_add(bfqd, bfqq); + } + + /* + * If this bfqq is shared between multiple processes, check + * to make sure that those processes are still issuing I/Os + * within the mean seek distance. If not, it may be time to + * break the queues apart again. + */ + if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) + bfq_mark_bfqq_split_coop(bfqq); +} + +/** + * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. + * @bfqd: device data. + * @bfqq: queue to update. + * @reason: reason for expiration. + * + * Handle the feedback on @bfqq budget. See the body for detailed + * comments. + */ +static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + enum bfqq_expiration reason) +{ + struct request *next_rq; + unsigned long budget, min_budget; + + budget = bfqq->max_budget; + min_budget = bfq_min_budget(bfqd); + + BUG_ON(bfqq != bfqd->active_queue); + + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", + bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", + budget, bfq_min_budget(bfqd)); + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", + bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->active_queue)); + + if (bfq_bfqq_sync(bfqq)) { + switch (reason) { + /* + * Caveat: in all the following cases we trade latency + * for throughput. + */ + case BFQ_BFQQ_TOO_IDLE: + /* + * This is the only case where we may reduce + * the budget: if there is no requets of the + * process still waiting for completion, then + * we assume (tentatively) that the timer has + * expired because the batch of requests of + * the process could have been served with a + * smaller budget. Hence, betting that + * process will behave in the same way when it + * becomes backlogged again, we reduce its + * next budget. As long as we guess right, + * this budget cut reduces the latency + * experienced by the process. + * + * However, if there are still outstanding + * requests, then the process may have not yet + * issued its next request just because it is + * still waiting for the completion of some of + * the still oustanding ones. So in this + * subcase we do not reduce its budget, on the + * contrary we increase it to possibly boost + * the throughput, as discussed in the + * comments to the BUDGET_TIMEOUT case. 
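+			 *
+			 * Worked example (editor's note, assuming the autotuned
+			 * regime where min_budget = bfq_max_budget / 32 and
+			 * bfq_max_budget = 16384, so min_budget = 512): a queue
+			 * that times out with no request in flight and a budget
+			 * of 16384 (> 5 * 512) is cut to 16384 - 4 * 512 = 14336,
+			 * while one already down at 2048 (< 5 * 512 = 2560) is
+			 * set straight to 512.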
+ */ + if (bfqq->dispatched > 0) /* still oustanding reqs */ + budget = min(budget * 2, bfqd->bfq_max_budget); + else { + if (budget > 5 * min_budget) + budget -= 4 * min_budget; + else + budget = min_budget; + } + break; + case BFQ_BFQQ_BUDGET_TIMEOUT: + /* + * We double the budget here because: 1) it + * gives the chance to boost the throughput if + * this is not a seeky process (which may have + * bumped into this timeout because of, e.g., + * ZBR), 2) together with charge_full_budget + * it helps give seeky processes higher + * timestamps, and hence be served less + * frequently. + */ + budget = min(budget * 2, bfqd->bfq_max_budget); + break; + case BFQ_BFQQ_BUDGET_EXHAUSTED: + /* + * The process still has backlog, and did not + * let either the budget timeout or the disk + * idling timeout expire. Hence it is not + * seeky, has a short thinktime and may be + * happy with a higher budget too. So + * definitely increase the budget of this good + * candidate to boost the disk throughput. + */ + budget = min(budget * 4, bfqd->bfq_max_budget); + break; + case BFQ_BFQQ_NO_MORE_REQUESTS: + /* + * Leave the budget unchanged. + */ + default: + return; + } + } else /* async queue */ + /* async queues get always the maximum possible budget + * (their ability to dispatch is limited by + * @bfqd->bfq_max_budget_async_rq). + */ + budget = bfqd->bfq_max_budget; + + bfqq->max_budget = budget; + + if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 && + bfqq->max_budget > bfqd->bfq_max_budget) + bfqq->max_budget = bfqd->bfq_max_budget; + + /* + * Make sure that we have enough budget for the next request. + * Since the finish time of the bfqq must be kept in sync with + * the budget, be sure to call __bfq_bfqq_expire() after the + * update. + */ + next_rq = bfqq->next_rq; + if (next_rq != NULL) + bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + else + bfqq->entity.budget = bfqq->max_budget; + + bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu", + next_rq != NULL ? blk_rq_sectors(next_rq) : 0, + bfqq->entity.budget); +} + +static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) +{ + unsigned long max_budget; + + /* + * The max_budget calculated when autotuning is equal to the + * amount of sectors transfered in timeout_sync at the + * estimated peak rate. + */ + max_budget = (unsigned long)(peak_rate * 1000 * + timeout >> BFQ_RATE_SHIFT); + + return max_budget; +} + +/* + * In addition to updating the peak rate, checks whether the process + * is "slow", and returns 1 if so. This slow flag is used, in addition + * to the budget timeout, to reduce the amount of service provided to + * seeky processes, and hence reduce their chances to lower the + * throughput. See the code for more details. + */ +static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, + int compensate, enum bfqq_expiration reason) +{ + u64 bw, usecs, expected, timeout; + ktime_t delta; + int update = 0; + + if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) + return 0; + + if (compensate) + delta = bfqd->last_idling_start; + else + delta = ktime_get(); + delta = ktime_sub(delta, bfqd->last_budget_start); + usecs = ktime_to_us(delta); + + /* Don't trust short/unrealistic values. */ + if (usecs < 100 || usecs >= LONG_MAX) + return 0; + + /* + * Calculate the bandwidth for the last slice. We use a 64 bit + * value to store the peak rate, in sectors per usec in fixed + * point math. 
We do so to have enough precision in the estimate + * and to avoid overflows. + */ + bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; + do_div(bw, (unsigned long)usecs); + + timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); + + /* + * Use only long (> 20ms) intervals to filter out spikes for + * the peak rate estimation. + */ + if (usecs > 20000) { + if (bw > bfqd->peak_rate || + (!BFQQ_SEEKY(bfqq) && + reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { + bfq_log(bfqd, "measured bw =%llu", bw); + /* + * To smooth oscillations use a low-pass filter with + * alpha=7/8, i.e., + * new_rate = (7/8) * old_rate + (1/8) * bw + */ + do_div(bw, 8); + bfqd->peak_rate *= 7; + do_div(bfqd->peak_rate, 8); + bfqd->peak_rate += bw; + update = 1; + bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); + } + + update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; + + if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) + bfqd->peak_rate_samples++; + + if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && + update && bfqd->bfq_user_max_budget == 0) { + bfqd->bfq_max_budget = + bfq_calc_max_budget(bfqd->peak_rate, timeout); + bfq_log(bfqd, "new max_budget=%lu", + bfqd->bfq_max_budget); + } + } + + /* + * If the process has been served for a too short time + * interval to let its possible sequential accesses prevail on + * the initial seek time needed to move the disk head on the + * first sector it requested, then give the process a chance + * and for the moment return false. + */ + if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) + return 0; + + /* + * A process is considered ``slow'' (i.e., seeky, so that we + * cannot treat it fairly in the service domain, as it would + * slow down too much the other processes) if, when a slice + * ends for whatever reason, it has received service at a + * rate that would not be high enough to complete the budget + * before the budget timeout expiration. + */ + expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; + + /* + * Caveat: processes doing IO in the slower disk zones will + * tend to be slow(er) even if not seeky. And the estimated + * peak rate will actually be an average over the disk + * surface. Hence, to not be too harsh with unlucky processes, + * we keep a budget/3 margin of safety before declaring a + * process slow. + */ + return expected > (4 * bfqq->entity.budget) / 3; +} + +/** + * bfq_bfqq_expire - expire a queue. + * @bfqd: device owning the queue. + * @bfqq: the queue to expire. + * @compensate: if true, compensate for the time spent idling. + * @reason: the reason causing the expiration. + * + * + * If the process associated to the queue is slow (i.e., seeky), or in + * case of budget timeout, or, finally, if it is async, we + * artificially charge it an entire budget (independently of the + * actual service it received). As a consequence, the queue will get + * higher timestamps than the correct ones upon reactivation, and + * hence it will be rescheduled as if it had received more service + * than what it actually received. In the end, this class of processes + * will receive less service in proportion to how slowly they consume + * their budgets (and hence how seriously they tend to lower the + * throughput). + * + * In contrast, when a queue expires because it has been idling for + * too much or because it exhausted its budget, we do not touch the + * amount of service it has received. 
Hence when the queue will be + * reactivated and its timestamps updated, the latter will be in sync + * with the actual service received by the queue until expiration. + * + * Charging a full budget to the first type of queues and the exact + * service to the others has the effect of using the WF2Q+ policy to + * schedule the former on a timeslice basis, without violating the + * service domain guarantees of the latter. + */ +static void bfq_bfqq_expire(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + int compensate, + enum bfqq_expiration reason) +{ + int slow; + BUG_ON(bfqq != bfqd->active_queue); + + /* Update disk peak rate for autotuning and check whether the + * process is slow (see bfq_update_peak_rate). + */ + slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); + + /* + * As above explained, 'punish' slow (i.e., seeky), timed-out + * and async queues, to favor sequential sync workloads. + * + * Processes doing IO in the slower disk zones will tend to be + * slow(er) even if not seeky. Hence, since the estimated peak + * rate is actually an average over the disk surface, these + * processes may timeout just for bad luck. To avoid punishing + * them we do not charge a full budget to a process that + * succeeded in consuming at least 2/3 of its budget. + */ + if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) + bfq_bfqq_charge_full_budget(bfqq); + + if (bfqd->low_latency && bfqq->raising_coeff == 1) + bfqq->last_rais_start_finish = jiffies; + + if (bfqd->low_latency && bfqd->bfq_raising_max_softrt_rate > 0) { + if(reason != BFQ_BFQQ_BUDGET_TIMEOUT) + bfqq->soft_rt_next_start = + jiffies + + HZ * bfqq->entity.service / + bfqd->bfq_raising_max_softrt_rate; + else + bfqq->soft_rt_next_start = -1; /* infinity */ + } + bfq_log_bfqq(bfqd, bfqq, + "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow, + bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); + + /* Increase, decrease or leave budget unchanged according to reason */ + __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); + __bfq_bfqq_expire(bfqd, bfqq); +} + +/* + * Budget timeout is not implemented through a dedicated timer, but + * just checked on request arrivals and completions, as well as on + * idle timer expirations. + */ +static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) +{ + if (bfq_bfqq_budget_new(bfqq)) + return 0; + + if (time_before(jiffies, bfqq->budget_timeout)) + return 0; + + return 1; +} + +/* + * If we expire a queue that is waiting for the arrival of a new + * request, we may prevent the fictitious timestamp backshifting that + * allows the guarantees of the queue to be preserved (see [1] for + * this tricky aspect). Hence we return true only if this condition + * does not hold, or if the queue is slow enough to deserve only to be + * kicked off for preserving a high throughput. +*/ +static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) +{ + bfq_log_bfqq(bfqq->bfqd, bfqq, + "may_budget_timeout: wr %d left %d timeout %d", + bfq_bfqq_wait_request(bfqq), + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, + bfq_bfqq_budget_timeout(bfqq)); + + return (!bfq_bfqq_wait_request(bfqq) || + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) + && + bfq_bfqq_budget_timeout(bfqq); +} + +/* + * Select a queue for service. If we have a current active queue, + * check whether to continue servicing it, or retrieve and set a new one. 
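+ *
+ * In short (editor's summary of the code below): the active queue is
+ * expired on budget timeout, or when its remaining budget cannot cover
+ * its next request (budget exhaustion), or when it has no more requests
+ * and is not worth idling on; otherwise it is kept, possibly while
+ * idling for a new request. If a close cooperator was found, the current
+ * queue is expired and the cooperator is scheduled next.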
+ */ +static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq, *new_bfqq = NULL; + struct request *next_rq; + enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; + + bfqq = bfqd->active_queue; + if (bfqq == NULL) + goto new_queue; + + bfq_log_bfqq(bfqd, bfqq, "select_queue: already active queue"); + + /* + * If another queue has a request waiting within our mean seek + * distance, let it run. The expire code will check for close + * cooperators and put the close queue at the front of the + * service tree. If possible, merge the expiring queue with the + * new bfqq. + */ + new_bfqq = bfq_close_cooperator(bfqd, bfqq); + if (new_bfqq != NULL && bfqq->new_bfqq == NULL) + bfq_setup_merge(bfqq, new_bfqq); + + if (bfq_may_expire_for_budg_timeout(bfqq)) + goto expire; + + next_rq = bfqq->next_rq; + /* + * If bfqq has requests queued and it has enough budget left to + * serve them, keep the queue, otherwise expire it. + */ + if (next_rq != NULL) { + if (bfq_serv_to_charge(next_rq, bfqq) > + bfq_bfqq_budget_left(bfqq)) { + reason = BFQ_BFQQ_BUDGET_EXHAUSTED; + goto expire; + } else { + /* + * The idle timer may be pending because we may not + * disable disk idling even when a new request arrives + */ + if (timer_pending(&bfqd->idle_slice_timer)) { + /* + * If we get here: 1) at least a new request + * has arrived but we have not disabled the + * timer because the request was too small, + * 2) then the block layer has unplugged the + * device, causing the dispatch to be invoked. + * + * Since the device is unplugged, now the + * requests are probably large enough to + * provide a reasonable throughput. + * So we disable idling. + */ + bfq_clear_bfqq_wait_request(bfqq); + del_timer(&bfqd->idle_slice_timer); + } + if (new_bfqq == NULL) + goto keep_queue; + else + goto expire; + } + } + + /* + * No requests pending. If there is no cooperator, and the active + * queue still has requests in flight or is idling for a new request, + * then keep it. + */ + if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) || + (bfqq->dispatched != 0 && bfq_bfqq_idle_window(bfqq) && + !bfq_queue_nonrot_noidle(bfqd, bfqq)))) { + bfqq = NULL; + goto keep_queue; + } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) { + /* + * Expiring the queue because there is a close cooperator, + * cancel timer. + */ + bfq_clear_bfqq_wait_request(bfqq); + del_timer(&bfqd->idle_slice_timer); + } + + reason = BFQ_BFQQ_NO_MORE_REQUESTS; +expire: + bfq_bfqq_expire(bfqd, bfqq, 0, reason); +new_queue: + bfqq = bfq_set_active_queue(bfqd, new_bfqq); + bfq_log(bfqd, "select_queue: new queue %d returned", + bfqq != NULL ? 
bfqq->pid : 0); +keep_queue: + return bfqq; +} + +static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + if (bfqq->raising_coeff > 1) { /* queue is being boosted */ + struct bfq_entity *entity = &bfqq->entity; + + bfq_log_bfqq(bfqd, bfqq, + "raising period dur %u/%u msec, " + "old raising coeff %u, w %d(%d)", + jiffies_to_msecs(jiffies - + bfqq->last_rais_start_finish), + jiffies_to_msecs(bfqq->raising_cur_max_time), + bfqq->raising_coeff, + bfqq->entity.weight, bfqq->entity.orig_weight); + + BUG_ON(bfqq != bfqd->active_queue && entity->weight != + entity->orig_weight * bfqq->raising_coeff); + if(entity->ioprio_changed) + bfq_log_bfqq(bfqd, bfqq, + "WARN: pending prio change"); + /* + * If too much time has elapsed from the beginning + * of this weight-raising period and process is not soft + * real-time, stop it + */ + if (jiffies - bfqq->last_rais_start_finish > + bfqq->raising_cur_max_time) { + int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && + bfqq->soft_rt_next_start < jiffies; + + bfqq->last_rais_start_finish = jiffies; + if (soft_rt) + bfqq->raising_cur_max_time = + bfqd->bfq_raising_rt_max_time; + else { + bfqq->raising_coeff = 1; + entity->ioprio_changed = 1; + __bfq_entity_update_weight_prio( + bfq_entity_service_tree(entity), + entity); + } + } + } +} + + +/* + * Dispatch one request from bfqq, moving it to the request queue + * dispatch list. + */ +static int bfq_dispatch_request(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + int dispatched = 0; + struct request *rq; + unsigned long service_to_charge; + + BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); + + /* Follow expired path, else get first next available. */ + rq = bfq_check_fifo(bfqq); + if (rq == NULL) + rq = bfqq->next_rq; + service_to_charge = bfq_serv_to_charge(rq, bfqq); + + if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { + /* + * This may happen if the next rq is chosen + * in fifo order instead of sector order. + * The budget is properly dimensioned + * to be always sufficient to serve the next request + * only if it is chosen in sector order. The reason is + * that it would be quite inefficient and little useful + * to always make sure that the budget is large enough + * to serve even the possible next rq in fifo order. + * In fact, requests are seldom served in fifo order. + * + * Expire the queue for budget exhaustion, and + * make sure that the next act_budget is enough + * to serve the next request, even if it comes + * from the fifo expired path. + */ + bfqq->next_rq = rq; + /* + * Since this dispatch is failed, make sure that + * a new one will be performed + */ + if (!bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); + goto expire; + } + + /* Finally, insert request into driver dispatch list. 
*/ + bfq_bfqq_served(bfqq, service_to_charge); + bfq_dispatch_insert(bfqd->queue, rq); + + update_raising_data(bfqd, bfqq); + + bfq_log_bfqq(bfqd, bfqq, "dispatched %u sec req (%llu), " + "budg left %lu", + blk_rq_sectors(rq), + (long long unsigned)blk_rq_pos(rq), + bfq_bfqq_budget_left(bfqq)); + + dispatched++; + + if (bfqd->active_cic == NULL) { + atomic_long_inc(&RQ_CIC(rq)->ioc->refcount); + bfqd->active_cic = RQ_CIC(rq); + } + + if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && + dispatched >= bfqd->bfq_max_budget_async_rq) || + bfq_class_idle(bfqq))) + goto expire; + + return dispatched; + +expire: + bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED); + return dispatched; +} + +static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) +{ + int dispatched = 0; + + while (bfqq->next_rq != NULL) { + bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); + dispatched++; + } + + BUG_ON(!list_empty(&bfqq->fifo)); + return dispatched; +} + +/* + * Drain our current requests. Used for barriers and when switching + * io schedulers on-the-fly. + */ +static int bfq_forced_dispatch(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq, *n; + struct bfq_service_tree *st; + int dispatched = 0; + + bfqq = bfqd->active_queue; + if (bfqq != NULL) + __bfq_bfqq_expire(bfqd, bfqq); + + /* + * Loop through classes, and be careful to leave the scheduler + * in a consistent state, as feedback mechanisms and vtime + * updates cannot be disabled during the process. + */ + list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { + st = bfq_entity_service_tree(&bfqq->entity); + + dispatched += __bfq_forced_dispatch_bfqq(bfqq); + bfqq->max_budget = bfq_max_budget(bfqd); + + bfq_forget_idle(st); + } + + BUG_ON(bfqd->busy_queues != 0); + + return dispatched; +} + +static int bfq_dispatch_requests(struct request_queue *q, int force) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq; + int max_dispatch; + + bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); + if (bfqd->busy_queues == 0) + return 0; + + if (unlikely(force)) + return bfq_forced_dispatch(bfqd); + + if((bfqq = bfq_select_queue(bfqd)) == NULL) + return 0; + + max_dispatch = bfqd->bfq_quantum; + if (bfq_class_idle(bfqq)) + max_dispatch = 1; + + if (!bfq_bfqq_sync(bfqq)) + max_dispatch = bfqd->bfq_max_budget_async_rq; + + if (bfqq->dispatched >= max_dispatch) { + if (bfqd->busy_queues > 1) + return 0; + if (bfqq->dispatched >= 4 * max_dispatch) + return 0; + } + + if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) + return 0; + + bfq_clear_bfqq_wait_request(bfqq); + BUG_ON(timer_pending(&bfqd->idle_slice_timer)); + + if (! bfq_dispatch_request(bfqd, bfqq)) + return 0; + + bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d" + "(max_disp %d)", bfqq->pid, max_dispatch); + + return 1; +} + +/* + * Task holds one reference to the queue, dropped when task exits. Each rq + * in-flight on this queue also holds a reference, dropped when rq is freed. + * + * Queue lock must be held here. 
+ */ +static void bfq_put_queue(struct bfq_queue *bfqq) +{ + struct bfq_data *bfqd = bfqq->bfqd; + + BUG_ON(atomic_read(&bfqq->ref) <= 0); + + bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, + atomic_read(&bfqq->ref)); + if (!atomic_dec_and_test(&bfqq->ref)) + return; + + BUG_ON(rb_first(&bfqq->sort_list) != NULL); + BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); + BUG_ON(bfqq->entity.tree != NULL); + BUG_ON(bfq_bfqq_busy(bfqq)); + BUG_ON(bfqd->active_queue == bfqq); + + bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); + + kmem_cache_free(bfq_pool, bfqq); +} + +static void bfq_put_cooperator(struct bfq_queue *bfqq) +{ + struct bfq_queue *__bfqq, *next; + + /* + * If this queue was scheduled to merge with another queue, be + * sure to drop the reference taken on that queue (and others in + * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. + */ + __bfqq = bfqq->new_bfqq; + while (__bfqq) { + if (__bfqq == bfqq) { + WARN(1, "bfqq->new_bfqq loop detected.\n"); + break; + } + next = __bfqq->new_bfqq; + bfq_put_queue(__bfqq); + __bfqq = next; + } +} + +static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + if (bfqq == bfqd->active_queue) { + __bfq_bfqq_expire(bfqd, bfqq); + bfq_schedule_dispatch(bfqd); + } + + bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, + atomic_read(&bfqq->ref)); + + bfq_put_cooperator(bfqq); + + bfq_put_queue(bfqq); +} + +/* + * Update the entity prio values; note that the new values will not + * be used until the next (re)activation. + */ +static void bfq_init_prio_data(struct bfq_queue *bfqq, struct io_context *ioc) +{ + struct task_struct *tsk = current; + int ioprio_class; + + if (!bfq_bfqq_prio_changed(bfqq)) + return; + + ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); + switch (ioprio_class) { + default: + printk(KERN_ERR "bfq: bad prio %x\n", ioprio_class); + case IOPRIO_CLASS_NONE: + /* + * No prio set, inherit CPU scheduling settings. + */ + bfqq->entity.new_ioprio = task_nice_ioprio(tsk); + bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); + break; + case IOPRIO_CLASS_RT: + bfqq->entity.new_ioprio = task_ioprio(ioc); + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; + break; + case IOPRIO_CLASS_BE: + bfqq->entity.new_ioprio = task_ioprio(ioc); + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; + break; + case IOPRIO_CLASS_IDLE: + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; + bfqq->entity.new_ioprio = 7; + bfq_clear_bfqq_idle_window(bfqq); + break; + } + + bfqq->entity.ioprio_changed = 1; + + /* + * Keep track of original prio settings in case we have to temporarily + * elevate the priority of this queue. 
+ */ + bfqq->org_ioprio = bfqq->entity.new_ioprio; + bfq_clear_bfqq_prio_changed(bfqq); +} + +static void bfq_changed_ioprio(struct io_context *ioc, + struct cfq_io_context *cic) +{ + struct bfq_data *bfqd; + struct bfq_queue *bfqq, *new_bfqq; + struct bfq_group *bfqg; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (unlikely(bfqd == NULL)) + return; + + bfqq = cic->cfqq[BLK_RW_ASYNC]; + if (bfqq != NULL) { + bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, + sched_data); + new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, cic->ioc, + GFP_ATOMIC); + if (new_bfqq != NULL) { + cic->cfqq[BLK_RW_ASYNC] = new_bfqq; + bfq_log_bfqq(bfqd, bfqq, + "changed_ioprio: bfqq %p %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } + } + + bfqq = cic->cfqq[BLK_RW_SYNC]; + if (bfqq != NULL) + bfq_mark_bfqq_prio_changed(bfqq); + + bfq_put_bfqd_unlock(bfqd, &flags); +} + +static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, + pid_t pid, int is_sync) +{ + RB_CLEAR_NODE(&bfqq->entity.rb_node); + INIT_LIST_HEAD(&bfqq->fifo); + + atomic_set(&bfqq->ref, 0); + bfqq->bfqd = bfqd; + + bfq_mark_bfqq_prio_changed(bfqq); + + if (is_sync) { + if (!bfq_class_idle(bfqq)) + bfq_mark_bfqq_idle_window(bfqq); + bfq_mark_bfqq_sync(bfqq); + } + + /* Tentative initial value to trade off between thr and lat */ + bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; + bfqq->pid = pid; + + bfqq->raising_coeff = 1; + bfqq->last_rais_start_finish = 0; + bfqq->soft_rt_next_start = -1; +} + +static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, + int is_sync, + struct io_context *ioc, + gfp_t gfp_mask) +{ + struct bfq_queue *bfqq, *new_bfqq = NULL; + struct cfq_io_context *cic; + +retry: + cic = bfq_cic_lookup(bfqd, ioc); + /* cic always exists here */ + bfqq = cic_to_bfqq(cic, is_sync); + + /* + * Always try a new alloc if we fall back to the OOM bfqq + * originally, since it should just be a temporary situation. 
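+	 *
+	 * When a blocking allocation is allowed, the queue lock is
+	 * dropped around kmem_cache_alloc_node() and the lookup is
+	 * redone from the "retry" label above, since the cic-to-bfqq
+	 * mapping may have changed while the lock was not held.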
+ */ + if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { + bfqq = NULL; + if (new_bfqq != NULL) { + bfqq = new_bfqq; + new_bfqq = NULL; + } else if (gfp_mask & __GFP_WAIT) { + spin_unlock_irq(bfqd->queue->queue_lock); + new_bfqq = kmem_cache_alloc_node(bfq_pool, + gfp_mask | __GFP_ZERO, + bfqd->queue->node); + spin_lock_irq(bfqd->queue->queue_lock); + if (new_bfqq != NULL) + goto retry; + } else { + bfqq = kmem_cache_alloc_node(bfq_pool, + gfp_mask | __GFP_ZERO, + bfqd->queue->node); + } + + if (bfqq != NULL) { + bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); + bfq_log_bfqq(bfqd, bfqq, "allocated"); + } else { + bfqq = &bfqd->oom_bfqq; + bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); + } + + bfq_init_prio_data(bfqq, ioc); + bfq_init_entity(&bfqq->entity, bfqg); + } + + if (new_bfqq != NULL) + kmem_cache_free(bfq_pool, new_bfqq); + + return bfqq; +} + +static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, + struct bfq_group *bfqg, + int ioprio_class, int ioprio) +{ + switch (ioprio_class) { + case IOPRIO_CLASS_RT: + return &bfqg->async_bfqq[0][ioprio]; + case IOPRIO_CLASS_BE: + return &bfqg->async_bfqq[1][ioprio]; + case IOPRIO_CLASS_IDLE: + return &bfqg->async_idle_bfqq; + default: + BUG(); + } +} + +static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, int is_sync, + struct io_context *ioc, gfp_t gfp_mask) +{ + const int ioprio = task_ioprio(ioc); + const int ioprio_class = task_ioprio_class(ioc); + struct bfq_queue **async_bfqq = NULL; + struct bfq_queue *bfqq = NULL; + + if (!is_sync) { + async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, + ioprio); + bfqq = *async_bfqq; + } + + if (bfqq == NULL) + bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, ioc, gfp_mask); + + /* + * Pin the queue now that it's allocated, scheduler exit will prune it. + */ + if (!is_sync && *async_bfqq == NULL) { + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", + bfqq, atomic_read(&bfqq->ref)); + *async_bfqq = bfqq; + } + + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, + atomic_read(&bfqq->ref)); + return bfqq; +} + +static void bfq_update_io_thinktime(struct bfq_data *bfqd, + struct cfq_io_context *cic) +{ + unsigned long elapsed = jiffies - cic->ttime.last_end_request; + unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); + + cic->ttime.ttime_samples = (7*cic->ttime.ttime_samples + 256) / 8; + cic->ttime.ttime_total = (7*cic->ttime.ttime_total + 256*ttime) / 8; + cic->ttime.ttime_mean = (cic->ttime.ttime_total + 128) / cic->ttime.ttime_samples; +} + +static void bfq_update_io_seektime(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct request *rq) +{ + sector_t sdist; + u64 total; + + if (bfqq->last_request_pos < blk_rq_pos(rq)) + sdist = blk_rq_pos(rq) - bfqq->last_request_pos; + else + sdist = bfqq->last_request_pos - blk_rq_pos(rq); + + /* + * Don't allow the seek distance to get too large from the + * odd fragment, pagein, etc. 
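+	 *
+	 * The statistics below form an exponentially weighted moving
+	 * average: each new (capped) distance gets a weight of about 1/8
+	 * and the previous history 7/8, with seek_samples saturating at
+	 * the fixed-point scale of 256, so that seek_mean is roughly
+	 * seek_total / seek_samples.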
+ */ + if (bfqq->seek_samples == 0) /* first request, not really a seek */ + sdist = 0; + else if (bfqq->seek_samples <= 60) /* second & third seek */ + sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); + else + sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); + + bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; + bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; + total = bfqq->seek_total + (bfqq->seek_samples/2); + do_div(total, bfqq->seek_samples); + if (bfq_bfqq_coop(bfqq)) { + /* + * If the mean seektime increases for a (non-seeky) shared + * queue, some cooperator is likely to be idling too much. + * On the contrary, if it decreases, some cooperator has + * probably waked up. + * + */ + if ((sector_t)total < bfqq->seek_mean) + bfq_mark_bfqq_some_coop_idle(bfqq) ; + else if ((sector_t)total > bfqq->seek_mean) + bfq_clear_bfqq_some_coop_idle(bfqq) ; + } + bfqq->seek_mean = (sector_t)total; + + bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, + (u64)bfqq->seek_mean); +} + +/* + * Disable idle window if the process thinks too long or seeks so much that + * it doesn't matter. + */ +static void bfq_update_idle_window(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct cfq_io_context *cic) +{ + int enable_idle; + + /* Don't idle for async or idle io prio class. */ + if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) + return; + + enable_idle = bfq_bfqq_idle_window(bfqq); + + if (atomic_read(&cic->ioc->nr_tasks) == 0 || + bfqd->bfq_slice_idle == 0 || + (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && + bfqq->raising_coeff == 1)) + enable_idle = 0; + else if (bfq_sample_valid(cic->ttime.ttime_samples)) { + if (cic->ttime.ttime_mean > bfqd->bfq_slice_idle && + bfqq->raising_coeff == 1) + enable_idle = 0; + else + enable_idle = 1; + } + bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", + enable_idle); + + if (enable_idle) + bfq_mark_bfqq_idle_window(bfqq); + else + bfq_clear_bfqq_idle_window(bfqq); +} + +/* + * Called when a new fs request (rq) is added to bfqq. Check if there's + * something we should do about it. + */ +static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, + struct request *rq) +{ + struct cfq_io_context *cic = RQ_CIC(rq); + + if (rq->cmd_flags & REQ_META) + bfqq->meta_pending++; + + bfq_update_io_thinktime(bfqd, cic); + bfq_update_io_seektime(bfqd, bfqq, rq); + if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || + !BFQQ_SEEKY(bfqq)) + bfq_update_idle_window(bfqd, bfqq, cic); + + bfq_log_bfqq(bfqd, bfqq, + "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", + bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), + (long long unsigned)bfqq->seek_mean); + + bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); + + if (bfqq == bfqd->active_queue) { + /* + * If there is just this request queued and the request + * is small, just exit. + * In this way, if the disk is being idled to wait for a new + * request from the active queue, we avoid unplugging the + * device now. + * + * By doing so, we spare the disk to be committed + * to serve just a small request. On the contrary, we wait for + * the block layer to decide when to unplug the device: + * hopefully, new requests will be merged to this + * one quickly, then the device will be unplugged + * and larger requests will be dispatched. 
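+		 *
+		 * "Small" here means that the request spans fewer than
+		 * 32 sectors (16 KiB with 512-byte sectors) and is the
+		 * only queued request of its kind (sync or async) for
+		 * this queue.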
+ */ + if (bfqq->queued[rq_is_sync(rq)] == 1 && + blk_rq_sectors(rq) < 32) { + return; + } + if (bfq_bfqq_wait_request(bfqq)) { + /* + * If we are waiting for a request for this queue, let + * it rip immediately and flag that we must not expire + * this queue just now. + */ + bfq_clear_bfqq_wait_request(bfqq); + del_timer(&bfqd->idle_slice_timer); + /* + * Here we can safely expire the queue, in + * case of budget timeout, without wasting + * guarantees + */ + if (bfq_bfqq_budget_timeout(bfqq)) + bfq_bfqq_expire(bfqd, bfqq, 0, + BFQ_BFQQ_BUDGET_TIMEOUT); + __blk_run_queue(bfqd->queue); + } + } +} + +static void bfq_insert_request(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + assert_spin_locked(bfqd->queue->queue_lock); + bfq_init_prio_data(bfqq, RQ_CIC(rq)->ioc); + + bfq_add_rq_rb(rq); + + rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]); + list_add_tail(&rq->queuelist, &bfqq->fifo); + + bfq_rq_enqueued(bfqd, bfqq, rq); +} + +static void bfq_update_hw_tag(struct bfq_data *bfqd) +{ + bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, + bfqd->rq_in_driver); + + if (bfqd->hw_tag == 1) + return; + + /* + * This sample is valid if the number of outstanding requests + * is large enough to allow a queueing behavior. Note that the + * sum is not exact, as it's not taking into account deactivated + * requests. + */ + if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) + return; + + if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) + return; + + bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; + bfqd->max_rq_in_driver = 0; + bfqd->hw_tag_samples = 0; +} + +static void bfq_completed_request(struct request_queue *q, struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_data *bfqd = bfqq->bfqd; + const int sync = rq_is_sync(rq); + + bfq_log_bfqq(bfqd, bfqq, "completed %u sects req (%d)", + blk_rq_sectors(rq), sync); + + bfq_update_hw_tag(bfqd); + + WARN_ON(!bfqd->rq_in_driver); + WARN_ON(!bfqq->dispatched); + bfqd->rq_in_driver--; + bfqq->dispatched--; + + if (bfq_bfqq_sync(bfqq)) + bfqd->sync_flight--; + + if (sync) + RQ_CIC(rq)->ttime.last_end_request = jiffies; + + /* + * If this is the active queue, check if it needs to be expired, + * or if we want to idle in case it has no pending requests. 
+ */ + if (bfqd->active_queue == bfqq) { + if (bfq_bfqq_budget_new(bfqq)) + bfq_set_budget_timeout(bfqd); + + /* Idling is disabled also for cooperation issues: + * 1) there is a close cooperator for the queue, or + * 2) the queue is shared and some cooperator is likely + * to be idle (in this case, by not arming the idle timer, + * we try to slow down the queue, to prevent the zones + * of the disk accessed by the active cooperators to become + * too distant from the zone that will be accessed by the + * currently idle cooperators) + */ + if (bfq_may_expire_for_budg_timeout(bfqq)) + bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); + else if (sync && + (bfqd->rq_in_driver == 0 || + bfqq->raising_coeff > 1) + && RB_EMPTY_ROOT(&bfqq->sort_list) + && !bfq_close_cooperator(bfqd, bfqq) + && (!bfq_bfqq_coop(bfqq) || + !bfq_bfqq_some_coop_idle(bfqq))) + bfq_arm_slice_timer(bfqd); + } + + if (!bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); +} + +static inline int __bfq_may_queue(struct bfq_queue *bfqq) +{ + if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { + bfq_clear_bfqq_must_alloc(bfqq); + return ELV_MQUEUE_MUST; + } + + return ELV_MQUEUE_MAY; +} + +static int bfq_may_queue(struct request_queue *q, int rw) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct task_struct *tsk = current; + struct cfq_io_context *cic; + struct bfq_queue *bfqq; + + /* + * Don't force setup of a queue from here, as a call to may_queue + * does not necessarily imply that a request actually will be queued. + * So just lookup a possibly existing queue, or return 'may queue' + * if that fails. + */ + cic = bfq_cic_lookup(bfqd, tsk->io_context); + if (cic == NULL) + return ELV_MQUEUE_MAY; + + bfqq = cic_to_bfqq(cic, rw_is_sync(rw)); + if (bfqq != NULL) { + bfq_init_prio_data(bfqq, cic->ioc); + + return __bfq_may_queue(bfqq); + } + + return ELV_MQUEUE_MAY; +} + +/* + * Queue lock held here. + */ +static void bfq_put_request(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + if (bfqq != NULL) { + const int rw = rq_data_dir(rq); + + BUG_ON(!bfqq->allocated[rw]); + bfqq->allocated[rw]--; + + put_io_context(RQ_CIC(rq)->ioc); + + rq->elevator_private[0] = NULL; + rq->elevator_private[1] = NULL; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } +} + +static struct bfq_queue * +bfq_merge_bfqqs(struct bfq_data *bfqd, struct cfq_io_context *cic, + struct bfq_queue *bfqq) +{ + bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", + (long unsigned)bfqq->new_bfqq->pid); + cic_set_bfqq(cic, bfqq->new_bfqq, 1); + bfq_mark_bfqq_coop(bfqq->new_bfqq); + bfq_put_queue(bfqq); + return cic_to_bfqq(cic, 1); +} + +/* + * Returns NULL if a new bfqq should be allocated, or the old bfqq if this + * was the last process referring to said bfqq. + */ +static struct bfq_queue * +bfq_split_bfqq(struct cfq_io_context *cic, struct bfq_queue *bfqq) +{ + bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); + if (bfqq_process_refs(bfqq) == 1) { + bfqq->pid = current->pid; + bfq_clear_bfqq_some_coop_idle(bfqq); + bfq_clear_bfqq_coop(bfqq); + bfq_clear_bfqq_split_coop(bfqq); + return bfqq; + } + + cic_set_bfqq(cic, NULL, 1); + + bfq_put_cooperator(bfqq); + + bfq_put_queue(bfqq); + return NULL; +} + +/* + * Allocate bfq data structures associated with this request. 
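+ *
+ * Getting the io context below may sleep (hence the might_sleep_if()
+ * annotation); the queue lock is taken here only around the cic/bfqq
+ * setup and released again before returning.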
+ */ +static int bfq_set_request(struct request_queue *q, struct request *rq, + gfp_t gfp_mask) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct cfq_io_context *cic; + const int rw = rq_data_dir(rq); + const int is_sync = rq_is_sync(rq); + struct bfq_queue *bfqq; + struct bfq_group *bfqg; + unsigned long flags; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + cic = bfq_get_io_context(bfqd, gfp_mask); + + spin_lock_irqsave(q->queue_lock, flags); + + if (cic == NULL) + goto queue_fail; + + bfqg = bfq_cic_update_cgroup(cic); + +new_queue: + bfqq = cic_to_bfqq(cic, is_sync); + if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { + bfqq = bfq_get_queue(bfqd, bfqg, is_sync, cic->ioc, gfp_mask); + cic_set_bfqq(cic, bfqq, is_sync); + } else { + /* + * If the queue was seeky for too long, break it apart. + */ + if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { + bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); + bfqq = bfq_split_bfqq(cic, bfqq); + if (!bfqq) + goto new_queue; + } + + /* + * Check to see if this queue is scheduled to merge with + * another closely cooperating queue. The merging of queues + * happens here as it must be done in process context. + * The reference on new_bfqq was taken in merge_bfqqs. + */ + if (bfqq->new_bfqq != NULL) + bfqq = bfq_merge_bfqqs(bfqd, cic, bfqq); + } + + bfqq->allocated[rw]++; + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, + atomic_read(&bfqq->ref)); + + spin_unlock_irqrestore(q->queue_lock, flags); + + rq->elevator_private[0] = cic; + rq->elevator_private[1] = bfqq; + + return 0; + +queue_fail: + if (cic != NULL) + put_io_context(cic->ioc); + + bfq_schedule_dispatch(bfqd); + spin_unlock_irqrestore(q->queue_lock, flags); + + return 1; +} + +static void bfq_kick_queue(struct work_struct *work) +{ + struct bfq_data *bfqd = + container_of(work, struct bfq_data, unplug_work); + struct request_queue *q = bfqd->queue; + + spin_lock_irq(q->queue_lock); + __blk_run_queue(q); + spin_unlock_irq(q->queue_lock); +} + +/* + * Handler of the expiration of the timer running if the active_queue + * is idling inside its time slice. + */ +static void bfq_idle_slice_timer(unsigned long data) +{ + struct bfq_data *bfqd = (struct bfq_data *)data; + struct bfq_queue *bfqq; + unsigned long flags; + enum bfqq_expiration reason; + + spin_lock_irqsave(bfqd->queue->queue_lock, flags); + + bfqq = bfqd->active_queue; + /* + * Theoretical race here: active_queue can be NULL or different + * from the queue that was idling if the timer handler spins on + * the queue_lock and a new request arrives for the current + * queue and there is a full dispatch cycle that changes the + * active_queue. This can hardly happen, but in the worst case + * we just expire a queue too early. 
+ */ + if (bfqq != NULL) { + bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); + if (bfq_bfqq_budget_timeout(bfqq)) + /* + * Also here the queue can be safely expired + * for budget timeout without wasting + * guarantees + */ + reason = BFQ_BFQQ_BUDGET_TIMEOUT; + else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) + /* + * The queue may not be empty upon timer expiration, + * because we may not disable the timer when the first + * request of the active queue arrives during + * disk idling + */ + reason = BFQ_BFQQ_TOO_IDLE; + else + goto schedule_dispatch; + + bfq_bfqq_expire(bfqd, bfqq, 1, reason); + } + +schedule_dispatch: + bfq_schedule_dispatch(bfqd); + + spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); +} + +static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) +{ + del_timer_sync(&bfqd->idle_slice_timer); + cancel_work_sync(&bfqd->unplug_work); +} + +static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, + struct bfq_queue **bfqq_ptr) +{ + struct bfq_group *root_group = bfqd->root_group; + struct bfq_queue *bfqq = *bfqq_ptr; + + bfq_log(bfqd, "put_async_bfqq: %p", bfqq); + if (bfqq != NULL) { + bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); + bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + *bfqq_ptr = NULL; + } +} + +/* + * Release all the bfqg references to its async queues. If we are + * deallocating the group these queues may still contain requests, so + * we reparent them to the root cgroup (i.e., the only one that will + * exist for sure untill all the requests on a device are gone). + */ +static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) +{ + int i, j; + + for (i = 0; i < 2; i++) + for (j = 0; j < IOPRIO_BE_NR; j++) + __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); + + __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); +} + +static void bfq_exit_queue(struct elevator_queue *e) +{ + struct bfq_data *bfqd = e->elevator_data; + struct request_queue *q = bfqd->queue; + struct bfq_queue *bfqq, *n; + struct cfq_io_context *cic; + + bfq_shutdown_timer_wq(bfqd); + + spin_lock_irq(q->queue_lock); + + while (!list_empty(&bfqd->cic_list)) { + cic = list_entry(bfqd->cic_list.next, struct cfq_io_context, + queue_list); + __bfq_exit_single_io_context(bfqd, cic); + } + + BUG_ON(bfqd->active_queue != NULL); + list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) + bfq_deactivate_bfqq(bfqd, bfqq, 0); + + bfq_disconnect_groups(bfqd); + spin_unlock_irq(q->queue_lock); + + bfq_shutdown_timer_wq(bfqd); + + spin_lock(&cic_index_lock); + ida_remove(&cic_index_ida, bfqd->cic_index); + spin_unlock(&cic_index_lock); + + /* Wait for cic->key accessors to exit their grace periods. 
*/ + synchronize_rcu(); + + BUG_ON(timer_pending(&bfqd->idle_slice_timer)); + + bfq_free_root_group(bfqd); + kfree(bfqd); +} + +static int bfq_alloc_cic_index(void) +{ + int index, error; + + do { + if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&cic_index_lock); + error = ida_get_new(&cic_index_ida, &index); + spin_unlock(&cic_index_lock); + if (error && error != -EAGAIN) + return error; + } while (error); + + return index; +} + +static void *bfq_init_queue(struct request_queue *q) +{ + struct bfq_group *bfqg; + struct bfq_data *bfqd; + int i; + + i = bfq_alloc_cic_index(); + if (i < 0) + return NULL; + + bfqd = kmalloc_node(sizeof(*bfqd), GFP_KERNEL | __GFP_ZERO, q->node); + if (bfqd == NULL) + return NULL; + + bfqd->cic_index = i; + + /* + * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. + * Grab a permanent reference to it, so that the normal code flow + * will not attempt to free it. + */ + bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); + atomic_inc(&bfqd->oom_bfqq.ref); + + INIT_LIST_HEAD(&bfqd->cic_list); + + bfqd->queue = q; + + bfqg = bfq_alloc_root_group(bfqd, q->node); + if (bfqg == NULL) { + kfree(bfqd); + return NULL; + } + + bfqd->root_group = bfqg; + + init_timer(&bfqd->idle_slice_timer); + bfqd->idle_slice_timer.function = bfq_idle_slice_timer; + bfqd->idle_slice_timer.data = (unsigned long)bfqd; + + bfqd->rq_pos_tree = RB_ROOT; + + INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); + + INIT_LIST_HEAD(&bfqd->active_list); + INIT_LIST_HEAD(&bfqd->idle_list); + + bfqd->hw_tag = -1; + + bfqd->bfq_max_budget = bfq_default_max_budget; + + bfqd->bfq_quantum = bfq_quantum; + bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; + bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; + bfqd->bfq_back_max = bfq_back_max; + bfqd->bfq_back_penalty = bfq_back_penalty; + bfqd->bfq_slice_idle = bfq_slice_idle; + bfqd->bfq_class_idle_last_service = 0; + bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; + bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; + bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; + + bfqd->low_latency = true; + + bfqd->bfq_raising_coeff = 20; + bfqd->bfq_raising_rt_max_time = msecs_to_jiffies(300); + bfqd->bfq_raising_max_time = 0; + bfqd->bfq_raising_min_idle_time = msecs_to_jiffies(2000); + bfqd->bfq_raising_min_inter_arr_async = msecs_to_jiffies(500); + bfqd->bfq_raising_max_softrt_rate = 7000; + + /* Initially estimate the device's peak rate as the reference rate */ + if (blk_queue_nonrot(bfqd->queue)) { + bfqd->RT_prod = R_nonrot * T_nonrot; + bfqd->peak_rate = R_nonrot; + } else { + bfqd->RT_prod = R_rot * T_rot; + bfqd->peak_rate = R_rot; + } + + return bfqd; +} + +static void bfq_slab_kill(void) +{ + if (bfq_pool != NULL) + kmem_cache_destroy(bfq_pool); + if (bfq_ioc_pool != NULL) + kmem_cache_destroy(bfq_ioc_pool); +} + +static int __init bfq_slab_setup(void) +{ + bfq_pool = KMEM_CACHE(bfq_queue, 0); + if (bfq_pool == NULL) + goto fail; + + bfq_ioc_pool = kmem_cache_create("bfq_io_context", + sizeof(struct cfq_io_context), + __alignof__(struct cfq_io_context), + 0, NULL); + if (bfq_ioc_pool == NULL) + goto fail; + + return 0; +fail: + bfq_slab_kill(); + return -ENOMEM; +} + +static ssize_t bfq_var_show(unsigned int var, char *page) +{ + return sprintf(page, "%d\n", var); +} + +static ssize_t bfq_var_store(unsigned long *var, const char *page, size_t count) +{ + unsigned long new_val; + int ret = strict_strtoul(page, 10, &new_val); + + if (ret == 0) + *var = new_val; + + return count; +} + +static ssize_t 
bfq_raising_max_time_show(struct elevator_queue *e, char *page) +{ + struct bfq_data *bfqd = e->elevator_data; + return sprintf(page, "%d\n", bfqd->bfq_raising_max_time > 0 ? + bfqd->bfq_raising_max_time : + bfq_wrais_duration(bfqd)); +} + +static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) +{ + struct bfq_queue *bfqq; + struct bfq_data *bfqd = e->elevator_data; + ssize_t num_char = 0; + + num_char += sprintf(page + num_char, "Active:\n"); + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { + num_char += sprintf(page + num_char, + "pid%d: weight %hu, dur %d/%u\n", + bfqq->pid, + bfqq->entity.weight, + jiffies_to_msecs(jiffies - + bfqq->last_rais_start_finish), + jiffies_to_msecs(bfqq->raising_cur_max_time)); + } + num_char += sprintf(page + num_char, "Idle:\n"); + list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { + num_char += sprintf(page + num_char, + "pid%d: weight %hu, dur %d/%u\n", + bfqq->pid, + bfqq->entity.weight, + jiffies_to_msecs(jiffies - + bfqq->last_rais_start_finish), + jiffies_to_msecs(bfqq->raising_cur_max_time)); + } + return num_char; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct bfq_data *bfqd = e->elevator_data; \ + unsigned int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return bfq_var_show(__data, (page)); \ +} +SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0); +SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); +SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); +SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); +SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); +SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); +SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); +SHOW_FUNCTION(bfq_max_budget_async_rq_show, bfqd->bfq_max_budget_async_rq, 0); +SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); +SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); +SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); +SHOW_FUNCTION(bfq_raising_coeff_show, bfqd->bfq_raising_coeff, 0); +SHOW_FUNCTION(bfq_raising_rt_max_time_show, bfqd->bfq_raising_rt_max_time, 1); +SHOW_FUNCTION(bfq_raising_min_idle_time_show, bfqd->bfq_raising_min_idle_time, + 1); +SHOW_FUNCTION(bfq_raising_min_inter_arr_async_show, + bfqd->bfq_raising_min_inter_arr_async, + 1); +SHOW_FUNCTION(bfq_raising_max_softrt_rate_show, + bfqd->bfq_raising_max_softrt_rate, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t \ +__FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ + struct bfq_data *bfqd = e->elevator_data; \ + unsigned long __data; \ + int ret = bfq_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + if (__CONV) \ + *(__PTR) = msecs_to_jiffies(__data); \ + else \ + *(__PTR) = __data; \ + return ret; \ +} +STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); +STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, + INT_MAX, 1); +STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, + INT_MAX, 1); +STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); +STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, + INT_MAX, 0); +STORE_FUNCTION(bfq_slice_idle_store, 
&bfqd->bfq_slice_idle, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, + 1, INT_MAX, 0); +STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, + INT_MAX, 1); +STORE_FUNCTION(bfq_raising_coeff_store, &bfqd->bfq_raising_coeff, 1, + INT_MAX, 0); +STORE_FUNCTION(bfq_raising_max_time_store, &bfqd->bfq_raising_max_time, 0, + INT_MAX, 1); +STORE_FUNCTION(bfq_raising_rt_max_time_store, &bfqd->bfq_raising_rt_max_time, 0, + INT_MAX, 1); +STORE_FUNCTION(bfq_raising_min_idle_time_store, + &bfqd->bfq_raising_min_idle_time, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_raising_min_inter_arr_async_store, + &bfqd->bfq_raising_min_inter_arr_async, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_raising_max_softrt_rate_store, + &bfqd->bfq_raising_max_softrt_rate, 0, INT_MAX, 0); +#undef STORE_FUNCTION + +/* do nothing for the moment */ +static ssize_t bfq_weights_store(struct elevator_queue *e, + const char *page, size_t count) +{ + return count; +} + +static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) +{ + u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); + + if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) + return bfq_calc_max_budget(bfqd->peak_rate, timeout); + else + return bfq_default_max_budget; +} + +static ssize_t bfq_max_budget_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long __data; + int ret = bfq_var_store(&__data, (page), count); + + if (__data == 0) + bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); + else { + if (__data > INT_MAX) + __data = INT_MAX; + bfqd->bfq_max_budget = __data; + } + + bfqd->bfq_user_max_budget = __data; + + return ret; +} + +static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long __data; + int ret = bfq_var_store(&__data, (page), count); + + if (__data < 1) + __data = 1; + else if (__data > INT_MAX) + __data = INT_MAX; + + bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); + if (bfqd->bfq_user_max_budget == 0) + bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); + + return ret; +} + +static ssize_t bfq_low_latency_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long __data; + int ret = bfq_var_store(&__data, (page), count); + + if (__data > 1) + __data = 1; + bfqd->low_latency = __data; + + return ret; +} + +#define BFQ_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) + +static struct elv_fs_entry bfq_attrs[] = { + BFQ_ATTR(quantum), + BFQ_ATTR(fifo_expire_sync), + BFQ_ATTR(fifo_expire_async), + BFQ_ATTR(back_seek_max), + BFQ_ATTR(back_seek_penalty), + BFQ_ATTR(slice_idle), + BFQ_ATTR(max_budget), + BFQ_ATTR(max_budget_async_rq), + BFQ_ATTR(timeout_sync), + BFQ_ATTR(timeout_async), + BFQ_ATTR(low_latency), + BFQ_ATTR(raising_coeff), + BFQ_ATTR(raising_max_time), + BFQ_ATTR(raising_rt_max_time), + BFQ_ATTR(raising_min_idle_time), + BFQ_ATTR(raising_min_inter_arr_async), + BFQ_ATTR(raising_max_softrt_rate), + BFQ_ATTR(weights), + __ATTR_NULL +}; + +static struct elevator_type iosched_bfq = { + .ops = { + .elevator_merge_fn = bfq_merge, + .elevator_merged_fn = bfq_merged_request, + .elevator_merge_req_fn = bfq_merged_requests, + .elevator_allow_merge_fn = bfq_allow_merge, + .elevator_dispatch_fn = bfq_dispatch_requests, + .elevator_add_req_fn = bfq_insert_request, + 
.elevator_activate_req_fn = bfq_activate_request, + .elevator_deactivate_req_fn = bfq_deactivate_request, + .elevator_completed_req_fn = bfq_completed_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, + .elevator_set_req_fn = bfq_set_request, + .elevator_put_req_fn = bfq_put_request, + .elevator_may_queue_fn = bfq_may_queue, + .elevator_init_fn = bfq_init_queue, + .elevator_exit_fn = bfq_exit_queue, + .trim = bfq_free_io_context, + }, + .elevator_attrs = bfq_attrs, + .elevator_name = "bfq", + .elevator_owner = THIS_MODULE, +}; + +static int __init bfq_init(void) +{ + /* + * Can be 0 on HZ < 1000 setups. + */ + if (bfq_slice_idle == 0) + bfq_slice_idle = 1; + + if (bfq_timeout_async == 0) + bfq_timeout_async = 1; + + if (bfq_slab_setup()) + return -ENOMEM; + + elv_register(&iosched_bfq); + + return 0; +} + +static void __exit bfq_exit(void) +{ + DECLARE_COMPLETION_ONSTACK(all_gone); + elv_unregister(&iosched_bfq); + bfq_ioc_gone = &all_gone; + /* bfq_ioc_gone's update must be visible before reading bfq_ioc_count */ + smp_wmb(); + if (elv_ioc_count_read(bfq_ioc_count) != 0) + wait_for_completion(&all_gone); + ida_destroy(&cic_index_ida); + bfq_slab_kill(); +} + +module_init(bfq_init); +module_exit(bfq_exit); + +MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Budget Fair Queueing IO scheduler"); diff --git a/block/bfq-sched.c b/block/bfq-sched.c new file mode 100644 index 00000000000..fd50b7fd130 --- /dev/null +++ b/block/bfq-sched.c @@ -0,0 +1,1066 @@ +/* + * BFQ: Hierarchical B-WF2Q+ scheduler. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + */ + +#ifdef CONFIG_CGROUP_BFQIO +#define for_each_entity(entity) \ + for (; entity != NULL; entity = entity->parent) + +#define for_each_entity_safe(entity, parent) \ + for (; entity && ({ parent = entity->parent; 1; }); entity = parent) + +static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, + int extract, + struct bfq_data *bfqd); + +static inline void bfq_update_budget(struct bfq_entity *next_active) +{ + struct bfq_entity *bfqg_entity; + struct bfq_group *bfqg; + struct bfq_sched_data *group_sd; + + BUG_ON(next_active == NULL); + + group_sd = next_active->sched_data; + + bfqg = container_of(group_sd, struct bfq_group, sched_data); + /* + * bfq_group's my_entity field is not NULL only if the group + * is not the root group. We must not touch the root entity + * as it must never become an active entity. + */ + bfqg_entity = bfqg->my_entity; + if (bfqg_entity != NULL) + bfqg_entity->budget = next_active->budget; +} + +static int bfq_update_next_active(struct bfq_sched_data *sd) +{ + struct bfq_entity *next_active; + + if (sd->active_entity != NULL) + /* will update/requeue at the end of service */ + return 0; + + /* + * NOTE: this can be improved in many ways, such as returning + * 1 (and thus propagating upwards the update) only when the + * budget changes, or caching the bfqq that will be scheduled + * next from this subtree. By now we worry more about + * correctness than about performance... 
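+	 *
+	 * The return value matters to the callers: returning 1 makes
+	 * bfq_activate_entity() and bfq_deactivate_entity() keep walking
+	 * up the hierarchy, so that the parent entities also update
+	 * their next_active choice and budget.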
+ */ + next_active = bfq_lookup_next_entity(sd, 0, NULL); + sd->next_active = next_active; + + if (next_active != NULL) + bfq_update_budget(next_active); + + return 1; +} + +static inline void bfq_check_next_active(struct bfq_sched_data *sd, + struct bfq_entity *entity) +{ + BUG_ON(sd->next_active != entity); +} +#else +#define for_each_entity(entity) \ + for (; entity != NULL; entity = NULL) + +#define for_each_entity_safe(entity, parent) \ + for (parent = NULL; entity != NULL; entity = parent) + +static inline int bfq_update_next_active(struct bfq_sched_data *sd) +{ + return 0; +} + +static inline void bfq_check_next_active(struct bfq_sched_data *sd, + struct bfq_entity *entity) +{ +} + +static inline void bfq_update_budget(struct bfq_entity *next_active) +{ +} +#endif + +/* + * Shift for timestamp calculations. This actually limits the maximum + * service allowed in one timestamp delta (small shift values increase it), + * the maximum total weight that can be used for the queues in the system + * (big shift values increase it), and the period of virtual time wraparounds. + */ +#define WFQ_SERVICE_SHIFT 22 + +/** + * bfq_gt - compare two timestamps. + * @a: first ts. + * @b: second ts. + * + * Return @a > @b, dealing with wrapping correctly. + */ +static inline int bfq_gt(u64 a, u64 b) +{ + return (s64)(a - b) > 0; +} + +static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = NULL; + + BUG_ON(entity == NULL); + + if (entity->my_sched_data == NULL) + bfqq = container_of(entity, struct bfq_queue, entity); + + return bfqq; +} + + +/** + * bfq_delta - map service into the virtual time domain. + * @service: amount of service. + * @weight: scale factor (weight of an entity or weight sum). + */ +static inline u64 bfq_delta(unsigned long service, + unsigned long weight) +{ + u64 d = (u64)service << WFQ_SERVICE_SHIFT; + + do_div(d, weight); + return d; +} + +/** + * bfq_calc_finish - assign the finish time to an entity. + * @entity: the entity to act upon. + * @service: the service to be charged to the entity. + */ +static inline void bfq_calc_finish(struct bfq_entity *entity, + unsigned long service) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(entity->weight == 0); + + entity->finish = entity->start + + bfq_delta(service, entity->weight); + + if (bfqq != NULL) { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "calc_finish: serv %lu, w %d", + service, entity->weight); + bfq_log_bfqq(bfqq->bfqd, bfqq, + "calc_finish: start %llu, finish %llu, delta %llu", + entity->start, entity->finish, + bfq_delta(service, entity->weight)); + } +} + +/** + * bfq_entity_of - get an entity from a node. + * @node: the node field of the entity. + * + * Convert a node pointer to the relative entity. This is used only + * to simplify the logic of some functions and not as the generic + * conversion mechanism because, e.g., in the tree walking functions, + * the check for a %NULL value would be redundant. + */ +static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) +{ + struct bfq_entity *entity = NULL; + + if (node != NULL) + entity = rb_entry(node, struct bfq_entity, rb_node); + + return entity; +} + +/** + * bfq_extract - remove an entity from a tree. + * @root: the tree root. + * @entity: the entity to remove. 
+ */ +static inline void bfq_extract(struct rb_root *root, + struct bfq_entity *entity) +{ + BUG_ON(entity->tree != root); + + entity->tree = NULL; + rb_erase(&entity->rb_node, root); +} + +/** + * bfq_idle_extract - extract an entity from the idle tree. + * @st: the service tree of the owning @entity. + * @entity: the entity being removed. + */ +static void bfq_idle_extract(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *next; + + BUG_ON(entity->tree != &st->idle); + + if (entity == st->first_idle) { + next = rb_next(&entity->rb_node); + st->first_idle = bfq_entity_of(next); + } + + if (entity == st->last_idle) { + next = rb_prev(&entity->rb_node); + st->last_idle = bfq_entity_of(next); + } + + bfq_extract(&st->idle, entity); + + if (bfqq != NULL) + list_del(&bfqq->bfqq_list); +} + +/** + * bfq_insert - generic tree insertion. + * @root: tree root. + * @entity: entity to insert. + * + * This is used for the idle and the active tree, since they are both + * ordered by finish time. + */ +static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) +{ + struct bfq_entity *entry; + struct rb_node **node = &root->rb_node; + struct rb_node *parent = NULL; + + BUG_ON(entity->tree != NULL); + + while (*node != NULL) { + parent = *node; + entry = rb_entry(parent, struct bfq_entity, rb_node); + + if (bfq_gt(entry->finish, entity->finish)) + node = &parent->rb_left; + else + node = &parent->rb_right; + } + + rb_link_node(&entity->rb_node, parent, node); + rb_insert_color(&entity->rb_node, root); + + entity->tree = root; +} + +/** + * bfq_update_min - update the min_start field of a entity. + * @entity: the entity to update. + * @node: one of its children. + * + * This function is called when @entity may store an invalid value for + * min_start due to updates to the active tree. The function assumes + * that the subtree rooted at @node (which may be its left or its right + * child) has a valid min_start value. + */ +static inline void bfq_update_min(struct bfq_entity *entity, + struct rb_node *node) +{ + struct bfq_entity *child; + + if (node != NULL) { + child = rb_entry(node, struct bfq_entity, rb_node); + if (bfq_gt(entity->min_start, child->min_start)) + entity->min_start = child->min_start; + } +} + +/** + * bfq_update_active_node - recalculate min_start. + * @node: the node to update. + * + * @node may have changed position or one of its children may have moved, + * this function updates its min_start value. The left and right subtrees + * are assumed to hold a correct min_start value. + */ +static inline void bfq_update_active_node(struct rb_node *node) +{ + struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); + + entity->min_start = entity->start; + bfq_update_min(entity, node->rb_right); + bfq_update_min(entity, node->rb_left); +} + +/** + * bfq_update_active_tree - update min_start for the whole active tree. + * @node: the starting node. + * + * @node must be the deepest modified node after an update. This function + * updates its min_start using the values held by its children, assuming + * that they did not change, and then updates all the nodes that may have + * changed in the path to the root. The only nodes that may have changed + * are the ones in the path or their siblings. 
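+ *
+ * The walk below is O(log N): at each level it refreshes the current
+ * node, then the sibling that rebalancing may have moved, and climbs
+ * to the parent.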
+ */ +static void bfq_update_active_tree(struct rb_node *node) +{ + struct rb_node *parent; + +up: + bfq_update_active_node(node); + + parent = rb_parent(node); + if (parent == NULL) + return; + + if (node == parent->rb_left && parent->rb_right != NULL) + bfq_update_active_node(parent->rb_right); + else if (parent->rb_left != NULL) + bfq_update_active_node(parent->rb_left); + + node = parent; + goto up; +} + +/** + * bfq_active_insert - insert an entity in the active tree of its group/device. + * @st: the service tree of the entity. + * @entity: the entity being inserted. + * + * The active tree is ordered by finish time, but an extra key is kept + * per each node, containing the minimum value for the start times of + * its children (and the node itself), so it's possible to search for + * the eligible node with the lowest finish time in logarithmic time. + */ +static void bfq_active_insert(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *node = &entity->rb_node; + + bfq_insert(&st->active, entity); + + if (node->rb_left != NULL) + node = node->rb_left; + else if (node->rb_right != NULL) + node = node->rb_right; + + bfq_update_active_tree(node); + + if (bfqq != NULL) + list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); +} + +/** + * bfq_ioprio_to_weight - calc a weight from an ioprio. + * @ioprio: the ioprio value to convert. + */ +static unsigned short bfq_ioprio_to_weight(int ioprio) +{ + WARN_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); + return IOPRIO_BE_NR - ioprio; +} + +/** + * bfq_weight_to_ioprio - calc an ioprio from a weight. + * @weight: the weight value to convert. + * + * To preserve as mush as possible the old only-ioprio user interface, + * 0 is used as an escape ioprio value for weights (numerically) equal or + * larger than IOPRIO_BE_NR + */ +static unsigned short bfq_weight_to_ioprio(int weight) +{ + WARN_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); + return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight; +} + +static inline void bfq_get_entity(struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + if (bfqq != NULL) { + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", + bfqq, atomic_read(&bfqq->ref)); + } +} + +/** + * bfq_find_deepest - find the deepest node that an extraction can modify. + * @node: the node being removed. + * + * Do the first step of an extraction in an rb tree, looking for the + * node that will replace @node, and returning the deepest node that + * the following modifications to the tree can touch. If @node is the + * last node in the tree return %NULL. + */ +static struct rb_node *bfq_find_deepest(struct rb_node *node) +{ + struct rb_node *deepest; + + if (node->rb_right == NULL && node->rb_left == NULL) + deepest = rb_parent(node); + else if (node->rb_right == NULL) + deepest = node->rb_left; + else if (node->rb_left == NULL) + deepest = node->rb_right; + else { + deepest = rb_next(node); + if (deepest->rb_right != NULL) + deepest = deepest->rb_right; + else if (rb_parent(deepest) != node) + deepest = rb_parent(deepest); + } + + return deepest; +} + +/** + * bfq_active_extract - remove an entity from the active tree. + * @st: the service_tree containing the tree. + * @entity: the entity being removed. 
+ */ +static void bfq_active_extract(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *node; + + node = bfq_find_deepest(&entity->rb_node); + bfq_extract(&st->active, entity); + + if (node != NULL) + bfq_update_active_tree(node); + + if (bfqq != NULL) + list_del(&bfqq->bfqq_list); +} + +/** + * bfq_idle_insert - insert an entity into the idle tree. + * @st: the service tree containing the tree. + * @entity: the entity to insert. + */ +static void bfq_idle_insert(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct bfq_entity *first_idle = st->first_idle; + struct bfq_entity *last_idle = st->last_idle; + + if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) + st->first_idle = entity; + if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) + st->last_idle = entity; + + bfq_insert(&st->idle, entity); + + if (bfqq != NULL) + list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); +} + +/** + * bfq_forget_entity - remove an entity from the wfq trees. + * @st: the service tree. + * @entity: the entity being removed. + * + * Update the device status and forget everything about @entity, putting + * the device reference to it, if it is a queue. Entities belonging to + * groups are not refcounted. + */ +static void bfq_forget_entity(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(!entity->on_st); + + entity->on_st = 0; + st->wsum -= entity->weight; + if (bfqq != NULL) { + bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } +} + +/** + * bfq_put_idle_entity - release the idle tree ref of an entity. + * @st: service tree for the entity. + * @entity: the entity being released. + */ +static void bfq_put_idle_entity(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + bfq_idle_extract(st, entity); + bfq_forget_entity(st, entity); +} + +/** + * bfq_forget_idle - update the idle tree if necessary. + * @st: the service tree to act upon. + * + * To preserve the global O(log N) complexity we only remove one entry here; + * as the idle tree will not grow indefinitely this can be done safely. + */ +static void bfq_forget_idle(struct bfq_service_tree *st) +{ + struct bfq_entity *first_idle = st->first_idle; + struct bfq_entity *last_idle = st->last_idle; + + if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && + !bfq_gt(last_idle->finish, st->vtime)) { + /* + * Forget the whole idle tree, increasing the vtime past + * the last finish time of idle entities. 
+ */ + st->vtime = last_idle->finish; + } + + if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) + bfq_put_idle_entity(st, first_idle); +} + +static struct bfq_service_tree * +__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, + struct bfq_entity *entity) +{ + struct bfq_service_tree *new_st = old_st; + + if (entity->ioprio_changed) { + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(old_st->wsum < entity->weight); + old_st->wsum -= entity->weight; + + if (entity->new_weight != entity->orig_weight) { + entity->orig_weight = entity->new_weight; + entity->ioprio = + bfq_weight_to_ioprio(entity->orig_weight); + } else if (entity->new_ioprio != entity->ioprio) { + entity->ioprio = entity->new_ioprio; + entity->orig_weight = + bfq_ioprio_to_weight(entity->ioprio); + } else + entity->new_weight = entity->orig_weight = + bfq_ioprio_to_weight(entity->ioprio); + + entity->ioprio_class = entity->new_ioprio_class; + entity->ioprio_changed = 0; + + /* + * NOTE: here we may be changing the weight too early, + * this will cause unfairness. The correct approach + * would have required additional complexity to defer + * weight changes to the proper time instants (i.e., + * when entity->finish <= old_st->vtime). + */ + new_st = bfq_entity_service_tree(entity); + entity->weight = entity->orig_weight * + (bfqq != NULL ? bfqq->raising_coeff : 1); + new_st->wsum += entity->weight; + + if (new_st != old_st) + entity->start = new_st->vtime; + } + + return new_st; +} + +/** + * bfq_bfqq_served - update the scheduler status after selection for service. + * @bfqq: the queue being served. + * @served: bytes to transfer. + * + * NOTE: this can be optimized, as the timestamps of upper level entities + * are synchronized every time a new bfqq is selected for service. By now, + * we keep it to better check consistency. + */ +static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) +{ + struct bfq_entity *entity = &bfqq->entity; + struct bfq_service_tree *st; + + for_each_entity(entity) { + st = bfq_entity_service_tree(entity); + + entity->service += served; + BUG_ON(entity->service > entity->budget); + BUG_ON(st->wsum == 0); + + st->vtime += bfq_delta(served, st->wsum); + bfq_forget_idle(st); + } + bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); +} + +/** + * bfq_bfqq_charge_full_budget - set the service to the entity budget. + * @bfqq: the queue that needs a service update. + * + * When it's not possible to be fair in the service domain, because + * a queue is not consuming its budget fast enough (the meaning of + * fast depends on the timeout parameter), we charge it a full + * budget. In this way we should obtain a sort of time-domain + * fairness among all the seeky/slow queues. + */ +static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); + + bfq_bfqq_served(bfqq, entity->budget - entity->service); +} + +/** + * __bfq_activate_entity - activate an entity. + * @entity: the entity being activated. + * + * Called whenever an entity is activated, i.e., it is not active and one + * of its children receives a new request, or has to be reactivated due to + * budget exhaustion. It uses the current budget of the entity (and the + * service received if @entity is active) of the queue to calculate its + * timestamps. 
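+ *
+ * In the virtual time domain the finish timestamp follows the usual
+ * WF2Q+ rule, finish = start + (budget << WFQ_SERVICE_SHIFT) / weight
+ * (see bfq_calc_finish() and bfq_delta()), while the start timestamp
+ * is derived from the entity's previous finish time, its old start,
+ * or the service tree vtime, depending on where the entity comes from.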
+ */ +static void __bfq_activate_entity(struct bfq_entity *entity) +{ + struct bfq_sched_data *sd = entity->sched_data; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + + if (entity == sd->active_entity) { + BUG_ON(entity->tree != NULL); + /* + * If we are requeueing the current entity we have + * to take care of not charging to it service it has + * not received. + */ + bfq_calc_finish(entity, entity->service); + entity->start = entity->finish; + sd->active_entity = NULL; + } else if (entity->tree == &st->active) { + /* + * Requeueing an entity due to a change of some + * next_active entity below it. We reuse the old + * start time. + */ + bfq_active_extract(st, entity); + } else if (entity->tree == &st->idle) { + /* + * Must be on the idle tree, bfq_idle_extract() will + * check for that. + */ + bfq_idle_extract(st, entity); + entity->start = bfq_gt(st->vtime, entity->finish) ? + st->vtime : entity->finish; + } else { + /* + * The finish time of the entity may be invalid, and + * it is in the past for sure, otherwise the queue + * would have been on the idle tree. + */ + entity->start = st->vtime; + st->wsum += entity->weight; + bfq_get_entity(entity); + + BUG_ON(entity->on_st); + entity->on_st = 1; + } + + st = __bfq_entity_update_weight_prio(st, entity); + bfq_calc_finish(entity, entity->budget); + bfq_active_insert(st, entity); +} + +/** + * bfq_activate_entity - activate an entity and its ancestors if necessary. + * @entity: the entity to activate. + * + * Activate @entity and all the entities on the path from it to the root. + */ +static void bfq_activate_entity(struct bfq_entity *entity) +{ + struct bfq_sched_data *sd; + + for_each_entity(entity) { + __bfq_activate_entity(entity); + + sd = entity->sched_data; + if (!bfq_update_next_active(sd)) + /* + * No need to propagate the activation to the + * upper entities, as they will be updated when + * the active entity is rescheduled. + */ + break; + } +} + +/** + * __bfq_deactivate_entity - deactivate an entity from its service tree. + * @entity: the entity to deactivate. + * @requeue: if false, the entity will not be put into the idle tree. + * + * Deactivate an entity, independently from its previous state. If the + * entity was not on a service tree just return, otherwise if it is on + * any scheduler tree, extract it from that tree, and if necessary + * and if the caller did not specify @requeue, put it on the idle tree. + * + * Return %1 if the caller should update the entity hierarchy, i.e., + * if the entity was under service or if it was the next_active for + * its sched_data; return %0 otherwise. 
+ */ +static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) +{ + struct bfq_sched_data *sd = entity->sched_data; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + int was_active = entity == sd->active_entity; + int ret = 0; + + if (!entity->on_st) + return 0; + + BUG_ON(was_active && entity->tree != NULL); + + if (was_active) { + bfq_calc_finish(entity, entity->service); + sd->active_entity = NULL; + } else if (entity->tree == &st->active) + bfq_active_extract(st, entity); + else if (entity->tree == &st->idle) + bfq_idle_extract(st, entity); + else if (entity->tree != NULL) + BUG(); + + if (was_active || sd->next_active == entity) + ret = bfq_update_next_active(sd); + + if (!requeue || !bfq_gt(entity->finish, st->vtime)) + bfq_forget_entity(st, entity); + else + bfq_idle_insert(st, entity); + + BUG_ON(sd->active_entity == entity); + BUG_ON(sd->next_active == entity); + + return ret; +} + +/** + * bfq_deactivate_entity - deactivate an entity. + * @entity: the entity to deactivate. + * @requeue: true if the entity can be put on the idle tree + */ +static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) +{ + struct bfq_sched_data *sd; + struct bfq_entity *parent; + + for_each_entity_safe(entity, parent) { + sd = entity->sched_data; + + if (!__bfq_deactivate_entity(entity, requeue)) + /* + * The parent entity is still backlogged, and + * we don't need to update it as it is still + * under service. + */ + break; + + if (sd->next_active != NULL) + /* + * The parent entity is still backlogged and + * the budgets on the path towards the root + * need to be updated. + */ + goto update; + + /* + * If we reach there the parent is no more backlogged and + * we want to propagate the dequeue upwards. + */ + requeue = 1; + } + + return; + +update: + entity = parent; + for_each_entity(entity) { + __bfq_activate_entity(entity); + + sd = entity->sched_data; + if (!bfq_update_next_active(sd)) + break; + } +} + +/** + * bfq_update_vtime - update vtime if necessary. + * @st: the service tree to act upon. + * + * If necessary update the service tree vtime to have at least one + * eligible entity, skipping to its start time. Assumes that the + * active tree of the device is not empty. + * + * NOTE: this hierarchical implementation updates vtimes quite often, + * we may end up with reactivated tasks getting timestamps after a + * vtime skip done because we needed a ->first_active entity on some + * intermediate node. + */ +static void bfq_update_vtime(struct bfq_service_tree *st) +{ + struct bfq_entity *entry; + struct rb_node *node = st->active.rb_node; + + entry = rb_entry(node, struct bfq_entity, rb_node); + if (bfq_gt(entry->min_start, st->vtime)) { + st->vtime = entry->min_start; + bfq_forget_idle(st); + } +} + +/** + * bfq_first_active - find the eligible entity with the smallest finish time + * @st: the service tree to select from. + * + * This function searches the first schedulable entity, starting from the + * root of the tree and going on the left every time on this side there is + * a subtree with at least one eligible (start >= vtime) entity. The path + * on the right is followed only if a) the left subtree contains no eligible + * entities and b) no eligible entity has been found yet. 
+ */ +static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) +{ + struct bfq_entity *entry, *first = NULL; + struct rb_node *node = st->active.rb_node; + + while (node != NULL) { + entry = rb_entry(node, struct bfq_entity, rb_node); +left: + if (!bfq_gt(entry->start, st->vtime)) + first = entry; + + BUG_ON(bfq_gt(entry->min_start, st->vtime)); + + if (node->rb_left != NULL) { + entry = rb_entry(node->rb_left, + struct bfq_entity, rb_node); + if (!bfq_gt(entry->min_start, st->vtime)) { + node = node->rb_left; + goto left; + } + } + if (first != NULL) + break; + node = node->rb_right; + } + + BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); + return first; +} + +/** + * __bfq_lookup_next_entity - return the first eligible entity in @st. + * @st: the service tree. + * + * Update the virtual time in @st and return the first eligible entity + * it contains. + */ +static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, + bool force) +{ + struct bfq_entity *entity, *new_next_active = NULL; + + if (RB_EMPTY_ROOT(&st->active)) + return NULL; + + bfq_update_vtime(st); + entity = bfq_first_active_entity(st); + BUG_ON(bfq_gt(entity->start, st->vtime)); + + /* + * If the chosen entity does not match with the sched_data's + * next_active and we are forcedly serving the IDLE priority + * class tree, bubble up budget update. + */ + if (unlikely(force && entity != entity->sched_data->next_active)) { + new_next_active = entity; + for_each_entity(new_next_active) + bfq_update_budget(new_next_active); + } + + return entity; +} + +/** + * bfq_lookup_next_entity - return the first eligible entity in @sd. + * @sd: the sched_data. + * @extract: if true the returned entity will be also extracted from @sd. + * + * NOTE: since we cache the next_active entity at each level of the + * hierarchy, the complexity of the lookup can be decreased with + * absolutely no effort just returning the cached next_active value; + * we prefer to do full lookups to test the consistency of * the data + * structures. + */ +static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, + int extract, + struct bfq_data *bfqd) +{ + struct bfq_service_tree *st = sd->service_tree; + struct bfq_entity *entity; + int i=0; + + BUG_ON(sd->active_entity != NULL); + + if (bfqd != NULL && + jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { + entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, true); + if (entity != NULL) { + i = BFQ_IOPRIO_CLASSES - 1; + bfqd->bfq_class_idle_last_service = jiffies; + sd->next_active = entity; + } + } + for (; i < BFQ_IOPRIO_CLASSES; i++) { + entity = __bfq_lookup_next_entity(st + i, false); + if (entity != NULL) { + if (extract) { + bfq_check_next_active(sd, entity); + bfq_active_extract(st + i, entity); + sd->active_entity = entity; + sd->next_active = NULL; + } + break; + } + } + + return entity; +} + +/* + * Get next queue for service. + */ +static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) +{ + struct bfq_entity *entity = NULL; + struct bfq_sched_data *sd; + struct bfq_queue *bfqq; + + BUG_ON(bfqd->active_queue != NULL); + + if (bfqd->busy_queues == 0) + return NULL; + + sd = &bfqd->root_group->sched_data; + for (; sd != NULL; sd = entity->my_sched_data) { + entity = bfq_lookup_next_entity(sd, 1, bfqd); + BUG_ON(entity == NULL); + entity->service = 0; + } + + bfqq = bfq_entity_to_bfqq(entity); + BUG_ON(bfqq == NULL); + + return bfqq; +} + +/* + * Forced extraction of the given queue. 
+ */ +static void bfq_get_next_queue_forced(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + struct bfq_entity *entity; + struct bfq_sched_data *sd; + + BUG_ON(bfqd->active_queue != NULL); + + entity = &bfqq->entity; + /* + * Bubble up extraction/update from the leaf to the root. + */ + for_each_entity(entity) { + sd = entity->sched_data; + bfq_update_budget(entity); + bfq_update_vtime(bfq_entity_service_tree(entity)); + bfq_active_extract(bfq_entity_service_tree(entity), entity); + sd->active_entity = entity; + sd->next_active = NULL; + entity->service = 0; + } + + return; +} + +static void __bfq_bfqd_reset_active(struct bfq_data *bfqd) +{ + if (bfqd->active_cic != NULL) { + put_io_context(bfqd->active_cic->ioc); + bfqd->active_cic = NULL; + } + + bfqd->active_queue = NULL; + del_timer(&bfqd->idle_slice_timer); +} + +static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, + int requeue) +{ + struct bfq_entity *entity = &bfqq->entity; + + if (bfqq == bfqd->active_queue) + __bfq_bfqd_reset_active(bfqd); + + bfq_deactivate_entity(entity, requeue); +} + +static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + + bfq_activate_entity(entity); +} + +/* + * Called when the bfqq no longer has requests pending, remove it from + * the service tree. + */ +static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, + int requeue) +{ + BUG_ON(!bfq_bfqq_busy(bfqq)); + BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + + bfq_log_bfqq(bfqd, bfqq, "del from busy"); + + bfq_clear_bfqq_busy(bfqq); + + BUG_ON(bfqd->busy_queues == 0); + bfqd->busy_queues--; + + bfq_deactivate_bfqq(bfqd, bfqq, requeue); +} + +/* + * Called when an inactive queue receives a new request. + */ +static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + BUG_ON(bfq_bfqq_busy(bfqq)); + BUG_ON(bfqq == bfqd->active_queue); + + bfq_log_bfqq(bfqd, bfqq, "add to busy"); + + bfq_activate_bfqq(bfqd, bfqq); + + bfq_mark_bfqq_busy(bfqq); + bfqd->busy_queues++; +} diff --git a/block/bfq.h b/block/bfq.h new file mode 100644 index 00000000000..be2c572978c --- /dev/null +++ b/block/bfq.h @@ -0,0 +1,593 @@ +/* + * BFQ-v5 for 3.1.0: data structures and common functions prototypes. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + */ + +#ifndef _BFQ_H +#define _BFQ_H + +#include +#include +#include +#include + +#define BFQ_IOPRIO_CLASSES 3 +#define BFQ_CL_IDLE_TIMEOUT HZ/5 + +#define BFQ_MIN_WEIGHT 1 +#define BFQ_MAX_WEIGHT 1000 + +#define BFQ_DEFAULT_GRP_WEIGHT 10 +#define BFQ_DEFAULT_GRP_IOPRIO 0 +#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE + +struct bfq_entity; + +/** + * struct bfq_service_tree - per ioprio_class service tree. + * @active: tree for active entities (i.e., those backlogged). + * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). + * @first_idle: idle entity with minimum F_i. + * @last_idle: idle entity with maximum F_i. + * @vtime: scheduler virtual time. + * @wsum: scheduler weight sum; active and idle entities contribute to it. + * + * Each service tree represents a B-WF2Q+ scheduler on its own. Each + * ioprio_class has its own independent scheduler, and so its own + * bfq_service_tree. All the fields are protected by the queue lock + * of the containing bfqd. 
+ */ +struct bfq_service_tree { + struct rb_root active; + struct rb_root idle; + + struct bfq_entity *first_idle; + struct bfq_entity *last_idle; + + u64 vtime; + unsigned long wsum; +}; + +/** + * struct bfq_sched_data - multi-class scheduler. + * @active_entity: entity under service. + * @next_active: head-of-the-line entity in the scheduler. + * @service_tree: array of service trees, one per ioprio_class. + * + * bfq_sched_data is the basic scheduler queue. It supports three + * ioprio_classes, and can be used either as a toplevel queue or as + * an intermediate queue on a hierarchical setup. + * @next_active points to the active entity of the sched_data service + * trees that will be scheduled next. + * + * The supported ioprio_classes are the same as in CFQ, in descending + * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. + * Requests from higher priority queues are served before all the + * requests from lower priority queues; among requests of the same + * queue requests are served according to B-WF2Q+. + * All the fields are protected by the queue lock of the containing bfqd. + */ +struct bfq_sched_data { + struct bfq_entity *active_entity; + struct bfq_entity *next_active; + struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; +}; + +/** + * struct bfq_entity - schedulable entity. + * @rb_node: service_tree member. + * @on_st: flag, true if the entity is on a tree (either the active or + * the idle one of its service_tree). + * @finish: B-WF2Q+ finish timestamp (aka F_i). + * @start: B-WF2Q+ start timestamp (aka S_i). + * @tree: tree the entity is enqueued into; %NULL if not on a tree. + * @min_start: minimum start time of the (active) subtree rooted at + * this entity; used for O(log N) lookups into active trees. + * @service: service received during the last round of service. + * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. + * @weight: weight of the queue + * @parent: parent entity, for hierarchical scheduling. + * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the + * associated scheduler queue, %NULL on leaf nodes. + * @sched_data: the scheduler queue this entity belongs to. + * @ioprio: the ioprio in use. + * @new_weight: when a weight change is requested, the new weight value. + * @orig_weight: original weight, used to implement weight boosting + * @new_ioprio: when an ioprio change is requested, the new ioprio value. + * @ioprio_class: the ioprio_class in use. + * @new_ioprio_class: when an ioprio_class change is requested, the new + * ioprio_class value. + * @ioprio_changed: flag, true when the user requested a weight, ioprio or + * ioprio_class change. + * + * A bfq_entity is used to represent either a bfq_queue (leaf node in the + * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each + * entity belongs to the sched_data of the parent group in the cgroup + * hierarchy. Non-leaf entities have also their own sched_data, stored + * in @my_sched_data. + * + * Each entity stores independently its priority values; this would + * allow different weights on different devices, but this + * functionality is not exported to userspace by now. Priorities and + * weights are updated lazily, first storing the new values into the + * new_* fields, then setting the @ioprio_changed flag. As soon as + * there is a transition in the entity state that allows the priority + * update to take place the effective and the requested priority + * values are synchronized. 
+ * + * Unless cgroups are used, the weight value is calculated from the + * ioprio to export the same interface as CFQ. When dealing with + * ``well-behaved'' queues (i.e., queues that do not spend too much + * time to consume their budget and have true sequential behavior, and + * when there are no external factors breaking anticipation) the + * relative weights at each level of the cgroups hierarchy should be + * guaranteed. All the fields are protected by the queue lock of the + * containing bfqd. + */ +struct bfq_entity { + struct rb_node rb_node; + + int on_st; + + u64 finish; + u64 start; + + struct rb_root *tree; + + u64 min_start; + + unsigned long service, budget; + unsigned short weight, new_weight; + unsigned short orig_weight; + + struct bfq_entity *parent; + + struct bfq_sched_data *my_sched_data; + struct bfq_sched_data *sched_data; + + unsigned short ioprio, new_ioprio; + unsigned short ioprio_class, new_ioprio_class; + + int ioprio_changed; +}; + +struct bfq_group; + +/** + * struct bfq_queue - leaf schedulable entity. + * @ref: reference counter. + * @bfqd: parent bfq_data. + * @new_bfqq: shared bfq_queue if queue is cooperating with + * one or more other queues. + * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). + * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). + * @sort_list: sorted list of pending requests. + * @next_rq: if fifo isn't expired, next request to serve. + * @queued: nr of requests queued in @sort_list. + * @allocated: currently allocated requests. + * @meta_pending: pending metadata requests. + * @fifo: fifo list of requests in sort_list. + * @entity: entity representing this queue in the scheduler. + * @max_budget: maximum budget allowed from the feedback mechanism. + * @budget_timeout: budget expiration (in jiffies). + * @dispatched: number of requests on the dispatch list or inside driver. + * @org_ioprio: saved ioprio during boosted periods. + * @flags: status flags. + * @bfqq_list: node for active/idle bfqq list inside our bfqd. + * @seek_samples: number of seeks sampled + * @seek_total: sum of the distances of the seeks sampled + * @seek_mean: mean seek distance + * @last_request_pos: position of the last request enqueued + * @pid: pid of the process owning the queue, used for logging purposes. + * @last_rais_start_time: last (idle -> weight-raised) transition attempt + * @raising_cur_max_time: current max raising time for this queue + * + * A bfq_queue is a leaf request queue; it can be associated to an io_context + * or more (if it is an async one). @cgroup holds a reference to the + * cgroup, to be sure that it does not disappear while a bfqq still + * references it (mostly to avoid races between request issuing and task + * migration followed by cgroup distruction). + * All the fields are protected by the queue lock of the containing bfqd. 
+ */ +struct bfq_queue { + atomic_t ref; + struct bfq_data *bfqd; + + /* fields for cooperating queues handling */ + struct bfq_queue *new_bfqq; + struct rb_node pos_node; + struct rb_root *pos_root; + + struct rb_root sort_list; + struct request *next_rq; + int queued[2]; + int allocated[2]; + int meta_pending; + struct list_head fifo; + + struct bfq_entity entity; + + unsigned long max_budget; + unsigned long budget_timeout; + + int dispatched; + + unsigned short org_ioprio; + + unsigned int flags; + + struct list_head bfqq_list; + + unsigned int seek_samples; + u64 seek_total; + sector_t seek_mean; + sector_t last_request_pos; + + pid_t pid; + + /* weight-raising fields */ + unsigned int raising_cur_max_time; + u64 last_rais_start_finish, soft_rt_next_start; + unsigned int raising_coeff; +}; + +/** + * struct bfq_data - per device data structure. + * @queue: request queue for the managed device. + * @root_group: root bfq_group for the device. + * @rq_pos_tree: rbtree sorted by next_request position, + * used when determining if two or more queues + * have interleaving requests (see bfq_close_cooperator). + * @busy_queues: number of bfq_queues containing requests (including the + * queue under service, even if it is idling). + * @queued: number of queued requests. + * @rq_in_driver: number of requests dispatched and waiting for completion. + * @sync_flight: number of sync requests in the driver. + * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples + * completed requests . + * @hw_tag_samples: nr of samples used to calculate hw_tag. + * @hw_tag: flag set to one if the driver is showing a queueing behavior. + * @budgets_assigned: number of budgets assigned. + * @idle_slice_timer: timer set when idling for the next sequential request + * from the queue under service. + * @unplug_work: delayed work to restart dispatching on the request queue. + * @active_queue: bfq_queue under service. + * @active_cic: cfq_io_context (cic) associated with the @active_queue. + * @last_position: on-disk position of the last served request. + * @last_budget_start: beginning of the last budget. + * @last_idling_start: beginning of the last idle slice. + * @peak_rate: peak transfer rate observed for a budget. + * @peak_rate_samples: number of samples used to calculate @peak_rate. + * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling. + * @cic_index: use small consequent indexes as radix tree keys to reduce depth + * @cic_list: list of all the cics active on the bfq_data device. + * @group_list: list of all the bfq_groups active on the device. + * @active_list: list of all the bfq_queues active on the device. + * @idle_list: list of all the bfq_queues idle on the device. + * @bfq_quantum: max number of requests dispatched per dispatch round. + * @bfq_fifo_expire: timeout for async/sync requests; when it expires + * requests are served in fifo order. + * @bfq_back_penalty: weight of backward seeks wrt forward ones. + * @bfq_back_max: maximum allowed backward seek. + * @bfq_slice_idle: maximum idling time. + * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning). + * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to + * async queues. 
+ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to + * to prevent seeky queues to impose long latencies to well + * behaved ones (this also implies that seeky queues cannot + * receive guarantees in the service domain; after a timeout + * they are charged for the whole allocated budget, to try + * to preserve a behavior reasonably fair among them, but + * without service-domain guarantees). + * @bfq_raising_coeff: Maximum factor by which the weight of a boosted + * queue is multiplied + * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies) + * @bfq_raising_rt_max_time: maximum duration for soft real-time processes + * @bfq_raising_min_idle_time: minimum idle period after which weight-raising + * may be reactivated for a queue (in jiffies) + * @bfq_raising_min_inter_arr_async: minimum period between request arrivals + * after which weight-raising may be + * reactivated for an already busy queue + * (in jiffies) + * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue, + * sectors per seconds + * @RT_prod: cached value of the product R*T used for computing the maximum + * duration of the weight raising automatically + * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions + * + * All the fields are protected by the @queue lock. + */ +struct bfq_data { + struct request_queue *queue; + + struct bfq_group *root_group; + + struct rb_root rq_pos_tree; + + int busy_queues; + int queued; + int rq_in_driver; + int sync_flight; + + int max_rq_in_driver; + int hw_tag_samples; + int hw_tag; + + int budgets_assigned; + + struct timer_list idle_slice_timer; + struct work_struct unplug_work; + + struct bfq_queue *active_queue; + struct cfq_io_context *active_cic; + + sector_t last_position; + + ktime_t last_budget_start; + ktime_t last_idling_start; + int peak_rate_samples; + u64 peak_rate; + unsigned long bfq_max_budget; + + unsigned int cic_index; + struct list_head cic_list; + struct hlist_head group_list; + struct list_head active_list; + struct list_head idle_list; + + unsigned int bfq_quantum; + unsigned int bfq_fifo_expire[2]; + unsigned int bfq_back_penalty; + unsigned int bfq_back_max; + unsigned int bfq_slice_idle; + u64 bfq_class_idle_last_service; + + unsigned int bfq_user_max_budget; + unsigned int bfq_max_budget_async_rq; + unsigned int bfq_timeout[2]; + + bool low_latency; + + /* parameters of the low_latency heuristics */ + unsigned int bfq_raising_coeff; + unsigned int bfq_raising_max_time; + unsigned int bfq_raising_rt_max_time; + unsigned int bfq_raising_min_idle_time; + unsigned int bfq_raising_min_inter_arr_async; + unsigned int bfq_raising_max_softrt_rate; + u64 RT_prod; + + struct bfq_queue oom_bfqq; +}; + +enum bfqq_state_flags { + BFQ_BFQQ_FLAG_busy = 0, /* has requests or is under service */ + BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ + BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ + BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ + BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ + BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ + BFQ_BFQQ_FLAG_sync, /* synchronous queue */ + BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ + BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ + BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ + BFQ_BFQQ_FLAG_some_coop_idle, /* some cooperator is inactive */ +}; + +#define BFQ_BFQQ_FNS(name) \ +static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ +{ \ + (bfqq)->flags |= (1 << 
BFQ_BFQQ_FLAG_##name); \ +} \ +static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ +{ \ + (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ +} \ +static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ +{ \ + return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ +} + +BFQ_BFQQ_FNS(busy); +BFQ_BFQQ_FNS(wait_request); +BFQ_BFQQ_FNS(must_alloc); +BFQ_BFQQ_FNS(fifo_expire); +BFQ_BFQQ_FNS(idle_window); +BFQ_BFQQ_FNS(prio_changed); +BFQ_BFQQ_FNS(sync); +BFQ_BFQQ_FNS(budget_new); +BFQ_BFQQ_FNS(coop); +BFQ_BFQQ_FNS(split_coop); +BFQ_BFQQ_FNS(some_coop_idle); +#undef BFQ_BFQQ_FNS + +/* Logging facilities. */ +#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ + blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) + +#define bfq_log(bfqd, fmt, args...) \ + blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) + +/* Expiration reasons. */ +enum bfqq_expiration { + BFQ_BFQQ_TOO_IDLE = 0, /* queue has been idling for too long */ + BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ + BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ + BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ +}; + +#ifdef CONFIG_CGROUP_BFQIO +/** + * struct bfq_group - per (device, cgroup) data structure. + * @entity: schedulable entity to insert into the parent group sched_data. + * @sched_data: own sched_data, to contain child entities (they may be + * both bfq_queues and bfq_groups). + * @group_node: node to be inserted into the bfqio_cgroup->group_data + * list of the containing cgroup's bfqio_cgroup. + * @bfqd_node: node to be inserted into the @bfqd->group_list list + * of the groups active on the same device; used for cleanup. + * @bfqd: the bfq_data for the device this group acts upon. + * @async_bfqq: array of async queues for all the tasks belonging to + * the group, one queue per ioprio value per ioprio_class, + * except for the idle class that has only one queue. + * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). + * @my_entity: pointer to @entity, %NULL for the toplevel group; used + * to avoid too many special cases during group creation/migration. + * + * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup + * there is a set of bfq_groups, each one collecting the lower-level + * entities belonging to the group that are acting on the same device. + * + * Locking works as follows: + * o @group_node is protected by the bfqio_cgroup lock, and is accessed + * via RCU from its readers. + * o @bfqd is protected by the queue lock, RCU is used to access it + * from the readers. + * o All the other fields are protected by the @bfqd queue lock. + */ +struct bfq_group { + struct bfq_entity entity; + struct bfq_sched_data sched_data; + + struct hlist_node group_node; + struct hlist_node bfqd_node; + + void *bfqd; + + struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; + struct bfq_queue *async_idle_bfqq; + + struct bfq_entity *my_entity; +}; + +/** + * struct bfqio_cgroup - bfq cgroup data structure. + * @css: subsystem state for bfq in the containing cgroup. + * @weight: cgroup weight. + * @ioprio: cgroup ioprio. + * @ioprio_class: cgroup ioprio_class. + * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. + * @group_data: list containing the bfq_group belonging to this cgroup. + * + * @group_data is accessed using RCU, with @lock protecting the updates, + * @ioprio and @ioprio_class are protected by @lock. 
+ */ +struct bfqio_cgroup { + struct cgroup_subsys_state css; + + unsigned short weight, ioprio, ioprio_class; + + spinlock_t lock; + struct hlist_head group_data; +}; +#else +struct bfq_group { + struct bfq_sched_data sched_data; + + struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; + struct bfq_queue *async_idle_bfqq; +}; +#endif + +static inline struct bfq_service_tree * +bfq_entity_service_tree(struct bfq_entity *entity) +{ + struct bfq_sched_data *sched_data = entity->sched_data; + unsigned int idx = entity->ioprio_class - 1; + + BUG_ON(idx >= BFQ_IOPRIO_CLASSES); + BUG_ON(sched_data == NULL); + + return sched_data->service_tree + idx; +} + +static inline struct bfq_queue *cic_to_bfqq(struct cfq_io_context *cic, + int is_sync) +{ + return cic->cfqq[!!is_sync]; +} + +static inline void cic_set_bfqq(struct cfq_io_context *cic, + struct bfq_queue *bfqq, int is_sync) +{ + cic->cfqq[!!is_sync] = bfqq; +} + +static inline void call_for_each_cic(struct io_context *ioc, + void (*func)(struct io_context *, + struct cfq_io_context *)) +{ + struct cfq_io_context *cic; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) + func(ioc, cic); + rcu_read_unlock(); +} + +#define CIC_DEAD_KEY 1ul +#define CIC_DEAD_INDEX_SHIFT 1 + +static inline void *bfqd_dead_key(struct bfq_data *bfqd) +{ + return (void *)(bfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); +} + +/** + * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. + * @ptr: a pointer to a bfqd. + * @flags: storage for the flags to be saved. + * + * This function allows cic->key and bfqg->bfqd to be protected by the + * queue lock of the bfqd they reference; the pointer is dereferenced + * under RCU, so the storage for bfqd is assured to be safe as long + * as the RCU read side critical section does not end. After the + * bfqd->queue->queue_lock is taken the pointer is rechecked, to be + * sure that no other writer accessed it. If we raced with a writer, + * the function returns NULL, with the queue unlocked, otherwise it + * returns the dereferenced pointer, with the queue locked. 
+ */ +static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, + unsigned long *flags) +{ + struct bfq_data *bfqd; + + rcu_read_lock(); + bfqd = rcu_dereference(*(struct bfq_data **)ptr); + + if (bfqd != NULL && !((unsigned long) bfqd & CIC_DEAD_KEY)) { + spin_lock_irqsave(bfqd->queue->queue_lock, *flags); + if (*ptr == bfqd) + goto out; + spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); + } + + bfqd = NULL; +out: + rcu_read_unlock(); + return bfqd; +} + +static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, + unsigned long *flags) +{ + spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); +} + +static void bfq_changed_ioprio(struct io_context *ioc, + struct cfq_io_context *cic); +static void bfq_put_queue(struct bfq_queue *bfqq); +static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); +static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, int is_sync, + struct io_context *ioc, gfp_t gfp_mask); +static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); +static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); +#endif diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 6f9bbd97865..d0d16d4a79a 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include /* for max_pfn/max_low_pfn */ #include @@ -16,13 +17,12 @@ */ static struct kmem_cache *iocontext_cachep; -static void cfq_dtor(struct io_context *ioc) +static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list) { - if (!hlist_empty(&ioc->cic_list)) { + if (!hlist_empty(list)) { struct cfq_io_context *cic; - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); + cic = hlist_entry(list->first, struct cfq_io_context, cic_list); cic->dtor(ioc); } } @@ -40,7 +40,9 @@ int put_io_context(struct io_context *ioc) if (atomic_long_dec_and_test(&ioc->refcount)) { rcu_read_lock(); - cfq_dtor(ioc); + + hlist_sched_dtor(ioc, &ioc->cic_list); + hlist_sched_dtor(ioc, &ioc->bfq_cic_list); rcu_read_unlock(); kmem_cache_free(iocontext_cachep, ioc); @@ -50,15 +52,14 @@ int put_io_context(struct io_context *ioc) } EXPORT_SYMBOL(put_io_context); -static void cfq_exit(struct io_context *ioc) +static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list) { rcu_read_lock(); - if (!hlist_empty(&ioc->cic_list)) { + if (!hlist_empty(list)) { struct cfq_io_context *cic; - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); + cic = hlist_entry(list->first, struct cfq_io_context, cic_list); cic->exit(ioc); } rcu_read_unlock(); @@ -74,9 +75,10 @@ void exit_io_context(struct task_struct *task) task->io_context = NULL; task_unlock(task); - if (atomic_dec_and_test(&ioc->nr_tasks)) - cfq_exit(ioc); - + if (atomic_dec_and_test(&ioc->nr_tasks)) { + hlist_sched_exit(ioc, &ioc->cic_list); + hlist_sched_exit(ioc, &ioc->bfq_cic_list); + } put_io_context(ioc); } @@ -89,12 +91,14 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) atomic_long_set(&ioc->refcount, 1); atomic_set(&ioc->nr_tasks, 1); spin_lock_init(&ioc->lock); - ioc->ioprio_changed = 0; + bitmap_zero(ioc->ioprio_changed, IOC_IOPRIO_CHANGED_BITS); ioc->ioprio = 0; ioc->last_waited = 0; /* doesn't matter... 
*/ ioc->nr_batch_requests = 0; /* because this is 0 */ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ioc->cic_list); + INIT_RADIX_TREE(&ioc->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH); + INIT_HLIST_HEAD(&ioc->bfq_cic_list); ioc->ioc_data = NULL; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) ioc->cgroup_changed = 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4c12869fcf7..36057a141c8 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2934,7 +2934,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) static void cfq_ioc_set_ioprio(struct io_context *ioc) { call_for_each_cic(ioc, changed_ioprio); - ioc->ioprio_changed = 0; } static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, @@ -3226,8 +3225,13 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) goto err_free; out: - smp_read_barrier_depends(); - if (unlikely(ioc->ioprio_changed)) + /* + * test_and_clear_bit() implies a memory barrier, paired with + * the wmb() in fs/ioprio.c, so the value seen for ioprio is the + * new one. + */ + if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED, + ioc->ioprio_changed))) cfq_ioc_set_ioprio(ioc); #ifdef CONFIG_CFQ_GROUP_IOSCHED diff --git a/fs/ioprio.c b/fs/ioprio.c index 7da2a06508e..95a6c2b04e0 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -30,7 +30,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) { - int err; + int err, i; struct io_context *ioc; const struct cred *cred = current_cred(), *tcred; @@ -60,12 +60,17 @@ int set_task_ioprio(struct task_struct *task, int ioprio) err = -ENOMEM; break; } + /* let other ioc users see the new values */ + smp_wmb(); task->io_context = ioc; } while (1); if (!err) { ioc->ioprio = ioprio; - ioc->ioprio_changed = 1; + /* make sure schedulers see the new ioprio value */ + wmb(); + for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++) + set_bit(i, ioc->ioprio_changed); } task_unlock(task); diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ac663c18776..c966638399e 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -64,3 +64,9 @@ SUBSYS(perf) #endif /* */ + +#ifdef CONFIG_CGROUP_BFQIO +SUBSYS(bfqio) +#endif + +/* */ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 5037a0ad231..69fdd5894ef 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -1,10 +1,10 @@ #ifndef IOCONTEXT_H #define IOCONTEXT_H +#include #include #include -struct cfq_queue; struct cfq_ttime { unsigned long last_end_request; @@ -16,7 +16,7 @@ struct cfq_ttime { struct cfq_io_context { void *key; - struct cfq_queue *cfqq[2]; + void *cfqq[2]; struct io_context *ioc; @@ -31,6 +31,16 @@ struct cfq_io_context { struct rcu_head rcu_head; }; +/* + * Indexes into the ioprio_changed bitmap. A bit set indicates that + * the corresponding I/O scheduler needs to see a ioprio update. + */ +enum { + IOC_CFQ_IOPRIO_CHANGED, + IOC_BFQ_IOPRIO_CHANGED, + IOC_IOPRIO_CHANGED_BITS +}; + /* * I/O subsystem state of the associated processes. It is refcounted * and kmalloc'ed. These could be shared between processes. 
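(Illustration only; the sketch below is not part of the patch.) The changes to fs/ioprio.c, block/cfq-iosched.c and include/linux/iocontext.h replace the single ioprio_changed flag with a per-scheduler bitmap, so CFQ and BFQ can each notice an ioprio update independently. A minimal sketch of the resulting handoff pattern follows; the two helper names are hypothetical, while the DECLARE_BITMAP()-backed set_bit(), test_and_clear_bit() and wmb() calls are the kernel primitives the patch itself uses.

#include <linux/bitops.h>
#include <linux/iocontext.h>

/*
 * Writer side (cf. set_task_ioprio() above): publish the new ioprio
 * first, then raise one "changed" bit per scheduler. The write barrier
 * orders the ioprio store before the bit stores.
 */
static void ioprio_publish_change(struct io_context *ioc, int ioprio)
{
	int i;

	ioc->ioprio = ioprio;
	wmb();	/* pairs with the barrier implied by test_and_clear_bit() */
	for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++)
		set_bit(i, ioc->ioprio_changed);
}

/*
 * Reader side (cf. cfq_get_io_context() above): each scheduler clears
 * only its own bit; test_and_clear_bit() implies a full barrier, so a
 * subsequent read of ioc->ioprio sees the value published above.
 */
static int cfq_ioprio_needs_update(struct io_context *ioc)
{
	return test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED,
				  ioc->ioprio_changed);
}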
@@ -43,7 +53,7 @@ struct io_context { spinlock_t lock; unsigned short ioprio; - unsigned short ioprio_changed; + DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS); #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) unsigned short cgroup_changed; @@ -57,6 +67,8 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; + struct radix_tree_root bfq_radix_root; + struct hlist_head bfq_cic_list; void __rcu *ioc_data; }; From f298239a4a165775654ba2627f6869bb0c79b8b1 Mon Sep 17 00:00:00 2001 From: Arianna Avanzini Date: Mon, 10 Dec 2012 13:50:54 +0100 Subject: [PATCH 271/678] block: Switch from BFQ-v5 for 3.1.0 to BFQ-v5r1 for 3.1.0. Improvements: - BUG FIX: Fixed stall occurring when the active queue is moved to a different group while idling (this caused the idling timer to be cancelled and hence no new queue to be selected, and no new request to be dispatched). - BUG FIX: Fixed wrong assignment of too high budgets to queues during the first few seconds after initialization. - BUG FIX: Added proper locking to the function handling the "weights" tunable. Signed-off-by: Paolo Valente Signed-off-by: Mauro Andreolini Signed-off-by: Arianna Avanzini --- block/bfq-cgroup.c | 5 +++++ block/bfq-ioc.c | 2 ++ block/bfq-iosched.c | 11 ++++++++++- block/bfq-sched.c | 2 ++ block/bfq.h | 4 +++- 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 74ae73b91e1..0e575c54f88 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -7,6 +7,8 @@ * Copyright (C) 2008 Fabio Checconi * Paolo Valente * + * Copyright (C) 2010 Paolo Valente + * * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. */ @@ -276,6 +278,9 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (busy && resume) bfq_activate_bfqq(bfqd, bfqq); + + if (bfqd->active_queue == NULL && !bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); } /** diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c index 8f2b6c61d3f..c0671a6b650 100644 --- a/block/bfq-ioc.c +++ b/block/bfq-ioc.c @@ -6,6 +6,8 @@ * * Copyright (C) 2008 Fabio Checconi * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente */ /** diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 576cd03a28c..f067270eb1f 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7,6 +7,8 @@ * Copyright (C) 2008 Fabio Checconi * Paolo Valente * + * Copyright (C) 2010 Paolo Valente + * * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. 
* * BFQ is a proportional share disk scheduling algorithm based on the @@ -142,6 +144,8 @@ static DEFINE_IDA(cic_index_ida); ((struct cfq_io_context *) (rq)->elevator_private[0]) #define RQ_BFQQ(rq) ((rq)->elevator_private[1]) +static inline void bfq_schedule_dispatch(struct bfq_data *bfqd); + #include "bfq-ioc.c" #include "bfq-sched.c" #include "bfq-cgroup.c" @@ -853,7 +857,7 @@ static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) { if (bfqd->budgets_assigned < 194) - return bfq_default_max_budget; + return bfq_default_max_budget / 32; else return bfqd->bfq_max_budget / 32; } @@ -2763,6 +2767,8 @@ static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) struct bfq_data *bfqd = e->elevator_data; ssize_t num_char = 0; + spin_lock_irq(bfqd->queue->queue_lock); + num_char += sprintf(page + num_char, "Active:\n"); list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { num_char += sprintf(page + num_char, @@ -2783,6 +2789,9 @@ static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) bfqq->last_rais_start_finish), jiffies_to_msecs(bfqq->raising_cur_max_time)); } + + spin_unlock_irq(bfqd->queue->queue_lock); + return num_char; } diff --git a/block/bfq-sched.c b/block/bfq-sched.c index fd50b7fd130..a0051489bef 100644 --- a/block/bfq-sched.c +++ b/block/bfq-sched.c @@ -6,6 +6,8 @@ * * Copyright (C) 2008 Fabio Checconi * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente */ #ifdef CONFIG_CGROUP_BFQIO diff --git a/block/bfq.h b/block/bfq.h index be2c572978c..4f67daae407 100644 --- a/block/bfq.h +++ b/block/bfq.h @@ -1,11 +1,13 @@ /* - * BFQ-v5 for 3.1.0: data structures and common functions prototypes. + * BFQ-v5r1 for 3.1.0: data structures and common functions prototypes. 
* * Based on ideas and code from CFQ: * Copyright (C) 2003 Jens Axboe * * Copyright (C) 2008 Fabio Checconi * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente */ #ifndef _BFQ_H From 475b036388af8206551a3ed6cae155d160f82d59 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 17:35:33 -0500 Subject: [PATCH 272/678] cpufreq: ondemand: restrict min to lp max after touch for X ms also more conservative parameters to go with --- drivers/cpufreq/cpufreq_ondemand.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 64975af087d..2e2b8311a31 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -38,15 +38,15 @@ */ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define DEF_FREQUENCY_UP_THRESHOLD (88) -#define DEF_SAMPLING_DOWN_FACTOR (2) +#define DEF_FREQUENCY_UP_THRESHOLD (92) +#define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define MICRO_FREQUENCY_UP_THRESHOLD (88) +#define MICRO_FREQUENCY_UP_THRESHOLD (95) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) -#define DEF_SAMPLING_RATE (50000) +#define DEF_SAMPLING_RATE (60000) #define DEF_IO_IS_BUSY (1) #define DEF_UI_DYNAMIC_SAMPLING_RATE (30000) #define DEF_UI_COUNTER (5) @@ -324,7 +324,7 @@ static ssize_t store_two_phase_freq(struct kobject *a, struct attribute *b, #endif static unsigned int Touch_poke_attr[4] = {1500000, 0, 0, 0}; -static unsigned int Touch_poke_boost_duration_ms = 0; +static unsigned int Touch_poke_boost_duration_ms = 2000; static unsigned long Touch_poke_boost_till_jiffies = 0; static ssize_t store_touch_poke(struct kobject *a, struct attribute *b, @@ -565,8 +565,8 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* * keep freq for touch boost */ - if (Touch_poke_boost_till_jiffies > jiffies) - return; +// if (Touch_poke_boost_till_jiffies > jiffies) +// return; /* * Every sampling_rate, we check, if current idle time is less @@ -705,6 +705,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) #endif /* Check for frequency decrease */ /* if we cannot reduce the frequency anymore, break out early */ + if (policy->cur == policy->min) return; @@ -717,6 +718,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * policy->cur) { unsigned int freq_next; + unsigned int freq_min; freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential); @@ -724,8 +726,13 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* No longer fully busy, reset rate_mult */ this_dbs_info->rate_mult = 1; - if (freq_next < policy->min) - freq_next = policy->min; + if (is_lp_cluster() && Touch_poke_boost_till_jiffies > jiffies) { + freq_min = idle_top_freq; + } else { + freq_min = policy->min; + } + if (freq_next < freq_min) + freq_next = freq_min; if (!dbs_tuners_ins.powersave_bias) { debug_freq = freq_next; From e6e006a678d7b31b779c34b91b00e1924b1c95e3 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 17:37:22 -0500 Subject: [PATCH 273/678] mach-tegra: cpu-tegra3.c: back to old delays --- arch/arm/mach-tegra/cpu-tegra3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index 
98ab950d70e..e8a4aa92a76 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -39,9 +39,9 @@ #include "clock.h" #define INITIAL_STATE TEGRA_HP_DISABLED -#define UP2G0_DELAY_MS 384 -#define UP2Gn_DELAY_MS 128 -#define DOWN_DELAY_MS 2112 +#define UP2G0_DELAY_MS 300 +#define UP2Gn_DELAY_MS 150 +#define DOWN_DELAY_MS 1000 static struct mutex *tegra3_cpu_lock; From f8963ea9d5d04c96f3cad9cb4b6882028972a882 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 17:38:01 -0500 Subject: [PATCH 274/678] defconfig: update --- arch/arm/configs/metallice_grouper_defconfig | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 7e3b9e1ebce..01ca40768ab 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -176,14 +176,18 @@ CONFIG_LBDAF=y # CONFIG_IOSCHED_NOOP=y CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_ROW=y CONFIG_IOSCHED_CFQ=y CONFIG_IOSCHED_SIO=y -CONFIG_IOSCHED_VR=y +# CONFIG_IOSCHED_VR is not set +CONFIG_IOSCHED_BFQ=y +CONFIG_CGROUP_BFQIO=y CONFIG_DEFAULT_DEADLINE=y +# CONFIG_DEFAULT_ROW is not set # CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_BFQ is not set # CONFIG_DEFAULT_NOOP is not set # CONFIG_DEFAULT_SIO is not set -# CONFIG_DEFAULT_VR is not set CONFIG_DEFAULT_IOSCHED="deadline" # CONFIG_INLINE_SPIN_TRYLOCK is not set # CONFIG_INLINE_SPIN_TRYLOCK_BH is not set From 249e2361dbd0794b677443d300f970c435d6f6f8 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 3 Jan 2013 19:22:39 -0500 Subject: [PATCH 275/678] Revert "Revert " arm: tegra: usb: phy: fix hotplug function"" This reverts commit b912ac39fad08da1b89a91cb78384b173d982102. --- arch/arm/mach-tegra/usb_phy.c | 6 +++--- drivers/usb/host/ehci-tegra.c | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/usb_phy.c b/arch/arm/mach-tegra/usb_phy.c index e40e2415c5a..6e84e3d8279 100755 --- a/arch/arm/mach-tegra/usb_phy.c +++ b/arch/arm/mach-tegra/usb_phy.c @@ -865,7 +865,7 @@ static void utmi_phy_clk_disable(struct tegra_usb_phy *phy) val |= HOSTPC1_DEVLC_PHCD; writel(val, base + HOSTPC1_DEVLC); #endif - if (phy->instance == 2) { + if (phy->hotplug) { val = readl(base + USB_SUSP_CTRL); val |= USB_PHY_CLK_VALID_INT_ENB; writel(val, base + USB_SUSP_CTRL); @@ -1482,7 +1482,7 @@ static int utmi_phy_power_off(struct tegra_usb_phy *phy, bool is_dpd) writel(val, base + UTMIP_BAT_CHRG_CFG0); } - if (phy->instance != 2) { + if (!phy->hotplug) { val = readl(base + UTMIP_XCVR_CFG0); val |= (UTMIP_FORCE_PD_POWERDOWN | UTMIP_FORCE_PD2_POWERDOWN | UTMIP_FORCE_PDZI_POWERDOWN); @@ -1512,7 +1512,7 @@ static int utmi_phy_power_off(struct tegra_usb_phy *phy, bool is_dpd) utmi_phy_clk_disable(phy); - utmip_pad_power_off(phy, true); + utmip_pad_power_off(phy, is_dpd); return 0; } diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index cad3bbb2942..76f40688f82 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -208,7 +208,6 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) val &= ~TEGRA_USB_PHY_CLK_VALID_INT_ENB | TEGRA_USB_PHY_CLK_VALID_INT_STS; writel(val , (hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET)); - val = readl(&hw->status); if (!(val & STS_PCD)) { spin_unlock(&ehci->lock); @@ -218,6 +217,12 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) val &= ~(TEGRA_USB_PORTSC1_WKCN | PORT_RWC_BITS); writel(val , (hcd->regs + 
TEGRA_USB_PORTSC1_OFFSET)); } + else if (tegra->bus_suspended && + tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) { + printk("%s: no device connected before suspend\n", __func__); + spin_unlock(&ehci->lock); + return 0; + } spin_unlock(&ehci->lock); } @@ -1366,6 +1371,7 @@ static int tegra_ehci_remove(struct platform_device *pdev) usb_remove_hcd(hcd); usb_put_hcd(hcd); tegra_usb_phy_power_off(tegra->phy, true); + tegra_ehci_disable_phy_interrupt(hcd); tegra_usb_phy_close(tegra->phy); iounmap(hcd->regs); From d92870a41048d4345e66c0172f80ae5a97da8f61 Mon Sep 17 00:00:00 2001 From: "Raymond Golo (intersectRaven)" Date: Mon, 31 Dec 2012 13:00:26 +0800 Subject: [PATCH 276/678] Workaround for crash when unplugging OTG devices. Just always disable the IRQs instead of checking with dev which causes a crash upon device retrieval. --- drivers/usb/host/ehci-tegra.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 76f40688f82..d5212ebc968 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -761,16 +761,13 @@ static int tegra_usb_resume(struct usb_hcd *hcd, bool is_dpd) * Must not be called with a lock on ehci->lock */ static void tegra_ehci_disable_phy_interrupt(struct usb_hcd *hcd) { - struct tegra_ehci_hcd *tegra; u32 val; if (hcd->irq >= 0) { - tegra = dev_get_drvdata(hcd->self.controller); - if (tegra->phy->hotplug) { - /* Disable PHY clock valid interrupts */ - val = readl(hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET); - val &= ~TEGRA_USB_PHY_CLK_VALID_INT_ENB; - writel(val , (hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET)); - } + /* Disable PHY clock valid interrupts */ + val = readl(hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET); + val &= ~TEGRA_USB_PHY_CLK_VALID_INT_ENB; + writel(val , (hcd->regs + TEGRA_USB_SUSP_CTRL_OFFSET)); + /* Wait for the interrupt handler to finish */ synchronize_irq(hcd->irq); } From 157ef8d604707462525a9575b9dc3edc0c83debb Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 4 Jan 2013 23:48:38 -0500 Subject: [PATCH 277/678] mach-tegra: tegra3_clocks.c: use max emc rate at LP max for smoothness --- arch/arm/mach-tegra/tegra3_clocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index ae18e7f5849..99b64e4f250 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4809,7 +4809,7 @@ unsigned long tegra_emc_to_cpu_ratio(unsigned long cpu_rate) /* Vote on memory bus frequency based on cpu frequency; cpu rate is in kHz, emc rate is in Hz */ - if (cpu_rate >= 925000) + if (cpu_rate >= 650000) return emc_max_rate; /* cpu >= 925 MHz, emc max */ else if (cpu_rate >= 450000) return emc_max_rate/2; /* cpu >= 450 MHz, emc max/2 */ From d8c98f71c3f0548b1201d01be142d9d8111b9114 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 8 Jan 2013 15:40:56 -0500 Subject: [PATCH 278/678] Revert "vfp: compile with neon" This reverts commit 1eaa67f41e8c8220a63da7375169c4413fc97163. 
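(Illustration only, referring back to the arch/arm/mach-tegra/tegra3_clocks.c hunk above; it is not part of either patch.) Lowering the threshold from 925000 to 650000 means the maximum EMC (memory bus) rate is requested whenever the CPU runs at 650 MHz or above, instead of only at 925 MHz and above. A condensed sketch of the vote after the change, using a hypothetical function name, with cpu_rate in kHz as in the original and the tiers below 450 MHz elided because they fall outside the hunk:

/* Hypothetical condensation of tegra_emc_to_cpu_ratio() after the change. */
static unsigned long emc_vote_after_patch(unsigned long cpu_rate,
					  unsigned long emc_max_rate)
{
	if (cpu_rate >= 650000)		/* previously 925000 */
		return emc_max_rate;	/* cpu >= 650 MHz: full EMC rate */
	if (cpu_rate >= 450000)
		return emc_max_rate / 2; /* cpu >= 450 MHz: half EMC rate */
	/* lower tiers are unchanged by the patch and not shown here */
	return 0;			/* placeholder only */
}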
--- arch/arm/vfp/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile index ec624f0150c..6de73aab019 100644 --- a/arch/arm/vfp/Makefile +++ b/arch/arm/vfp/Makefile @@ -7,7 +7,7 @@ # ccflags-y := -DDEBUG # asflags-y := -DDEBUG -KBUILD_AFLAGS :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=neon) +KBUILD_AFLAGS :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=softvfp+vfp) LDFLAGS +=--no-warn-mismatch obj-y += vfp.o From 589b590a52421847f766dcf4fb2554eca7bad85a Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 10 Jan 2013 14:38:00 -0500 Subject: [PATCH 279/678] cpufreq: interactive: update and reset interactive governor thanks to franco for the kang --- drivers/cpufreq/cpufreq_interactive.c | 968 +++++++++++---------- include/trace/events/cpufreq_interactive.h | 58 +- 2 files changed, 542 insertions(+), 484 deletions(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 19daca8c5e4..93cc5fa10ff 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -19,48 +19,39 @@ #include #include #include -#include +#include +#include +#include #include #include #include #include #include #include -#include #include #include #include -#include -#include - -#include "../../arch/arm/mach-tegra/clock.h" -#include "../../arch/arm/mach-tegra/pm.h" #define CREATE_TRACE_POINTS #include -/* lpcpu variables */ -static struct clk *cpu_lp_clk; -static unsigned int idle_top_freq; - static atomic_t active_count = ATOMIC_INIT(0); struct cpufreq_interactive_cpuinfo { struct timer_list cpu_timer; - int timer_idlecancel; + struct timer_list cpu_slack_timer; + spinlock_t load_lock; /* protects the next 4 fields */ u64 time_in_idle; - u64 time_in_iowait; - u64 idle_exit_time; - u64 timer_run_time; - int idling; - u64 freq_change_time; - u64 freq_change_time_in_idle; - u64 freq_change_time_in_iowait; + u64 time_in_idle_timestamp; + u64 cputime_speedadj; + u64 cputime_speedadj_timestamp; struct cpufreq_policy *policy; struct cpufreq_frequency_table *freq_table; unsigned int target_freq; unsigned int floor_freq; u64 floor_validate_time; + u64 hispeed_validate_time; + struct rw_semaphore enable_sem; int governor_enabled; }; @@ -71,50 +62,38 @@ static struct task_struct *speedchange_task; static cpumask_t speedchange_cpumask; static spinlock_t speedchange_cpumask_lock; -struct cpufreq_interactive_core_lock { - struct pm_qos_request_list qos_min_req; - struct pm_qos_request_list qos_max_req; - struct task_struct *lock_task; - struct work_struct unlock_work; - struct timer_list unlock_timer; - int request_active; - unsigned long lock_period; - struct mutex mutex; -}; - -/* default timeout for core lock down */ -#define DEFAULT_CORE_LOCK_PERIOD 200000 /* 200 ms */ - -static struct cpufreq_interactive_core_lock core_lock; - /* Hi speed to bump to from lo speed when load burst (default max) */ -static unsigned int hispeed_freq = 1300000; +static unsigned int hispeed_freq; -/* Go to hispeed_freq when CPU load at or above this value. */ -#define DEFAULT_GO_HISPEED_LOAD 85 -static unsigned long go_hispeed_load; +/* Go to hi speed when CPU load at or above this value. */ +#define DEFAULT_GO_HISPEED_LOAD 99 +static unsigned long go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; -/* Consider IO as busy */ -static unsigned long io_is_busy; +/* Target load. Lower values result in higher CPU speeds. 
*/ +#define DEFAULT_TARGET_LOAD 90 +static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; +static spinlock_t target_loads_lock; +static unsigned int *target_loads = default_target_loads; +static int ntarget_loads = ARRAY_SIZE(default_target_loads); /* * The minimum amount of time to spend at a frequency before we can ramp down. */ -#define DEFAULT_MIN_SAMPLE_TIME 60000; -static unsigned long min_sample_time; +#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC) +static unsigned long min_sample_time = DEFAULT_MIN_SAMPLE_TIME; /* * The sample rate of the timer used to increase frequency */ -#define DEFAULT_TIMER_RATE 40000; -static unsigned long timer_rate; +#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC) +static unsigned long timer_rate = DEFAULT_TIMER_RATE; /* * Wait this long before raising speed above hispeed, by default a single * timer interval. */ #define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE -static unsigned long above_hispeed_delay_val; +static unsigned long above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY; /* * Boost pulse to hispeed on touchscreen input. @@ -129,10 +108,19 @@ struct cpufreq_interactive_inputopen { static struct cpufreq_interactive_inputopen inputopen; static struct workqueue_struct *inputopen_wq; +/* Non-zero means indefinite speed boost active */ +static int boost_val; +/* Duration of a boot pulse in usecs */ +static int boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME; +/* End time of boost pulse in ktime converted to usecs */ +static u64 boostpulse_endtime; + /* - * Non-zero means longer-term speed boost active. + * Max additional time to wait in idle, beyond timer_rate, at speeds above + * minimum before wakeup to reduce speed, or -1 if unnecessary. */ -static int boost_val; +#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE) +static int timer_slack_val = DEFAULT_TIMER_SLACK; static int cpufreq_governor_interactive(struct cpufreq_policy *policy, unsigned int event); @@ -147,156 +135,210 @@ struct cpufreq_governor cpufreq_gov_interactive = { .owner = THIS_MODULE, }; -static unsigned int cpufreq_interactive_get_target( - int cpu_load, int load_since_change, +static void cpufreq_interactive_timer_resched( struct cpufreq_interactive_cpuinfo *pcpu) { - unsigned int target_freq; + unsigned long expires = jiffies + usecs_to_jiffies(timer_rate); + unsigned long flags; - /* - * Choose greater of short-term load (since last idle timer - * started or timer function re-armed itself) or long-term load - * (since last frequency change). 
- */ - if (load_since_change > cpu_load) - cpu_load = load_since_change; - - /* Jump boost policy */ - if (cpu_load >= go_hispeed_load || boost_val) { - if (pcpu->target_freq < hispeed_freq && - hispeed_freq < pcpu->policy->max) { - target_freq = hispeed_freq; - } else { - target_freq = pcpu->policy->max * cpu_load / 100; - - if (target_freq < hispeed_freq) - target_freq = hispeed_freq; - - if (pcpu->target_freq == hispeed_freq && - target_freq > hispeed_freq && - cputime64_sub(pcpu->timer_run_time, - pcpu->freq_change_time) - < above_hispeed_delay_val) { - - target_freq = pcpu->target_freq; - trace_cpufreq_interactive_notyet( - smp_processor_id(), - cpu_load, - pcpu->target_freq, - target_freq); + mod_timer_pinned(&pcpu->cpu_timer, expires); + if (timer_slack_val >= 0 && pcpu->target_freq > pcpu->policy->min) { + expires += usecs_to_jiffies(timer_slack_val); + mod_timer_pinned(&pcpu->cpu_slack_timer, expires); + } + + spin_lock_irqsave(&pcpu->load_lock, flags); + pcpu->time_in_idle = + get_cpu_idle_time_us(smp_processor_id(), + &pcpu->time_in_idle_timestamp); + pcpu->cputime_speedadj = 0; + pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp; + spin_unlock_irqrestore(&pcpu->load_lock, flags); +} + +static unsigned int freq_to_targetload(unsigned int freq) +{ + int i; + unsigned int ret; + unsigned long flags; + + spin_lock_irqsave(&target_loads_lock, flags); + + for (i = 0; i < ntarget_loads - 1 && freq >= target_loads[i+1]; i += 2) + ; + + ret = target_loads[i]; + spin_unlock_irqrestore(&target_loads_lock, flags); + return ret; +} + +/* + * If increasing frequencies never map to a lower target load then + * choose_freq() will find the minimum frequency that does not exceed its + * target load given the current load. + */ + +static unsigned int choose_freq( + struct cpufreq_interactive_cpuinfo *pcpu, unsigned int loadadjfreq) +{ + unsigned int freq = pcpu->policy->cur; + unsigned int prevfreq, freqmin, freqmax; + unsigned int tl; + int index; + + freqmin = 0; + freqmax = UINT_MAX; + + do { + prevfreq = freq; + tl = freq_to_targetload(freq); + + /* + * Find the lowest frequency where the computed load is less + * than or equal to the target load. + */ + + cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, loadadjfreq / tl, + CPUFREQ_RELATION_L, &index); + freq = pcpu->freq_table[index].frequency; + + if (freq > prevfreq) { + /* The previous frequency is too low. */ + freqmin = prevfreq; + + if (freq >= freqmax) { + /* + * Find the highest frequency that is less + * than freqmax. + */ + cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmax - 1, CPUFREQ_RELATION_H, + &index); + freq = pcpu->freq_table[index].frequency; + + if (freq == freqmin) { + /* + * The first frequency below freqmax + * has already been found to be too + * low. freqmax is the lowest speed + * we found that is fast enough. + */ + freq = freqmax; + break; + } + } + } else if (freq < prevfreq) { + /* The previous frequency is high enough. */ + freqmax = prevfreq; + + if (freq <= freqmin) { + /* + * Find the lowest frequency that is higher + * than freqmin. + */ + cpufreq_frequency_table_target( + pcpu->policy, pcpu->freq_table, + freqmin + 1, CPUFREQ_RELATION_L, + &index); + freq = pcpu->freq_table[index].frequency; + + /* + * If freqmax is the first frequency above + * freqmin then we have already found that + * this speed is fast enough. 
+ */ + if (freq == freqmax) + break; } } - } else { - target_freq = idle_top_freq * cpu_load / 100; - } - target_freq = min(target_freq, pcpu->policy->max); - return target_freq; + /* If same frequency chosen as previous then done. */ + } while (freq != prevfreq); + + return freq; } -static inline cputime64_t get_cpu_iowait_time( - unsigned int cpu, cputime64_t *wall) +static u64 update_load(int cpu) { - u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu); + u64 now; + u64 now_idle; + unsigned int delta_idle; + unsigned int delta_time; + u64 active_time; - if (iowait_time == -1ULL) - return 0; + now_idle = get_cpu_idle_time_us(cpu, &now); + delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle); + delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp); + active_time = delta_time - delta_idle; + pcpu->cputime_speedadj += active_time * pcpu->policy->cur; - return iowait_time; + pcpu->time_in_idle = now_idle; + pcpu->time_in_idle_timestamp = now; + return now; } static void cpufreq_interactive_timer(unsigned long data) { - unsigned int delta_idle; - unsigned int delta_iowait; + u64 now; unsigned int delta_time; + u64 cputime_speedadj; int cpu_load; - int load_since_change; - u64 time_in_idle; - u64 time_in_iowait; - u64 idle_exit_time; struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, data); - u64 now_idle; - u64 now_iowait; unsigned int new_freq; + unsigned int loadadjfreq; unsigned int index; unsigned long flags; + bool boosted; - smp_rmb(); - + if (!down_read_trylock(&pcpu->enable_sem)) + return; if (!pcpu->governor_enabled) goto exit; - /* - * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time, - * this lets idle exit know the current idle time sample has - * been processed, and idle exit can generate a new sample and - * re-arm the timer. This prevents a concurrent idle - * exit on that CPU from writing a new set of info at the same time - * the timer function runs (the timer function can't use that info - * until more time passes). - */ - time_in_idle = pcpu->time_in_idle; - time_in_iowait = pcpu->time_in_iowait; - idle_exit_time = pcpu->idle_exit_time; - now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time); - now_iowait = get_cpu_iowait_time(data, NULL); - smp_wmb(); - - /* If we raced with cancelling a timer, skip. */ - if (!idle_exit_time) - goto exit; - - delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle); - delta_iowait = (unsigned int) cputime64_sub(now_iowait, time_in_iowait); - delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, - idle_exit_time); + spin_lock_irqsave(&pcpu->load_lock, flags); + now = update_load(data); + delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp); + cputime_speedadj = pcpu->cputime_speedadj; + spin_unlock_irqrestore(&pcpu->load_lock, flags); - /* - * If timer ran less than 1ms after short-term sample started, retry. 
- */ - if (delta_time < 1000) + if (WARN_ON_ONCE(!delta_time)) goto rearm; - if (delta_idle > delta_time) - cpu_load = 0; - else { - if (io_is_busy && delta_idle >= delta_iowait) - delta_idle -= delta_iowait; + do_div(cputime_speedadj, delta_time); + loadadjfreq = (unsigned int)cputime_speedadj * 100; + cpu_load = loadadjfreq / pcpu->target_freq; + boosted = boost_val || now < boostpulse_endtime; - cpu_load = 100 * (delta_time - delta_idle) / delta_time; + if (cpu_load >= go_hispeed_load || boosted) { + if (pcpu->target_freq < hispeed_freq) { + new_freq = hispeed_freq; + } else { + new_freq = choose_freq(pcpu, loadadjfreq); + + if (new_freq < hispeed_freq) + new_freq = hispeed_freq; + } + } else { + new_freq = choose_freq(pcpu, loadadjfreq); } - delta_idle = (unsigned int) cputime64_sub(now_idle, - pcpu->freq_change_time_in_idle); - delta_iowait = (unsigned int) cputime64_sub(now_iowait, - pcpu->freq_change_time_in_iowait); - delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time, - pcpu->freq_change_time); - - if ((delta_time == 0) || (delta_idle > delta_time)) - load_since_change = 0; - else { - if (io_is_busy && delta_idle >= delta_iowait) - delta_idle -= delta_iowait; - - load_since_change = - 100 * (delta_time - delta_idle) / delta_time; + if (pcpu->target_freq >= hispeed_freq && + new_freq > pcpu->target_freq && + now - pcpu->hispeed_validate_time < above_hispeed_delay_val) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); + goto rearm; } - - /* - * Combine short-term load (since last idle timer started or timer - * function re-armed itself) and long-term load (since last frequency - * change) to determine new target frequency. - * - * This function implements the cpufreq scaling policy - */ - new_freq = cpufreq_interactive_get_target(cpu_load, load_since_change, - pcpu); + + pcpu->hispeed_validate_time = now; if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table, - new_freq, CPUFREQ_RELATION_H, + new_freq, CPUFREQ_RELATION_L, &index)) { pr_warn_once("timer %d: cpufreq_frequency_table_target error\n", (int) data); @@ -310,28 +352,37 @@ static void cpufreq_interactive_timer(unsigned long data) * floor frequency for the minimum sample time since last validated. */ if (new_freq < pcpu->floor_freq) { - if (cputime64_sub(pcpu->timer_run_time, - pcpu->floor_validate_time) - < min_sample_time) { - - trace_cpufreq_interactive_notyet(data, cpu_load, - pcpu->target_freq, new_freq); + if (now - pcpu->floor_validate_time < min_sample_time) { + trace_cpufreq_interactive_notyet( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); goto rearm; } } - pcpu->floor_freq = new_freq; - pcpu->floor_validate_time = pcpu->timer_run_time; + /* + * Update the timestamp for checking whether speed has been held at + * or above the selected frequency for a minimum of min_sample_time, + * if not boosted to hispeed_freq. If boosted to hispeed_freq then we + * allow the speed to drop as soon as the boostpulse duration expires + * (or the indefinite boost is turned off). 
+ */ + + if (!boosted || new_freq > hispeed_freq) { + pcpu->floor_freq = new_freq; + pcpu->floor_validate_time = now; + } if (pcpu->target_freq == new_freq) { - trace_cpufreq_interactive_already(data, cpu_load, - pcpu->target_freq, new_freq); + trace_cpufreq_interactive_already( + data, cpu_load, pcpu->target_freq, + pcpu->policy->cur, new_freq); goto rearm_if_notmax; } trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq, - new_freq); - + pcpu->policy->cur, new_freq); + pcpu->target_freq = new_freq; spin_lock_irqsave(&speedchange_cpumask_lock, flags); cpumask_set_cpu(data, &speedchange_cpumask); @@ -347,31 +398,11 @@ static void cpufreq_interactive_timer(unsigned long data) goto exit; rearm: - if (!timer_pending(&pcpu->cpu_timer)) { - /* - * If already at min: if that CPU is idle, don't set timer. - * Else cancel the timer if that CPU goes idle. We don't - * need to re-evaluate speed until the next idle exit. - */ - if (pcpu->target_freq == pcpu->policy->min) { - smp_rmb(); - - if (pcpu->idling) - goto exit; - - pcpu->timer_idlecancel = 1; - } - - pcpu->time_in_idle = get_cpu_idle_time_us( - data, &pcpu->idle_exit_time); - pcpu->time_in_iowait = get_cpu_iowait_time( - data, NULL); - - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); - } + if (!timer_pending(&pcpu->cpu_timer)) + cpufreq_interactive_timer_resched(pcpu); exit: + up_read(&pcpu->enable_sem); return; } @@ -381,15 +412,16 @@ static void cpufreq_interactive_idle_start(void) &per_cpu(cpuinfo, smp_processor_id()); int pending; - if (!pcpu->governor_enabled) + if (!down_read_trylock(&pcpu->enable_sem)) return; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return; + } - pcpu->idling = 1; - smp_wmb(); pending = timer_pending(&pcpu->cpu_timer); if (pcpu->target_freq != pcpu->policy->min) { -#ifdef CONFIG_SMP /* * Entering idle while not at lowest speed. On some * platforms this can hold the other CPU(s) at that speed @@ -398,35 +430,11 @@ static void cpufreq_interactive_idle_start(void) * min indefinitely. This should probably be a quirk of * the CPUFreq driver. */ - if (!pending) { - pcpu->time_in_idle = get_cpu_idle_time_us( - smp_processor_id(), &pcpu->idle_exit_time); - pcpu->time_in_iowait = get_cpu_iowait_time( - smp_processor_id(), NULL); - pcpu->timer_idlecancel = 0; - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); - } -#endif - } else { - /* - * If at min speed and entering idle after load has - * already been evaluated, and a timer has been set just in - * case the CPU suddenly goes busy, cancel that timer. The - * CPU didn't go busy; we'll recheck things upon idle exit. - */ - if (pending && pcpu->timer_idlecancel) { - del_timer(&pcpu->cpu_timer); - /* - * Ensure last timer run time is after current idle - * sample start time, so next idle exit will always - * start a new idle sampling period. 
- */ - pcpu->idle_exit_time = 0; - pcpu->timer_idlecancel = 0; - } + if (!pending) + cpufreq_interactive_timer_resched(pcpu); } + up_read(&pcpu->enable_sem); } static void cpufreq_interactive_idle_end(void) @@ -434,34 +442,23 @@ static void cpufreq_interactive_idle_end(void) struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, smp_processor_id()); - pcpu->idling = 0; - smp_wmb(); + if (!down_read_trylock(&pcpu->enable_sem)) + return; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return; + } - /* - * Arm the timer for 1-2 ticks later if not already, and if the timer - * function has already processed the previous load sampling - * interval. (If the timer is not pending but has not processed - * the previous interval, it is probably racing with us on another - * CPU. Let it compute load based on the previous sample and then - * re-arm the timer for another interval when it's done, rather - * than updating the interval start time to be "now", which doesn't - * give the timer function enough time to make a decision on this - * run.) - */ - if (timer_pending(&pcpu->cpu_timer) == 0 && - pcpu->timer_run_time >= pcpu->idle_exit_time && - pcpu->governor_enabled) { - pcpu->time_in_idle = - get_cpu_idle_time_us(smp_processor_id(), - &pcpu->idle_exit_time); - pcpu->time_in_iowait = - get_cpu_iowait_time(smp_processor_id(), - NULL); - pcpu->timer_idlecancel = 0; - mod_timer(&pcpu->cpu_timer, - jiffies + usecs_to_jiffies(timer_rate)); + /* Arm the timer for 1-2 ticks later if not already. */ + if (!timer_pending(&pcpu->cpu_timer)) { + cpufreq_interactive_timer_resched(pcpu); + } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) { + del_timer(&pcpu->cpu_timer); + del_timer(&pcpu->cpu_slack_timer); + cpufreq_interactive_timer(smp_processor_id()); } + up_read(&pcpu->enable_sem); } static int cpufreq_interactive_speedchange_task(void *data) @@ -476,7 +473,8 @@ static int cpufreq_interactive_speedchange_task(void *data) spin_lock_irqsave(&speedchange_cpumask_lock, flags); if (cpumask_empty(&speedchange_cpumask)) { - spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + spin_unlock_irqrestore(&speedchange_cpumask_lock, + flags); schedule(); if (kthread_should_stop()) @@ -495,10 +493,12 @@ static int cpufreq_interactive_speedchange_task(void *data) unsigned int max_freq = 0; pcpu = &per_cpu(cpuinfo, cpu); - smp_rmb(); - - if (!pcpu->governor_enabled) + if (!down_read_trylock(&pcpu->enable_sem)) + continue; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); continue; + } for_each_cpu(j, pcpu->policy->cpus) { struct cpufreq_interactive_cpuinfo *pjcpu = @@ -508,32 +508,25 @@ static int cpufreq_interactive_speedchange_task(void *data) max_freq = pjcpu->target_freq; } - __cpufreq_driver_target(pcpu->policy, - max_freq, - CPUFREQ_RELATION_H); - - trace_cpufreq_interactive_setspeed(cpu, pcpu->target_freq, - pcpu->policy->cur); + if (max_freq != pcpu->policy->cur) + __cpufreq_driver_target(pcpu->policy, + max_freq, + CPUFREQ_RELATION_H); + trace_cpufreq_interactive_setspeed(cpu, + pcpu->target_freq, + pcpu->policy->cur); - pcpu->freq_change_time_in_idle = - get_cpu_idle_time_us(cpu, - &pcpu->freq_change_time); - pcpu->freq_change_time_in_iowait = - get_cpu_iowait_time(cpu, NULL); + up_read(&pcpu->enable_sem); } } return 0; } -static unsigned int Touch_poke_attr[4] = {1100000, 860000, 0, 0}; - static void cpufreq_interactive_boost(void) { int i; int anyboost = 0; - unsigned int nr_cpus; - unsigned int input_boost_freq; unsigned long flags; struct 
cpufreq_interactive_cpuinfo *pcpu; @@ -542,24 +535,20 @@ static void cpufreq_interactive_boost(void) for_each_online_cpu(i) { pcpu = &per_cpu(cpuinfo, i); - nr_cpus = num_online_cpus(); - - if (!is_lp_cluster()) { - input_boost_freq = Touch_poke_attr[nr_cpus-1]; - } else { - input_boost_freq = idle_top_freq; - } - if (pcpu->target_freq < input_boost_freq) { - pcpu->target_freq = input_boost_freq; + if (pcpu->target_freq < hispeed_freq) { + pcpu->target_freq = hispeed_freq; cpumask_set_cpu(i, &speedchange_cpumask); + pcpu->hispeed_validate_time = + ktime_to_us(ktime_get()); anyboost = 1; } - /* Set floor freq and (re)start timer for when last + /* + * Set floor freq and (re)start timer for when last * validated. */ - pcpu->floor_freq = input_boost_freq; + pcpu->floor_freq = hispeed_freq; pcpu->floor_validate_time = ktime_to_us(ktime_get()); } @@ -569,71 +558,116 @@ static void cpufreq_interactive_boost(void) wake_up_process(speedchange_task); } -static void cpufreq_interactive_core_lock_timer(unsigned long data) +static int cpufreq_interactive_notifier( + struct notifier_block *nb, unsigned long val, void *data) { - queue_work(inputopen_wq, &core_lock.unlock_work); -} + struct cpufreq_freqs *freq = data; + struct cpufreq_interactive_cpuinfo *pcpu; + int cpu; + unsigned long flags; -static void cpufreq_interactive_unlock_cores(struct work_struct *wq) -{ - struct cpufreq_interactive_core_lock *cl = - container_of(wq, struct cpufreq_interactive_core_lock, - unlock_work); + if (val == CPUFREQ_POSTCHANGE) { + pcpu = &per_cpu(cpuinfo, freq->cpu); + if (!down_read_trylock(&pcpu->enable_sem)) + return 0; + if (!pcpu->governor_enabled) { + up_read(&pcpu->enable_sem); + return 0; + } - mutex_lock(&cl->mutex); + for_each_cpu(cpu, pcpu->policy->cpus) { + struct cpufreq_interactive_cpuinfo *pjcpu = + &per_cpu(cpuinfo, cpu); + spin_lock_irqsave(&pjcpu->load_lock, flags); + update_load(cpu); + spin_unlock_irqrestore(&pjcpu->load_lock, flags); + } - if (--cl->request_active) { - goto done; + up_read(&pcpu->enable_sem); } + return 0; +} + +static struct notifier_block cpufreq_notifier_block = { + .notifier_call = cpufreq_interactive_notifier, +}; + +static ssize_t show_target_loads( + struct kobject *kobj, struct attribute *attr, char *buf) +{ + int i; + ssize_t ret = 0; + unsigned long flags; - pm_qos_update_request(&cl->qos_min_req, - PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); + spin_lock_irqsave(&target_loads_lock, flags); - pm_qos_update_request(&cl->qos_max_req, - PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE); + for (i = 0; i < ntarget_loads; i++) + ret += sprintf(buf + ret, "%u%s", target_loads[i], + i & 0x1 ? ":" : " "); -done: - mutex_unlock(&cl->mutex); + ret += sprintf(buf + ret, "\n"); + spin_unlock_irqrestore(&target_loads_lock, flags); + return ret; } -/* Lock down to whatever # of cores online - * right now. - * - * A pm_qos request for 1 online CPU results in - * an instant cluster switch. 
- */ -static void cpufreq_interactive_lock_cores(void) +static ssize_t store_target_loads( + struct kobject *kobj, struct attribute *attr, const char *buf, + size_t count) { - unsigned int ncpus; + int ret; + const char *cp; + unsigned int *new_target_loads = NULL; + int ntokens = 1; + int i; + unsigned long flags; - mutex_lock(&core_lock.mutex); + cp = buf; + while ((cp = strpbrk(cp + 1, " :"))) + ntokens++; - if (core_lock.request_active) { - goto arm_timer; + if (!(ntokens & 0x1)) + goto err_inval; + + new_target_loads = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL); + if (!new_target_loads) { + ret = -ENOMEM; + goto err; } - ncpus = num_online_cpus(); - pm_qos_update_request(&core_lock.qos_min_req, ncpus); - pm_qos_update_request(&core_lock.qos_max_req, ncpus); - core_lock.request_active++; + cp = buf; + i = 0; + while (i < ntokens) { + if (sscanf(cp, "%u", &new_target_loads[i++]) != 1) + goto err_inval; -arm_timer: - mod_timer(&core_lock.unlock_timer, - jiffies + usecs_to_jiffies(core_lock.lock_period)); + cp = strpbrk(cp, " :"); + if (!cp) + break; + cp++; + } - mutex_unlock(&core_lock.mutex); -} + if (i != ntokens) + goto err_inval; -static int cpufreq_interactive_lock_cores_task(void *data) -{ - while(1) { - cpufreq_interactive_lock_cores(); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - } - return 0; + spin_lock_irqsave(&target_loads_lock, flags); + if (target_loads != default_target_loads) + kfree(target_loads); + target_loads = new_target_loads; + ntarget_loads = ntokens; + spin_unlock_irqrestore(&target_loads_lock, flags); + return count; + +err_inval: + ret = -EINVAL; +err: + kfree(new_target_loads); + return ret; } +static struct global_attr target_loads_attr = + __ATTR(target_loads, S_IRUGO | S_IWUSR, + show_target_loads, store_target_loads); + /* * Pulsed boost on input event raises CPUs to hispeed_freq and lets * usual algorithm of min_sample_time decide when to allow speed @@ -645,7 +679,7 @@ static void cpufreq_interactive_input_event(struct input_handle *handle, unsigned int code, int value) { if (input_boost_val && type == EV_SYN && code == SYN_REPORT) { - wake_up_process(core_lock.lock_task); + trace_cpufreq_interactive_boost("input"); cpufreq_interactive_boost(); } } @@ -724,44 +758,6 @@ static struct input_handler cpufreq_interactive_input_handler = { .id_table = cpufreq_interactive_ids, }; -static ssize_t show_input_boost(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - return sprintf(buf, "%u\n", input_boost_val); -} - -static ssize_t store_input_boost(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) -{ - int ret; - unsigned long val; - - ret = strict_strtoul(buf, 0, &val); - if (ret < 0) - return ret; - input_boost_val = val; - return count; -} - -define_one_global_rw(input_boost); - -static ssize_t show_io_is_busy(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", io_is_busy); -} - -static ssize_t store_io_is_busy(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t count) -{ - if (!strict_strtoul(buf, 0, &io_is_busy)) - return count; - return -EINVAL; -} - -static struct global_attr io_is_busy_attr = __ATTR(io_is_busy, 0644, - show_io_is_busy, store_io_is_busy); - static ssize_t show_hispeed_freq(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -874,6 +870,50 @@ static ssize_t store_timer_rate(struct kobject *kobj, static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644, show_timer_rate, store_timer_rate); +static 
ssize_t show_timer_slack( + struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", timer_slack_val); +} + +static ssize_t store_timer_slack( + struct kobject *kobj, struct attribute *attr, const char *buf, + size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtol(buf, 10, &val); + if (ret < 0) + return ret; + + timer_slack_val = val; + return count; +} + +define_one_global_rw(timer_slack); + +static ssize_t show_input_boost(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", input_boost_val); +} + +static ssize_t store_input_boost(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = strict_strtoul(buf, 0, &val); + if (ret < 0) + return ret; + input_boost_val = val; + return count; +} + +define_one_global_rw(input_boost); + static ssize_t show_boost(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -892,26 +932,72 @@ static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, boost_val = val; - if (boost_val) + if (boost_val) { + trace_cpufreq_interactive_boost("on"); cpufreq_interactive_boost(); - - if (!boost_val) - trace_cpufreq_interactive_unboost(hispeed_freq); + } else { + trace_cpufreq_interactive_unboost("off"); + } return count; } define_one_global_rw(boost); +static ssize_t store_boostpulse(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + boostpulse_endtime = ktime_to_us(ktime_get()) + boostpulse_duration_val; + trace_cpufreq_interactive_boost("pulse"); + cpufreq_interactive_boost(); + return count; +} + +static struct global_attr boostpulse = + __ATTR(boostpulse, 0200, NULL, store_boostpulse); + +static ssize_t show_boostpulse_duration( + struct kobject *kobj, struct attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", boostpulse_duration_val); +} + +static ssize_t store_boostpulse_duration( + struct kobject *kobj, struct attribute *attr, const char *buf, + size_t count) +{ + int ret; + unsigned long val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + boostpulse_duration_val = val; + return count; +} + +define_one_global_rw(boostpulse_duration); + static struct attribute *interactive_attributes[] = { - &io_is_busy_attr.attr, + &target_loads_attr.attr, &hispeed_freq_attr.attr, &go_hispeed_load_attr.attr, &above_hispeed_delay.attr, &min_sample_time_attr.attr, &timer_rate_attr.attr, + &timer_slack.attr, &input_boost.attr, &boost.attr, + &boostpulse.attr, + &boostpulse_duration.attr, NULL, }; @@ -921,18 +1007,18 @@ static struct attribute_group interactive_attr_group = { }; static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, - unsigned long val, - void *data) + unsigned long val, + void *data) { switch (val) { - case IDLE_START: - cpufreq_interactive_idle_start(); - break; - case IDLE_END: - cpufreq_interactive_idle_end(); - break; + case IDLE_START: + cpufreq_interactive_idle_start(); + break; + case IDLE_END: + cpufreq_interactive_idle_end(); + break; } - + return 0; } @@ -955,32 +1041,34 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, freq_table = cpufreq_frequency_get_table(policy->cpu); + if (!hispeed_freq) + hispeed_freq = policy->max; for_each_cpu(j, policy->cpus) { + unsigned long expires; + pcpu = &per_cpu(cpuinfo, j); pcpu->policy = policy; pcpu->target_freq = policy->cur; pcpu->freq_table = 
freq_table; - pcpu->freq_change_time_in_idle = - get_cpu_idle_time_us(j, - &pcpu->freq_change_time); - pcpu->time_in_idle = pcpu->freq_change_time_in_idle; - pcpu->idle_exit_time = pcpu->freq_change_time; - pcpu->freq_change_time_in_iowait = - get_cpu_iowait_time(j, NULL); - pcpu->time_in_iowait = pcpu->freq_change_time_in_iowait; - - pcpu->timer_idlecancel = 1; pcpu->floor_freq = pcpu->target_freq; pcpu->floor_validate_time = - pcpu->freq_change_time; + ktime_to_us(ktime_get()); + pcpu->hispeed_validate_time = + pcpu->floor_validate_time; + down_write(&pcpu->enable_sem); + expires = jiffies + usecs_to_jiffies(timer_rate); + pcpu->cpu_timer.expires = expires; + add_timer_on(&pcpu->cpu_timer, j); + if (timer_slack_val >= 0) { + expires += usecs_to_jiffies(timer_slack_val); + pcpu->cpu_slack_timer.expires = expires; + add_timer_on(&pcpu->cpu_slack_timer, j); + } pcpu->governor_enabled = 1; - smp_wmb(); + up_write(&pcpu->enable_sem); } - if (!hispeed_freq) - hispeed_freq = policy->max; - /* * Do not register the idle hook and create sysfs * entries if we have already done so. @@ -996,31 +1084,29 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, rc = input_register_handler(&cpufreq_interactive_input_handler); if (rc) pr_warn("%s: failed to register input handler\n", - __func__); - + __func__); + idle_notifier_register(&cpufreq_interactive_idle_nb); + cpufreq_register_notifier( + &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); break; case CPUFREQ_GOV_STOP: for_each_cpu(j, policy->cpus) { pcpu = &per_cpu(cpuinfo, j); + down_write(&pcpu->enable_sem); pcpu->governor_enabled = 0; - smp_wmb(); del_timer_sync(&pcpu->cpu_timer); - - /* - * Reset idle exit time since we may cancel the timer - * before it can run after the last idle exit time, - * to avoid tripping the check in idle exit for a timer - * that is trying to run. - */ - pcpu->idle_exit_time = 0; + del_timer_sync(&pcpu->cpu_slack_timer); + up_write(&pcpu->enable_sem); } flush_work(&inputopen.inputopen_work); if (atomic_dec_return(&active_count) > 0) return 0; - + + cpufreq_unregister_notifier( + &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); idle_notifier_unregister(&cpufreq_interactive_idle_nb); input_unregister_handler(&cpufreq_interactive_input_handler); sysfs_remove_group(cpufreq_global_kobject, @@ -1040,78 +1126,49 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, return 0; } +static void cpufreq_interactive_nop_timer(unsigned long data) +{ +} + static int __init cpufreq_interactive_init(void) { unsigned int i; struct cpufreq_interactive_cpuinfo *pcpu; - /* - * If MAX_USER_RT_PRIO < MAX_RT_PRIO the kernel thread has higher priority than any user thread - * In this case MAX_USER_RT_PRIO = 99 and MAX_RT_PRIO = 100, therefore boosting the priority of this - * kernel thread above user threads which will, by my reason, increase interactvitiy. 
- */ - struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO-1 }; - - cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); - idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; - - go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; - min_sample_time = DEFAULT_MIN_SAMPLE_TIME; - above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY; - timer_rate = DEFAULT_TIMER_RATE; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; /* Initalize per-cpu timers */ for_each_possible_cpu(i) { pcpu = &per_cpu(cpuinfo, i); - init_timer(&pcpu->cpu_timer); + init_timer_deferrable(&pcpu->cpu_timer); pcpu->cpu_timer.function = cpufreq_interactive_timer; pcpu->cpu_timer.data = i; + init_timer(&pcpu->cpu_slack_timer); + pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer; + spin_lock_init(&pcpu->load_lock); + init_rwsem(&pcpu->enable_sem); } + spin_lock_init(&target_loads_lock); spin_lock_init(&speedchange_cpumask_lock); - speedchange_task = kthread_create(cpufreq_interactive_speedchange_task, NULL, - "cfinteractive"); + "cfinteractive"); if (IS_ERR(speedchange_task)) - return PTR_ERR(speedchange_task); + return PTR_ERR(speedchange_task); sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, ¶m); get_task_struct(speedchange_task); inputopen_wq = create_workqueue("cfinteractive"); - + if (!inputopen_wq) goto err_freetask; - + INIT_WORK(&inputopen.inputopen_work, cpufreq_interactive_input_open); /* NB: wake up so the thread does not look hung to the freezer */ wake_up_process(speedchange_task); - pm_qos_add_request(&core_lock.qos_min_req, PM_QOS_MIN_ONLINE_CPUS, - PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); - - pm_qos_add_request(&core_lock.qos_max_req, PM_QOS_MAX_ONLINE_CPUS, - PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE); - - init_timer(&core_lock.unlock_timer); - core_lock.unlock_timer.function = cpufreq_interactive_core_lock_timer; - core_lock.unlock_timer.data = 0; - - core_lock.request_active = 0; - core_lock.lock_period = DEFAULT_CORE_LOCK_PERIOD; - mutex_init(&core_lock.mutex); - - core_lock.lock_task = kthread_create(cpufreq_interactive_lock_cores_task, NULL, - "kinteractive_lockcores"); - - if (IS_ERR(core_lock.lock_task)) - return PTR_ERR(core_lock.lock_task); - - sched_setscheduler_nocheck(core_lock.lock_task, SCHED_FIFO, ¶m); - get_task_struct(core_lock.lock_task); - - INIT_WORK(&core_lock.unlock_work, cpufreq_interactive_unlock_cores); return cpufreq_register_governor(&cpufreq_gov_interactive); err_freetask: @@ -1131,11 +1188,6 @@ static void __exit cpufreq_interactive_exit(void) kthread_stop(speedchange_task); put_task_struct(speedchange_task); destroy_workqueue(inputopen_wq); - - pm_qos_remove_request(&core_lock.qos_min_req); - pm_qos_remove_request(&core_lock.qos_max_req); - kthread_stop(core_lock.lock_task); - put_task_struct(core_lock.lock_task); } module_exit(cpufreq_interactive_exit); diff --git a/include/trace/events/cpufreq_interactive.h b/include/trace/events/cpufreq_interactive.h index 64c9a825346..951e6ca12da 100644 --- a/include/trace/events/cpufreq_interactive.h +++ b/include/trace/events/cpufreq_interactive.h @@ -8,7 +8,7 @@ DECLARE_EVENT_CLASS(set, TP_PROTO(u32 cpu_id, unsigned long targfreq, - unsigned long actualfreq), + unsigned long actualfreq), TP_ARGS(cpu_id, targfreq, actualfreq), TP_STRUCT__entry( @@ -36,68 +36,74 @@ DEFINE_EVENT(set, cpufreq_interactive_setspeed, DECLARE_EVENT_CLASS(loadeval, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq), + unsigned long curtarg, unsigned 
long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg), TP_STRUCT__entry( __field(unsigned long, cpu_id ) __field(unsigned long, load ) - __field(unsigned long, curfreq ) - __field(unsigned long, targfreq ) + __field(unsigned long, curtarg ) + __field(unsigned long, curactual ) + __field(unsigned long, newtarg ) ), TP_fast_assign( __entry->cpu_id = cpu_id; __entry->load = load; - __entry->curfreq = curfreq; - __entry->targfreq = targfreq; + __entry->curtarg = curtarg; + __entry->curactual = curactual; + __entry->newtarg = newtarg; ), - TP_printk("cpu=%lu load=%lu cur=%lu targ=%lu", - __entry->cpu_id, __entry->load, __entry->curfreq, - __entry->targfreq) + TP_printk("cpu=%lu load=%lu cur=%lu actual=%lu targ=%lu", + __entry->cpu_id, __entry->load, __entry->curtarg, + __entry->curactual, __entry->newtarg) ); DEFINE_EVENT(loadeval, cpufreq_interactive_target, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq) + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); DEFINE_EVENT(loadeval, cpufreq_interactive_already, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq) + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); DEFINE_EVENT(loadeval, cpufreq_interactive_notyet, TP_PROTO(unsigned long cpu_id, unsigned long load, - unsigned long curfreq, unsigned long targfreq), - TP_ARGS(cpu_id, load, curfreq, targfreq) + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) ); TRACE_EVENT(cpufreq_interactive_boost, - TP_PROTO(unsigned long freq), - TP_ARGS(freq), + TP_PROTO(const char *s), + TP_ARGS(s), TP_STRUCT__entry( - __field(unsigned long, freq) + __string(s, s) ), TP_fast_assign( - __entry->freq = freq; + __assign_str(s, s); ), - TP_printk("freq=%lu", __entry->freq) + TP_printk("%s", __get_str(s)) ); TRACE_EVENT(cpufreq_interactive_unboost, - TP_PROTO(unsigned long freq), - TP_ARGS(freq), + TP_PROTO(const char *s), + TP_ARGS(s), TP_STRUCT__entry( - __field(unsigned long, freq) + __string(s, s) ), TP_fast_assign( - __entry->freq = freq; + __assign_str(s, s); ), - TP_printk("freq=%lu", __entry->freq) + TP_printk("%s", __get_str(s)) ); #endif /* _TRACE_CPUFREQ_INTERACTIVE_H */ From de9978c56896bf6f09629c4e1452e78dedd6dbed Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 10 Jan 2013 14:40:57 -0500 Subject: [PATCH 280/678] mach-tegra: cpu-tegra3.c: revert to stock hotplug down delay --- arch/arm/mach-tegra/cpu-tegra3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index e8a4aa92a76..016674b4ad9 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -41,7 +41,7 @@ #define INITIAL_STATE TEGRA_HP_DISABLED #define UP2G0_DELAY_MS 300 #define UP2Gn_DELAY_MS 150 -#define DOWN_DELAY_MS 1000 +#define DOWN_DELAY_MS 2000 static struct mutex *tegra3_cpu_lock; From 87378dfecef379750cea431a8a251a74974987dc Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 10 Jan 2013 15:01:27 -0500 Subject: [PATCH 281/678] makefile tweaks --- Makefile | 6 +++--- arch/arm/Makefile | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Makefile 
b/Makefile index eafc8ed4e13..01121443454 100644 --- a/Makefile +++ b/Makefile @@ -347,11 +347,11 @@ CHECK = sparse CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void $(CF) -MODFLAGS = -DMODULE -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a8 -marm -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops +MODFLAGS = -DMODULE -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a9 -marm -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops CFLAGS_MODULE = $(MODFLAGS) AFLAGS_MODULE = $(MODFLAGS) LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds -CFLAGS_KERNEL = -O2 -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a8 -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops +CFLAGS_KERNEL = -O2 -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage @@ -370,7 +370,7 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -Werror-implicit-function-declaration \ -Wno-format-security \ -fno-delete-null-pointer-checks -mno-unaligned-access \ - -mtune=cortex-a8 -march=armv7-a -mfpu=neon \ + -mtune=cortex-a9 -march=armv7-a -mfpu=neon \ -fpredictive-commoning -fgcse-after-reload -ftree-vectorize -mvectorize-with-neon-quad \ -fipa-cp-clone -fsingle-precision-constant \ -funswitch-loops -floop-interchange \ diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 9a208d1a217..5507cdc2c17 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -57,8 +57,7 @@ comma = , # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -#arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv5te -Wa$(comma)-march=armv7-a) -arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a8 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv7-a -Wa$(comma)-march=armv7-a) +arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv5te -Wa$(comma)-march=armv7-a) arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) # Only override the compiler option if ARMv6. 
The ARMv6K extensions are # always available in ARMv7 From e6a448857dae7b543d6b18d12640a99cf2732d93 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 10 Jan 2013 15:02:32 -0500 Subject: [PATCH 282/678] mach-tegra: tegra3_clocks.c: add higher g clocks --- arch/arm/mach-tegra/tegra3_clocks.c | 98 ++++++++++++++++++++++------- 1 file changed, 74 insertions(+), 24 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 99b64e4f250..6b6493b2a86 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -3552,6 +3552,18 @@ static struct clk tegra_pll_u = { }; static struct clk_pll_freq_table tegra_pll_x_freq_table[] = { + /* 1.9 GHz */ + { 12000000, 1900000000, 850, 6, 1, 8}, + { 13000000, 1900000000, 915, 7, 1, 8}, + { 16800000, 1900000000, 708, 7, 1, 8}, + { 19200000, 1900000000, 885, 10, 1, 8}, + { 26000000, 1900000000, 950, 13, 1, 8}, + /* 1.8 GHz */ + { 12000000, 1800000000, 850, 6, 1, 8}, + { 13000000, 1800000000, 915, 7, 1, 8}, + { 16800000, 1800000000, 708, 7, 1, 8}, + { 19200000, 1800000000, 885, 10, 1, 8}, + { 26000000, 1800000000, 900, 13, 1, 8}, /* 1.7 GHz */ { 12000000, 1700000000, 850, 6, 1, 8}, { 13000000, 1700000000, 915, 7, 1, 8}, /* actual: 1699.2 MHz */ @@ -3559,13 +3571,6 @@ static struct clk_pll_freq_table tegra_pll_x_freq_table[] = { { 19200000, 1700000000, 885, 10, 1, 8}, /* actual: 1699.2 MHz */ { 26000000, 1700000000, 850, 13, 1, 8}, - /* 1.624 GHz */ - { 12000000, 1624000000, 812, 6, 1, 8}, - { 13000000, 1624000000, 999, 8, 1, 8}, /* actual 1623.4 MHz */ - { 16800000, 1624000000, 870, 9, 1, 8}, - { 19200000, 1624000000, 930, 11, 1, 8}, /* actual 1623.2 MHz */ - { 26000000, 1624000000, 812, 13, 1, 8}, - /* 1.6 GHz */ { 12000000, 1600000000, 800, 6, 1, 8}, { 13000000, 1600000000, 738, 6, 1, 8}, /* actual: 1599.0 MHz */ @@ -3624,14 +3629,14 @@ static struct clk tegra_pll_x = { .ops = &tegra_pll_ops, .reg = 0xe0, .parent = &tegra_pll_ref, - .max_rate = 1700000000, + .max_rate = 1900000000, .u.pll = { .input_min = 2000000, .input_max = 31000000, .cf_min = 1000000, .cf_max = 6000000, .vco_min = 20000000, - .vco_max = 1700000000, + .vco_max = 1900000000, .freq_table = tegra_pll_x_freq_table, .lock_delay = 300, }, @@ -3909,7 +3914,7 @@ static struct clk tegra_clk_cclk_g = { .inputs = mux_cclk_g, .reg = 0x368, .ops = &tegra_super_ops, - .max_rate = 1700000000, + .max_rate = 1900000000, }; static struct clk tegra_clk_cclk_lp = { @@ -3918,7 +3923,7 @@ static struct clk tegra_clk_cclk_lp = { .inputs = mux_cclk_lp, .reg = 0x370, .ops = &tegra_super_ops, - .max_rate = 740000000, + .max_rate = 720000000, }; static struct clk tegra_clk_sclk = { @@ -3934,7 +3939,7 @@ static struct clk tegra_clk_virtual_cpu_g = { .name = "cpu_g", .parent = &tegra_clk_cclk_g, .ops = &tegra_cpu_ops, - .max_rate = 1700000000, + .max_rate = 1900000000, .u.cpu = { .main = &tegra_pll_x, .backup = &tegra_pll_p, @@ -3946,7 +3951,7 @@ static struct clk tegra_clk_virtual_cpu_lp = { .name = "cpu_lp", .parent = &tegra_clk_cclk_lp, .ops = &tegra_cpu_ops, - .max_rate = 740000000, + .max_rate = 720000000, .u.cpu = { .main = &tegra_pll_x, .backup = &tegra_pll_p, @@ -4693,16 +4698,58 @@ static struct cpufreq_frequency_table freq_table_1p7GHz[] = { { 2, 204000 }, { 3, 340000 }, { 4, 475000 }, - { 5, 620000 }, + { 5, 666000 }, { 6, 860000 }, { 7, 1000000 }, - { 8, 1200000 }, - { 9, 1300000 }, - {10, 1400000 }, - {11, 1500000 }, - {12, 1600000 }, - {13, 1700000 }, - {14, CPUFREQ_TABLE_END }, + { 8, 1100000 }, + { 9, 1200000 }, + {10, 
1300000 }, + {11, 1400000 }, + {12, 1500000 }, + {13, 1600000 }, + {14, 1700000 }, + {15, CPUFREQ_TABLE_END }, +}; + +static struct cpufreq_frequency_table freq_table_1p8GHz[] = { + { 0, 51000 }, + { 1, 102000 }, + { 2, 204000 }, + { 3, 340000 }, + { 4, 475000 }, + { 5, 666000 }, + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1100000 }, + { 9, 1200000 }, + {10, 1300000 }, + {11, 1400000 }, + {12, 1500000 }, + {13, 1600000 }, + {14, 1700000 }, + {15, 1800000 }, + {16, CPUFREQ_TABLE_END }, +}; + +static struct cpufreq_frequency_table freq_table_1p9GHz[] = { + { 0, 51000 }, + { 1, 102000 }, + { 2, 204000 }, + { 3, 340000 }, + { 4, 475000 }, + { 5, 666000 }, + { 6, 860000 }, + { 7, 1000000 }, + { 8, 1100000 }, + { 9, 1200000 }, + {10, 1300000 }, + {11, 1400000 }, + {12, 1500000 }, + {13, 1600000 }, + {14, 1700000 }, + {15, 1800000 }, + {16, 1900000 }, + {17, CPUFREQ_TABLE_END }, }; static struct tegra_cpufreq_table_data cpufreq_tables[] = { @@ -4712,7 +4759,10 @@ static struct tegra_cpufreq_table_data cpufreq_tables[] = { { freq_table_1p4GHz, 2, 11 }, { freq_table_1p5GHz, 2, 12 }, { freq_table_1p6GHz, 2, 13 }, - { freq_table_1p7GHz, 2, 12 }, + { freq_table_1p7GHz, 2, 13 }, + { freq_table_1p7GHz, 2, 13 }, + { freq_table_1p8GHz, 2, 13 }, + { freq_table_1p9GHz, 2, 13 }, }; static int clip_cpu_rate_limits( @@ -4809,8 +4859,8 @@ unsigned long tegra_emc_to_cpu_ratio(unsigned long cpu_rate) /* Vote on memory bus frequency based on cpu frequency; cpu rate is in kHz, emc rate is in Hz */ - if (cpu_rate >= 650000) - return emc_max_rate; /* cpu >= 925 MHz, emc max */ + if (cpu_rate >= 850000) + return emc_max_rate; /* cpu >= 850 MHz, emc max */ else if (cpu_rate >= 450000) return emc_max_rate/2; /* cpu >= 450 MHz, emc max/2 */ else if (cpu_rate >= 250000) From b4c603a1000efc8c0f60dc86f90e8247244f1e79 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 10 Jan 2013 18:01:54 -0500 Subject: [PATCH 283/678] Revert "makefile tweaks" This reverts commit 87378dfecef379750cea431a8a251a74974987dc. 
--- Makefile | 6 +++--- arch/arm/Makefile | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 01121443454..eafc8ed4e13 100644 --- a/Makefile +++ b/Makefile @@ -347,11 +347,11 @@ CHECK = sparse CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void $(CF) -MODFLAGS = -DMODULE -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a9 -marm -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops +MODFLAGS = -DMODULE -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a8 -marm -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops CFLAGS_MODULE = $(MODFLAGS) AFLAGS_MODULE = $(MODFLAGS) LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds -CFLAGS_KERNEL = -O2 -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops +CFLAGS_KERNEL = -O2 -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a8 -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage @@ -370,7 +370,7 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -Werror-implicit-function-declaration \ -Wno-format-security \ -fno-delete-null-pointer-checks -mno-unaligned-access \ - -mtune=cortex-a9 -march=armv7-a -mfpu=neon \ + -mtune=cortex-a8 -march=armv7-a -mfpu=neon \ -fpredictive-commoning -fgcse-after-reload -ftree-vectorize -mvectorize-with-neon-quad \ -fipa-cp-clone -fsingle-precision-constant \ -funswitch-loops -floop-interchange \ diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 5507cdc2c17..9a208d1a217 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -57,7 +57,8 @@ comma = , # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv5te -Wa$(comma)-march=armv7-a) +#arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv5te -Wa$(comma)-march=armv7-a) +arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a8 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv7-a -Wa$(comma)-march=armv7-a) arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) # Only override the compiler option if ARMv6. The ARMv6K extensions are # always available in ARMv7 From 4d9333c5951873d4144aff2f5597162b56216eb6 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 10 Jan 2013 20:32:32 -0500 Subject: [PATCH 284/678] Revert "Revert "makefile tweaks"" This reverts commit b4c603a1000efc8c0f60dc86f90e8247244f1e79. 
--- Makefile | 6 +++--- arch/arm/Makefile | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index eafc8ed4e13..01121443454 100644 --- a/Makefile +++ b/Makefile @@ -347,11 +347,11 @@ CHECK = sparse CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void $(CF) -MODFLAGS = -DMODULE -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a8 -marm -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops +MODFLAGS = -DMODULE -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a9 -marm -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops CFLAGS_MODULE = $(MODFLAGS) AFLAGS_MODULE = $(MODFLAGS) LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds -CFLAGS_KERNEL = -O2 -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a8 -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops +CFLAGS_KERNEL = -O2 -fgcse-lm -fgcse-sm -fsched-spec-load -fforce-addr -ffast-math -fsingle-precision-constant -mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad -funswitch-loops AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage @@ -370,7 +370,7 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -Werror-implicit-function-declaration \ -Wno-format-security \ -fno-delete-null-pointer-checks -mno-unaligned-access \ - -mtune=cortex-a8 -march=armv7-a -mfpu=neon \ + -mtune=cortex-a9 -march=armv7-a -mfpu=neon \ -fpredictive-commoning -fgcse-after-reload -ftree-vectorize -mvectorize-with-neon-quad \ -fipa-cp-clone -fsingle-precision-constant \ -funswitch-loops -floop-interchange \ diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 9a208d1a217..5507cdc2c17 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -57,8 +57,7 @@ comma = , # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -#arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv5te -Wa$(comma)-march=armv7-a) -arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a8 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv7-a -Wa$(comma)-march=armv7-a) +arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize,-march=armv5te -Wa$(comma)-march=armv7-a) arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) # Only override the compiler option if ARMv6. 
The ARMv6K extensions are # always available in ARMv7 From b014ec22fd7cbd134e5de09f8f4fdb070b3e1880 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 10 Jan 2013 21:19:47 -0500 Subject: [PATCH 285/678] cpufreq: ondemand: add g core min lock for 2s after touch hard coded at 860000 for now also adjusted ondemand params as a result --- drivers/cpufreq/cpufreq_ondemand.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 2e2b8311a31..16832c48963 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -37,12 +37,12 @@ * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define DEF_FREQUENCY_UP_THRESHOLD (92) -#define DEF_SAMPLING_DOWN_FACTOR (1) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_SAMPLING_DOWN_FACTOR (2) #define MAX_SAMPLING_DOWN_FACTOR (100000) -#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define MICRO_FREQUENCY_UP_THRESHOLD (80) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) @@ -323,8 +323,8 @@ static ssize_t store_two_phase_freq(struct kobject *a, struct attribute *b, } #endif -static unsigned int Touch_poke_attr[4] = {1500000, 0, 0, 0}; -static unsigned int Touch_poke_boost_duration_ms = 2000; +static unsigned int Touch_poke_attr[4] = {1500000, 1100000, 0, 0}; +static unsigned int Touch_poke_boost_duration_ms = 4000; static unsigned long Touch_poke_boost_till_jiffies = 0; static ssize_t store_touch_poke(struct kobject *a, struct attribute *b, @@ -726,11 +726,15 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* No longer fully busy, reset rate_mult */ this_dbs_info->rate_mult = 1; - if (is_lp_cluster() && Touch_poke_boost_till_jiffies > jiffies) { - freq_min = idle_top_freq; + if (Touch_poke_boost_till_jiffies > jiffies) { + if (is_lp_cluster()) { + freq_min = idle_top_freq; + } else { + freq_min = 860000; } } else { freq_min = policy->min; } + if (freq_next < freq_min) freq_next = freq_min; From b67f84229f8dfed653cd58ff17dd341c991488d9 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 10 Jan 2013 22:26:02 -0500 Subject: [PATCH 286/678] mach-tegra: overclock g cores up to 1.9GHz --- arch/arm/mach-tegra/tegra3_clocks.c | 14 ++++++++------ arch/arm/mach-tegra/tegra3_dvfs.c | 2 +- drivers/cpufreq/cpufreq.c | 4 ++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 6b6493b2a86..a26097b2999 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -3556,14 +3556,16 @@ static struct clk_pll_freq_table tegra_pll_x_freq_table[] = { { 12000000, 1900000000, 850, 6, 1, 8}, { 13000000, 1900000000, 915, 7, 1, 8}, { 16800000, 1900000000, 708, 7, 1, 8}, - { 19200000, 1900000000, 885, 10, 1, 8}, + { 19200000, 1900000000, 989, 10, 1, 8}, /* actual: 1898.8 MHz */ { 26000000, 1900000000, 950, 13, 1, 8}, + /* 1.8 GHz */ - { 12000000, 1800000000, 850, 6, 1, 8}, - { 13000000, 1800000000, 915, 7, 1, 8}, - { 16800000, 1800000000, 708, 7, 1, 8}, - { 19200000, 1800000000, 885, 10, 1, 8}, + { 12000000, 1800000000, 900, 6, 1, 8}, + { 13000000, 1800000000, 969, 7, 1, 8}, /* actual: 1799.6 MHz */ + { 16800000, 1800000000, 750, 7, 1, 8}, 
+ { 19200000, 1800000000, 750, 8, 1, 8}, { 26000000, 1800000000, 900, 13, 1, 8}, + /* 1.7 GHz */ { 12000000, 1700000000, 850, 6, 1, 8}, { 13000000, 1700000000, 915, 7, 1, 8}, /* actual: 1699.2 MHz */ @@ -3969,7 +3971,7 @@ static struct clk tegra_clk_cpu_cmplx = { .name = "cpu", .inputs = mux_cpu_cmplx, .ops = &tegra_cpu_cmplx_ops, - .max_rate = 1700000000, + .max_rate = 1900000000, }; static struct clk tegra_clk_cop = { diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 01cb3583121..017755dea73 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -176,7 +176,7 @@ static struct dvfs cpu_dvfs_table[] = { /* Nexus 7 - faking speedo id = 4, process id =2*/ // CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500, 1600), - CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600), + CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600, 1700, 1700, 1800, 1900), /*Cpu voltages (mV): 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237 */ // CPU_DVFS("cpu_g", 4, 2, MHZ, 475, 475, 620, 620, 860, 860, 1000, 1000, 1100, 1100, 1200, 1300, 1400, 1500, 1600), diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1a8b31853ec..50273130257 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -622,7 +622,7 @@ static ssize_t show_UV_mV_table(struct cpufreq_policy *policy, char *buf) struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); /* find how many actual entries there are */ - i = cpu_clk_g->dvfs->num_freqs - 3; + i = cpu_clk_g->dvfs->num_freqs; for(i--; i >=0; i--) { out += sprintf(out, "%lumhz: %i mV\n", @@ -643,7 +643,7 @@ static ssize_t store_UV_mV_table(struct cpufreq_policy *policy, char *buf, size_ struct clk *cpu_clk_g = tegra_get_clock_by_name("cpu_g"); /* find how many actual entries there are */ - i = cpu_clk_g->dvfs->num_freqs - 3; + i = cpu_clk_g->dvfs->num_freqs; for(i--; i >= 0; i--) { From e5a003d1c25dd351ae271acdeced800c69c632cc Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 11 Jan 2013 00:16:15 -0500 Subject: [PATCH 287/678] tweaks for overclocking --- arch/arm/mach-tegra/tegra3_clocks.c | 5 ++--- arch/arm/mach-tegra/tegra3_dvfs.c | 8 ++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index a26097b2999..29cbde74795 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4749,9 +4749,8 @@ static struct cpufreq_frequency_table freq_table_1p9GHz[] = { {12, 1500000 }, {13, 1600000 }, {14, 1700000 }, - {15, 1800000 }, - {16, 1900000 }, - {17, CPUFREQ_TABLE_END }, + {15, 1900000 }, + {16, CPUFREQ_TABLE_END }, }; static struct tegra_cpufreq_table_data cpufreq_tables[] = { diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 017755dea73..9b9408fb92c 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -30,7 +30,7 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; #endif static bool tegra_dvfs_cpu_disabled; @@ -38,7 +38,7 @@ static bool 
tegra_dvfs_core_disabled; static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25, 25, 25, 25}; @@ -67,7 +67,7 @@ static int cpu_below_core = VDD_CPU_BELOW_VDD_CORE; static struct dvfs_rail tegra3_dvfs_rail_vdd_cpu = { .reg_id = "vdd_cpu", - .max_millivolts = 1250, + .max_millivolts = 1500, .min_millivolts = 800, .step = VDD_SAFE_STEP, .jmp_to_zero = true, @@ -175,7 +175,7 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 4, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1250, 1300, 1330, 1360, 1400, 1500), /* Nexus 7 - faking speedo id = 4, process id =2*/ -// CPU_DVFS("cpu_g", 4, 2, MHZ, 520, 520, 700, 700, 860, 860, 1050, 1150, 1200, 1280, 1300, 1340, 1380, 1500, 1600), + /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600, 1700, 1700, 1800, 1900), /*Cpu voltages (mV): 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237 */ From 5ac3cf1edf1e4c1c12c04ea02a9982efe2abded8 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 11 Jan 2013 18:05:04 -0500 Subject: [PATCH 288/678] cpufreq: add touchdemand governor and make ondemand two phased --- drivers/cpufreq/Kconfig | 30 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/cpufreq_ondemand.c | 199 +---- drivers/cpufreq/cpufreq_touchdemand.c | 1076 +++++++++++++++++++++++++ include/linux/cpufreq.h | 3 + 5 files changed, 1122 insertions(+), 187 deletions(-) create mode 100644 drivers/cpufreq/cpufreq_touchdemand.c diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 9859af5b158..f73594fe8b6 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -88,6 +88,18 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND governor. If unsure have a look at the help section of the driver. Fallback governor will be the performance governor. +config CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND + bool "touchdemand" + select CPU_FREQ_GOV_TOUCHDEMAND + select CPU_FREQ_GOV_PERFORMANCE + help + Use the CPUFreq governor 'touchdemand' as default. This allows + you to get a full dynamic frequency capable system by simply + loading your cpufreq low-level hardware driver. + Be aware that not all cpufreq drivers support the touchdemand + governor. If unsure have a look at the help section of the + driver. Fallback governor will be the performance governor. + config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE bool "conservative" select CPU_FREQ_GOV_CONSERVATIVE @@ -166,6 +178,24 @@ config CPU_FREQ_GOV_ONDEMAND If in doubt, say N. +config CPU_FREQ_GOV_TOUCHDEMAND + tristate "'touchdemand' cpufreq policy governor" + select CPU_FREQ_TABLE + help + 'touchdemand' - This driver adds a dynamic cpufreq policy governor. + The governor does a periodic polling and + changes frequency based on the CPU utilization. + The support for this governor depends on CPU capability to + do fast frequency switching (i.e, very low latency frequency + transitions). + + To compile this driver as a module, choose M here: the + module will be called cpufreq_touchdemand. 
+ + For details, take a look at linux/Documentation/cpu-freq. + + If in doubt, say N. + config CPU_FREQ_GOV_INTERACTIVE tristate "'interactive' cpufreq policy governor" help diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index adc4e22d0ae..d20361e0116 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o +obj-$(CONFIG_CPU_FREQ_GOV_TOUCHDEMAND) += cpufreq_touchdemand.o obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o obj-$(CONFIG_CPU_FREQ_GOV_PEGASUSQ) += cpufreq_pegasusq.o diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 16832c48963..7e0cc1c88bf 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -38,15 +38,15 @@ */ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) -#define DEF_FREQUENCY_UP_THRESHOLD (80) -#define DEF_SAMPLING_DOWN_FACTOR (2) +#define DEF_FREQUENCY_UP_THRESHOLD (90) +#define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (10) -#define MICRO_FREQUENCY_UP_THRESHOLD (80) +#define MICRO_FREQUENCY_UP_THRESHOLD (90) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) -#define DEF_SAMPLING_RATE (60000) +#define DEF_SAMPLING_RATE (50000) #define DEF_IO_IS_BUSY (1) #define DEF_UI_DYNAMIC_SAMPLING_RATE (30000) #define DEF_UI_COUNTER (5) @@ -131,10 +131,7 @@ static struct dbs_tuners { unsigned int sampling_down_factor; unsigned int powersave_bias; unsigned int io_is_busy; -#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE unsigned int two_phase_freq; -#endif - unsigned int touch_poke; unsigned int origin_sampling_rate; unsigned int ui_sampling_rate; unsigned int ui_counter; @@ -144,10 +141,7 @@ static struct dbs_tuners { .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, .ignore_nice = 0, .powersave_bias = 0, -#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE - .two_phase_freq = 1100000, -#endif - .touch_poke = 1, + .two_phase_freq = 1200000, .ui_sampling_rate = DEF_UI_DYNAMIC_SAMPLING_RATE, .ui_counter = DEF_UI_COUNTER, }; @@ -289,10 +283,7 @@ show_one(sampling_down_factor, sampling_down_factor); show_one(down_differential, down_differential); show_one(ignore_nice_load, ignore_nice); show_one(powersave_bias, powersave_bias); -#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE show_one(two_phase_freq, two_phase_freq); -#endif -show_one(touch_poke, touch_poke); show_one(ui_sampling_rate, ui_sampling_rate); show_one(ui_counter, ui_counter); static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, @@ -307,7 +298,7 @@ static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, dbs_tuners_ins.origin_sampling_rate = dbs_tuners_ins.sampling_rate; return count; } -#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE + static ssize_t store_two_phase_freq(struct kobject *a, struct attribute *b, const char *buf, size_t count) { @@ -321,31 +312,7 @@ static ssize_t store_two_phase_freq(struct kobject *a, struct attribute *b, return count; } -#endif -static unsigned int Touch_poke_attr[4] = {1500000, 1100000, 0, 0}; -static unsigned int Touch_poke_boost_duration_ms = 4000; -static unsigned long 
Touch_poke_boost_till_jiffies = 0; - -static ssize_t store_touch_poke(struct kobject *a, struct attribute *b, - const char *buf, size_t count) -{ - int ret; - ret = sscanf(buf, "%u,%u,%u,%u,%u", &Touch_poke_attr[0], &Touch_poke_attr[1], - &Touch_poke_attr[2], &Touch_poke_attr[3], &Touch_poke_boost_duration_ms); - if (ret < 4) - return -EINVAL; - - if (ret != 5) - Touch_poke_boost_duration_ms = 0; - - if(Touch_poke_attr[0] == 0) - dbs_tuners_ins.touch_poke = 0; - else - dbs_tuners_ins.touch_poke = 1; - - return count; -} static ssize_t store_ui_sampling_rate(struct kobject *a, struct attribute *b, const char *buf, size_t count) { @@ -395,8 +362,7 @@ static ssize_t store_down_differential(struct kobject *a, struct attribute *b, int ret; ret = sscanf(buf, "%u", &input); - if(ret != 1 || input > DEF_FREQUENCY_DOWN_DIFFERENTIAL || - input < MICRO_FREQUENCY_DOWN_DIFFERENTIAL) { + if(ret != 1) { return -EINVAL; } dbs_tuners_ins.down_differential = input; @@ -495,10 +461,7 @@ define_one_global_rw(down_differential); define_one_global_rw(sampling_down_factor); define_one_global_rw(ignore_nice_load); define_one_global_rw(powersave_bias); -#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE define_one_global_rw(two_phase_freq); -#endif -define_one_global_rw(touch_poke); define_one_global_rw(ui_sampling_rate); define_one_global_rw(ui_counter); @@ -511,10 +474,7 @@ static struct attribute *dbs_attributes[] = { &ignore_nice_load.attr, &powersave_bias.attr, &io_is_busy.attr, -#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE &two_phase_freq.attr, -#endif - &touch_poke.attr, &ui_sampling_rate.attr, &ui_counter.attr, NULL @@ -537,13 +497,12 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ? 
CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); } -#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE + int set_two_phase_freq(int cpufreq) { dbs_tuners_ins.two_phase_freq = cpufreq; return 0; } -#endif static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) { @@ -554,20 +513,12 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) struct cpufreq_policy *policy; unsigned int j; -#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE static unsigned int phase = 0; static unsigned int counter = 0; -#endif this_dbs_info->freq_lo = 0; policy = this_dbs_info->cur_policy; - /* - * keep freq for touch boost - */ -// if (Touch_poke_boost_till_jiffies > jiffies) -// return; - /* * Every sampling_rate, we check, if current idle time is less * than 20% (default), then we try to increase frequency @@ -660,13 +611,6 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* Check for frequency increase */ if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { /* If switching to max speed, apply sampling_down_factor */ -#ifndef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE - if (policy->cur < policy->max) - this_dbs_info->rate_mult = - dbs_tuners_ins.sampling_down_factor; - debug_freq = policy->max; - dbs_freq_increase(policy, policy->max); -#else if (counter < 5) { counter++; if (counter > 2) { @@ -691,10 +635,9 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) debug_freq = policy->max; dbs_freq_increase(policy, policy->max); } -#endif return; } -#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_2_PHASE + if (counter > 0) { counter--; if (counter == 0) { @@ -702,10 +645,9 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) phase = 0; } } -#endif + /* Check for frequency decrease */ /* if we cannot reduce the frequency anymore, break out early */ - if (policy->cur == policy->min) return; @@ -718,7 +660,6 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * policy->cur) { unsigned int freq_next; - unsigned int freq_min; freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential); @@ -726,17 +667,8 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* No longer fully busy, reset rate_mult */ this_dbs_info->rate_mult = 1; - if (Touch_poke_boost_till_jiffies > jiffies) { - if (is_lp_cluster()) { - freq_min = idle_top_freq; - } else { - freq_min = 860000; } - } else { - freq_min = policy->min; - } - - if (freq_next < freq_min) - freq_next = freq_min; + if (freq_next < policy->min) + freq_next = policy->min; if (!dbs_tuners_ins.powersave_bias) { debug_freq = freq_next; @@ -869,10 +801,6 @@ static void dbs_chown(void) if (ret) pr_err("sys_chown down_differential error: %d", ret); - ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/touch_poke", low2highuid(AID_SYSTEM), low2highgid(0)); - if (ret) - pr_err("sys_chown touch_poke error: %d", ret); - ret = sys_chown("/sys/devices/system/cpu/cpufreq/ondemand/ui_sampling_rate", low2highuid(AID_SYSTEM), low2highgid(0)); if (ret) pr_err("sys_chown ui_sampling_rate error: %d", ret); @@ -886,8 +814,6 @@ static void dbs_refresh_callback(struct work_struct *unused) { struct cpufreq_policy *policy; struct cpu_dbs_info_s *this_dbs_info; - unsigned int nr_cpus; - unsigned int touch_poke_freq; unsigned int cpu = smp_processor_id(); if (lock_policy_rwsem_write(cpu) < 0) @@ -899,108 +825,12 @@ static void dbs_refresh_callback(struct work_struct *unused) g_ui_counter = dbs_tuners_ins.ui_counter; 
if(dbs_tuners_ins.ui_counter > 0) dbs_tuners_ins.sampling_rate = dbs_tuners_ins.ui_sampling_rate; - if (Touch_poke_boost_duration_ms) - Touch_poke_boost_till_jiffies = - jiffies + msecs_to_jiffies(Touch_poke_boost_duration_ms); - - /* We poke the frequency base on the online cpu number */ - nr_cpus = num_online_cpus(); - - if (!is_lp_cluster()) - touch_poke_freq = Touch_poke_attr[nr_cpus-1]; - else - touch_poke_freq = idle_top_freq; - - if(touch_poke_freq == 0 || policy->cur >= touch_poke_freq){ - unlock_policy_rwsem_write(cpu); - return; - } - - __cpufreq_driver_target(policy, touch_poke_freq, - CPUFREQ_RELATION_L); - this_dbs_info->prev_cpu_idle = get_cpu_idle_time(cpu, - &this_dbs_info->prev_cpu_wall); unlock_policy_rwsem_write(cpu); } static DECLARE_WORK(dbs_refresh_work, dbs_refresh_callback); -static void dbs_input_event(struct input_handle *handle, unsigned int type, - unsigned int code, int value) -{ - if (dbs_tuners_ins.touch_poke) - schedule_work(&dbs_refresh_work); -} - -static int input_dev_filter(const char* input_dev_name) -{ - int ret = 0; - if (strstr(input_dev_name, "touchscreen") || - strstr(input_dev_name, "-keypad") || - strstr(input_dev_name, "-nav") || - strstr(input_dev_name, "-oj")) { - } - else { - ret = 1; - } - return ret; -} - - -static int dbs_input_connect(struct input_handler *handler, - struct input_dev *dev, const struct input_device_id *id) -{ - struct input_handle *handle; - int error; - - /* filter out those input_dev that we don't care */ - if (input_dev_filter(dev->name)) - return 0; - - handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - - handle->dev = dev; - handle->handler = handler; - handle->name = "cpufreq"; - - error = input_register_handle(handle); - if (error) - goto err2; - - error = input_open_device(handle); - if (error) - goto err1; - - return 0; -err1: - input_unregister_handle(handle); -err2: - kfree(handle); - return error; -} - -static void dbs_input_disconnect(struct input_handle *handle) -{ - input_close_device(handle); - input_unregister_handle(handle); - kfree(handle); -} - -static const struct input_device_id dbs_ids[] = { - { .driver_info = 1 }, - { }, -}; -static struct input_handler dbs_input_handler = { - .event = dbs_input_event, - .connect = dbs_input_connect, - .disconnect = dbs_input_disconnect, - .name = "cpufreq_ond", - .id_table = dbs_ids, -}; - static int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { @@ -1065,8 +895,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, dbs_tuners_ins.origin_sampling_rate = dbs_tuners_ins.sampling_rate; dbs_tuners_ins.io_is_busy = should_io_be_busy(); } - if (!cpu) - rc = input_register_handler(&dbs_input_handler); mutex_unlock(&dbs_mutex); @@ -1081,9 +909,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, mutex_destroy(&this_dbs_info->timer_mutex); dbs_enable--; - if (!cpu) - input_unregister_handler(&dbs_input_handler); - if (!dbs_enable) sysfs_remove_group(cpufreq_global_kobject, &dbs_attr_group); diff --git a/drivers/cpufreq/cpufreq_touchdemand.c b/drivers/cpufreq/cpufreq_touchdemand.c new file mode 100644 index 00000000000..f8fb6d29536 --- /dev/null +++ b/drivers/cpufreq/cpufreq_touchdemand.c @@ -0,0 +1,1076 @@ +/* + * drivers/cpufreq/cpufreq_touchdemand.c + * + * Touch-Demand + * - Cpu scaling governor based on ondemand + * - Modified for Tegra 3, the LP core, and touchscreen response + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . 
+ * Jun Nakajima + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../arch/arm/mach-tegra/clock.h" +#include "../../arch/arm/mach-tegra/pm.h" + +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_SAMPLING_DOWN_FACTOR (2) +#define MAX_SAMPLING_DOWN_FACTOR (100000) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define MICRO_FREQUENCY_UP_THRESHOLD (80) +#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) +#define MIN_FREQUENCY_UP_THRESHOLD (11) +#define MAX_FREQUENCY_UP_THRESHOLD (100) +#define DEF_SAMPLING_RATE (50000) +#define DEF_IO_IS_BUSY (1) + +/* + * The polling frequency of this governor depends on the capability of + * the processor. Default polling frequency is 1000 times the transition + * latency of the processor. The governor will work on any processor with + * transition latency <= 10mS, using appropriate sampling + * rate. + * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) + * this governor will not work. + * All times here are in uS. + */ +#define MIN_SAMPLING_RATE_RATIO (2) + +static unsigned int min_sampling_rate; +static unsigned int def_sampling_rate; + +#define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (100) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + +static void do_dbs_timer(struct work_struct *work); +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +/* lpcpu variables */ +static struct clk *cpu_lp_clk; +static unsigned int idle_top_freq; + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND +static +#endif +struct cpufreq_governor cpufreq_gov_touchdemand = { + .name = "touchdemand", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + +struct cpu_dbs_info_s { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_iowait; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_jiffies; + unsigned int freq_hi_jiffies; + unsigned int rate_mult; + int cpu; + unsigned int sample_type:1; + /* + * percpu mutex that serializes governor limit change with + * do_dbs_timer invocation. We do not want do_dbs_timer to run + * when user is changing the governor or limits. + */ + struct mutex timer_mutex; +}; + +static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +/* + * dbs_mutex protects dbs_enable in governor start/stop. 
+ */ +static DEFINE_MUTEX(dbs_mutex); + +static struct dbs_tuners { + unsigned int sampling_rate; + unsigned int up_threshold; + unsigned int down_differential; + unsigned int ignore_nice; + unsigned int sampling_down_factor; + unsigned int powersave_bias; + unsigned int io_is_busy; + unsigned int touch_floor_freq; + unsigned int touch_floor_time; + unsigned int touch_poke; + unsigned int origin_sampling_rate; +} dbs_tuners_ins = { + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, + .ignore_nice = 0, + .powersave_bias = 0, + .touch_floor_freq = 860000, + .touch_floor_time = 2000, + .touch_poke = 1, +}; + +static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, + cputime64_t *wall) +{ + cputime64_t idle_time; + cputime64_t cur_wall_time; + cputime64_t busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user, + kstat_cpu(cpu).cpustat.system); + + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice); + + idle_time = cputime64_sub(cur_wall_time, busy_time); + if (wall) + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); + + return (cputime64_t)jiffies_to_usecs(idle_time); +} + +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, wall); + + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); + + return idle_time; +} + +static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall) +{ + u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + + if (iowait_time == -1ULL) + return 0; + + return iowait_time; +} + +/* + * Find right freq to be set now with powersave_bias on. + * Returns the freq_hi to be used right now and will set freq_hi_jiffies, + * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. 
+ */ +static unsigned int powersave_bias_target(struct cpufreq_policy *policy, + unsigned int freq_next, + unsigned int relation) +{ + unsigned int freq_req, freq_reduc, freq_avg; + unsigned int freq_hi, freq_lo; + unsigned int index = 0; + unsigned int jiffies_total, jiffies_hi, jiffies_lo; + struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + policy->cpu); + + if (!dbs_info->freq_table) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_next; + } + + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, + relation, &index); + freq_req = dbs_info->freq_table[index].frequency; + freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000; + freq_avg = freq_req - freq_reduc; + + /* Find freq bounds for freq_avg in freq_table */ + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_H, &index); + freq_lo = dbs_info->freq_table[index].frequency; + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_L, &index); + freq_hi = dbs_info->freq_table[index].frequency; + + /* Find out how long we have to be in hi and lo freqs */ + if (freq_hi == freq_lo) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_lo; + } + jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + jiffies_hi = (freq_avg - freq_lo) * jiffies_total; + jiffies_hi += ((freq_hi - freq_lo) / 2); + jiffies_hi /= (freq_hi - freq_lo); + jiffies_lo = jiffies_total - jiffies_hi; + dbs_info->freq_lo = freq_lo; + dbs_info->freq_lo_jiffies = jiffies_lo; + dbs_info->freq_hi_jiffies = jiffies_hi; + return freq_hi; +} + +static void touchdemand_powersave_bias_init_cpu(int cpu) +{ + struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_lo = 0; +} + +static void touchdemand_powersave_bias_init(void) +{ + int i; + for_each_online_cpu(i) { + touchdemand_powersave_bias_init_cpu(i); + } +} + +/************************** sysfs interface ************************/ + +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", min_sampling_rate); +} + +define_one_global_ro(sampling_rate_min); + +/* cpufreq_touchdemand Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(io_is_busy, io_is_busy); +show_one(up_threshold, up_threshold); +show_one(sampling_down_factor, sampling_down_factor); +show_one(down_differential, down_differential); +show_one(ignore_nice_load, ignore_nice); +show_one(powersave_bias, powersave_bias); +show_one(touch_floor_freq, touch_floor_freq); +show_one(touch_floor_time, touch_floor_time); +show_one(touch_poke, touch_poke); + +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + dbs_tuners_ins.origin_sampling_rate = dbs_tuners_ins.sampling_rate; + return count; +} + +static ssize_t store_touch_floor_freq(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + 
return -EINVAL; + + dbs_tuners_ins.touch_floor_freq = input; + + return count; +} + +static ssize_t store_touch_floor_time(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.touch_floor_time = input; + + return count; +} + +static unsigned int Touch_poke_attr[4] = {1500000, 1100000, 0, 0}; +static unsigned int Touch_poke_boost = 1; +static unsigned long Touch_poke_boost_till_jiffies = 0; + +static ssize_t store_touch_poke(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + int ret; + ret = sscanf(buf, "%u,%u,%u,%u,%u", &Touch_poke_attr[0], &Touch_poke_attr[1], + &Touch_poke_attr[2], &Touch_poke_attr[3], &Touch_poke_boost); + if (ret < 4) + return -EINVAL; + + if (ret != 5) + Touch_poke_boost = 0; + + if(Touch_poke_attr[0] == 0) + dbs_tuners_ins.touch_poke = 0; + else + dbs_tuners_ins.touch_poke = 1; + + return count; +} + +static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + dbs_tuners_ins.io_is_busy = !!input; + return count; +} + +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + return -EINVAL; + } + dbs_tuners_ins.up_threshold = input; + return count; +} + +static ssize_t store_down_differential(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if(ret != 1) { + return -EINVAL; + } + dbs_tuners_ins.down_differential = input; + return count; +} + +static ssize_t store_sampling_down_factor(struct kobject *a, + struct attribute *b, const char *buf, size_t count) +{ + unsigned int input, j; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + dbs_tuners_ins.sampling_down_factor = input; + + /* Reset down sampling multiplier in case it was active */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->rate_mult = 1; + } + return count; +} + +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) + dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + + } + return count; +} + +static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 1000) + input = 1000; + + dbs_tuners_ins.powersave_bias = input; + touchdemand_powersave_bias_init(); + 
return count; +} + +define_one_global_rw(sampling_rate); +define_one_global_rw(io_is_busy); +define_one_global_rw(up_threshold); +define_one_global_rw(down_differential); +define_one_global_rw(sampling_down_factor); +define_one_global_rw(ignore_nice_load); +define_one_global_rw(powersave_bias); +define_one_global_rw(touch_floor_freq); +define_one_global_rw(touch_floor_time); +define_one_global_rw(touch_poke); + +static struct attribute *dbs_attributes[] = { + &sampling_rate_min.attr, + &sampling_rate.attr, + &up_threshold.attr, + &down_differential.attr, + &sampling_down_factor.attr, + &ignore_nice_load.attr, + &powersave_bias.attr, + &io_is_busy.attr, + &touch_floor_freq.attr, + &touch_floor_time.attr, + &touch_poke.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "touchdemand", +}; + +/************************** sysfs end ************************/ + +static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) +{ + if (dbs_tuners_ins.powersave_bias) + freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H); + //else if (p->cur == p->max) + // return; + + __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ? + CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); +} + +int set_touch_floor_freq(int cpufreq) +{ + dbs_tuners_ins.touch_floor_freq = cpufreq; + return 0; +} + +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) +{ + unsigned int max_load_freq; + unsigned int debug_freq; + unsigned int debug_load; + unsigned int debug_iowait; + + struct cpufreq_policy *policy; + unsigned int j; + + this_dbs_info->freq_lo = 0; + policy = this_dbs_info->cur_policy; + + /* + * keep freq for touch boost + */ +// if (Touch_poke_boost_till_jiffies > jiffies) +// return; + + /* + * Every sampling_rate, we check, if current idle time is less + * than 20% (default), then we try to increase frequency + * Every sampling_rate, we look for a the lowest + * frequency which can sustain the load while keeping idle time over + * 30%. If such a frequency exist, we try to decrease to this frequency. + * + * Any frequency increase takes it to the maximum frequency. 
+ * Frequency reduction happens at minimum steps of + * 5% (default) of current frequency + */ + + /* Get Absolute Load - in terms of freq */ + max_load_freq = 0; + + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; + unsigned int idle_time, wall_time, iowait_time; + unsigned int load, load_freq; + int freq_avg; + + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); + + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + j_dbs_info->prev_cpu_wall); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) cputime64_sub(cur_idle_time, + j_dbs_info->prev_cpu_idle); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + iowait_time = (unsigned int) cputime64_sub(cur_iowait_time, + j_dbs_info->prev_cpu_iowait); + j_dbs_info->prev_cpu_iowait = cur_iowait_time; + + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice, + j_dbs_info->prev_cpu_nice); + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + + /* + * For the purpose of touchdemand, waiting for disk IO is an + * indication that you're performance critical, and not that + * the system is actually idle. So subtract the iowait time + * from the cpu idle time. + */ + + if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time) + idle_time -= iowait_time; + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + + freq_avg = __cpufreq_driver_getavg(policy, j); + if (freq_avg <= 0) + freq_avg = policy->cur; + + load_freq = load * freq_avg; + if (load_freq > max_load_freq) { + max_load_freq = load_freq; + debug_load = load; + debug_iowait = 100 * iowait_time / wall_time; + } + } + + /* Check for frequency increase */ + if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { + /* If switching to max speed, apply sampling_down_factor */ + if (policy->cur < policy->max) + this_dbs_info->rate_mult = + dbs_tuners_ins.sampling_down_factor; + debug_freq = policy->max; + dbs_freq_increase(policy, policy->max); + return; + } + + /* Check for frequency decrease */ + /* if we cannot reduce the frequency anymore, break out early */ + + if (policy->cur == policy->min) + return; + + /* + * The optimal frequency is the frequency that is the lowest that + * can support the current CPU usage without triggering the up + * policy. To be safe, we focus 10 points under the threshold. 
+ */ + if (max_load_freq < + (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * + policy->cur) { + unsigned int freq_next; + unsigned int freq_min; + freq_next = max_load_freq / + (dbs_tuners_ins.up_threshold - + dbs_tuners_ins.down_differential); + + /* No longer fully busy, reset rate_mult */ + this_dbs_info->rate_mult = 1; + + if (Touch_poke_boost_till_jiffies > jiffies) { + if (is_lp_cluster()) { + freq_min = idle_top_freq; + } else { + freq_min = dbs_tuners_ins.touch_floor_freq; } + } else { + freq_min = policy->min; + } + + if (freq_next < freq_min) + freq_next = freq_min; + + if (!dbs_tuners_ins.powersave_bias) { + debug_freq = freq_next; + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + } else { + int freq = powersave_bias_target(policy, freq_next, + CPUFREQ_RELATION_L); + debug_freq = freq; + __cpufreq_driver_target(policy, freq, + CPUFREQ_RELATION_L); + } + } +} + +static void do_dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info_s *dbs_info = + container_of(work, struct cpu_dbs_info_s, work.work); + unsigned int cpu = dbs_info->cpu; + int sample_type = dbs_info->sample_type; + + int delay; + + mutex_lock(&dbs_info->timer_mutex); + + /* Common NORMAL_SAMPLE setup */ + dbs_info->sample_type = DBS_NORMAL_SAMPLE; + if (!dbs_tuners_ins.powersave_bias || + sample_type == DBS_NORMAL_SAMPLE) { + dbs_check_cpu(dbs_info); + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + dbs_info->sample_type = DBS_SUB_SAMPLE; + delay = dbs_info->freq_hi_jiffies; + } else { + /* We want all CPUs to do sampling nearly on + * same jiffy + */ + delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate + * dbs_info->rate_mult); + + if (num_online_cpus() > 1) + delay -= jiffies % delay; + } + } else { + __cpufreq_driver_target(dbs_info->cur_policy, + dbs_info->freq_lo, CPUFREQ_RELATION_H); + delay = dbs_info->freq_lo_jiffies; + } + schedule_delayed_work_on(cpu, &dbs_info->work, delay); + mutex_unlock(&dbs_info->timer_mutex); +} + +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) +{ + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + dbs_info->sample_type = DBS_NORMAL_SAMPLE; + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); + schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay); +} + +static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) +{ + cancel_delayed_work_sync(&dbs_info->work); +} + +/* + * Not all CPUs want IO time to be accounted as busy; this dependson how + * efficient idling at a higher frequency/voltage is. + * Pavel Machek says this is not so for various generations of AMD and old + * Intel systems. + * Mike Chan (androidlcom) calis this is also not true for ARM. + * Because of this, whitelist specific known (series) of CPUs by default, and + * leave all others up to the user. + */ +static int should_io_be_busy(void) +{ +#if defined(CONFIG_X86) + /* + * For Intel, Core 2 (model 15) andl later have an efficient idle. 
+ */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model >= 15) + return 1; +#endif + return DEF_IO_IS_BUSY; +} + +#define AID_SYSTEM (1000) +static void dbs_chown(void) +{ + int ret; + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/ignore_nice_load", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown ignore_nice_load error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/io_is_busy", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown io_is_busy error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/powersave_bias", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown powersave_bias error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/sampling_down_factor", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown sampling_down_factor error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/sampling_rate", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown sampling_rate error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/touch_floor_freq", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown touch_floor_freq error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/touch_floor_time", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown touch_floor_time error: %d", ret); + + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/up_threshold", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown up_threshold error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/down_differential", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown down_differential error: %d", ret); + + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/touch_poke", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown touch_poke error: %d", ret); +} + +static void dbs_refresh_callback(struct work_struct *unused) +{ + struct cpufreq_policy *policy; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int nr_cpus; + unsigned int touch_poke_freq; + unsigned int cpu = smp_processor_id(); + + if (lock_policy_rwsem_write(cpu) < 0) + return; + + this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + policy = this_dbs_info->cur_policy; + + if (Touch_poke_boost) + Touch_poke_boost_till_jiffies = + jiffies + msecs_to_jiffies(dbs_tuners_ins.touch_floor_time); + + /* We poke the frequency base on the online cpu number */ + nr_cpus = num_online_cpus(); + + if (!is_lp_cluster()) + touch_poke_freq = Touch_poke_attr[nr_cpus-1]; + else + touch_poke_freq = idle_top_freq; + + if(touch_poke_freq == 0 || policy->cur >= touch_poke_freq){ + unlock_policy_rwsem_write(cpu); + return; + } + + __cpufreq_driver_target(policy, touch_poke_freq, + CPUFREQ_RELATION_L); + this_dbs_info->prev_cpu_idle = get_cpu_idle_time(cpu, + &this_dbs_info->prev_cpu_wall); + + unlock_policy_rwsem_write(cpu); +} + +static DECLARE_WORK(dbs_refresh_work, dbs_refresh_callback); + +static void dbs_input_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + if (dbs_tuners_ins.touch_poke) + schedule_work(&dbs_refresh_work); +} + +static int input_dev_filter(const char* input_dev_name) +{ + int ret = 0; + if (strstr(input_dev_name, "touchscreen") || + 
strstr(input_dev_name, "-keypad") || + strstr(input_dev_name, "-nav") || + strstr(input_dev_name, "-oj")) { + } + else { + ret = 1; + } + return ret; +} + + +static int dbs_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) +{ + struct input_handle *handle; + int error; + + /* filter out those input_dev that we don't care */ + if (input_dev_filter(dev->name)) + return 0; + + handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = "cpufreq"; + + error = input_register_handle(handle); + if (error) + goto err2; + + error = input_open_device(handle); + if (error) + goto err1; + + return 0; +err1: + input_unregister_handle(handle); +err2: + kfree(handle); + return error; +} + +static void dbs_input_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} + +static const struct input_device_id dbs_ids[] = { + { .driver_info = 1 }, + { }, +}; +static struct input_handler dbs_input_handler = { + .event = dbs_input_event, + .connect = dbs_input_connect, + .disconnect = dbs_input_disconnect, + .name = "cpufreq_ond", + .id_table = dbs_ids, +}; + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int j; + int rc; + + this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + mutex_lock(&dbs_mutex); + + dbs_enable++; + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); + if (dbs_tuners_ins.ignore_nice) { + j_dbs_info->prev_cpu_nice = + kstat_cpu(j).cpustat.nice; + } + } + this_dbs_info->cpu = cpu; + this_dbs_info->rate_mult = 1; + touchdemand_powersave_bias_init_cpu(cpu); + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + unsigned int latency; + + rc = sysfs_create_group(cpufreq_global_kobject, + &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + + dbs_chown(); + + /* policy latency is in nS. 
Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + /* Bring kernel and HW constraints together */ + min_sampling_rate = max(min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + dbs_tuners_ins.sampling_rate = + max(min_sampling_rate, + latency * LATENCY_MULTIPLIER); + if (def_sampling_rate) + dbs_tuners_ins.sampling_rate = def_sampling_rate; + dbs_tuners_ins.origin_sampling_rate = dbs_tuners_ins.sampling_rate; + dbs_tuners_ins.io_is_busy = should_io_be_busy(); + } + if (!cpu) + rc = input_register_handler(&dbs_input_handler); + + mutex_unlock(&dbs_mutex); + + mutex_init(&this_dbs_info->timer_mutex); + dbs_timer_init(this_dbs_info); + break; + + case CPUFREQ_GOV_STOP: + dbs_timer_exit(this_dbs_info); + + mutex_lock(&dbs_mutex); + mutex_destroy(&this_dbs_info->timer_mutex); + dbs_enable--; + + if (!cpu) + input_unregister_handler(&dbs_input_handler); + + if (!dbs_enable) + sysfs_remove_group(cpufreq_global_kobject, + &dbs_attr_group); + mutex_unlock(&dbs_mutex); + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&this_dbs_info->timer_mutex); + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->min, CPUFREQ_RELATION_L); + mutex_unlock(&this_dbs_info->timer_mutex); + break; + } + return 0; +} + +static int __init cpufreq_gov_dbs_init(void) +{ + cputime64_t wall; + u64 idle_time; + int cpu = get_cpu(); + + cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); + idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; + + idle_time = get_cpu_idle_time_us(cpu, &wall); + put_cpu(); + if (idle_time != -1ULL) { + /* Idle micro accounting is supported. Use finer thresholds */ + dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + dbs_tuners_ins.down_differential = + MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + /* + * In no_hz/micro accounting case we set the minimum frequency + * not depending on HZ, but fixed (very low). The deferred + * timer might skip some samples if idle/sleeping as needed. 
+ */ + min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + } else { + /* For correct statistics, we need 10 ticks for each measure */ + min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); + } + def_sampling_rate = DEF_SAMPLING_RATE; + + return cpufreq_register_governor(&cpufreq_gov_touchdemand); +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_touchdemand); +} + + +MODULE_AUTHOR("Venkatesh Pallipadi "); +MODULE_AUTHOR("Alexey Starikovskiy "); +MODULE_DESCRIPTION("'cpufreq_touchdemand' - A dynamic cpufreq governor " + "based on ondemand modified for Tegra 3 by Metallice"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 251a08aaac9..673d2e58cee 100755 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -367,6 +367,9 @@ extern struct cpufreq_governor cpufreq_gov_userspace; #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND) extern struct cpufreq_governor cpufreq_gov_ondemand; #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_ondemand) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND) +extern struct cpufreq_governor cpufreq_gov_touchdemand; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_touchdemand) #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) extern struct cpufreq_governor cpufreq_gov_conservative; #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) From d4df31273d8a3756f737357d59bf8cf8a211f678 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 11 Jan 2013 23:47:58 -0500 Subject: [PATCH 289/678] mach-tegra: board-grouper-panel.c: tweak backlight levels --- arch/arm/mach-tegra/board-grouper-panel.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index b169f8905c3..aeb1139f051 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -66,6 +66,7 @@ static struct regulator *grouper_lvds_reg; static struct regulator *grouper_lvds_vdd_panel; static tegra_dc_bl_output grouper_bl_output_measured = { + /* Stock Backlight values */ /* 0, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, @@ -99,10 +100,12 @@ static tegra_dc_bl_output grouper_bl_output_measured = { 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 */ - /* 0 - 15 */ - 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, - /* 16 - 31 */ - 11, 12, 13, 13, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, + /* 0 - 9 */ /* Unused by standard android brightness settings */ + 0, 1, 2, 3, 4, 5, 6, 6, 7, 7, + /* 10 - 15 */ + 8, 9, 10, 11, 12, 13, + /* 16 - 31 */ /* Screen dimmer minimum */ + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, /* 32 - 47 */ 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, /* 48 - 63 */ @@ -443,7 +446,7 @@ static struct tegra_dc_sd_settings grouper_sd_settings = { .bin_width = -1, .aggressiveness = 1, .phase_in_adjustments = true, - .panel_min_brightness = 19, + .panel_min_brightness = 16, .use_vid_luma = false, /* Default video coefficients */ .coeff = {5, 9, 2}, From 99f2c0f4972c637a61154dff0011e30416c34ba0 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 11 Jan 2013 23:48:39 -0500 Subject: [PATCH 290/678] mach-tegra: tegra3_dvfs.c: clean up --- 
arch/arm/mach-tegra/tegra3_dvfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 9b9408fb92c..cfbdc261dc4 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -177,10 +177,6 @@ static struct dvfs cpu_dvfs_table[] = { /* Nexus 7 - faking speedo id = 4, process id =2*/ /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600, 1700, 1700, 1800, 1900), - - /*Cpu voltages (mV): 775, 775, 825, 825, 900, 900, 975, 975, 1000, 1000, 1025, 1050, 1100, 1125, 1175, 1200, 1212, 1237 */ -// CPU_DVFS("cpu_g", 4, 2, MHZ, 475, 475, 620, 620, 860, 860, 1000, 1000, 1100, 1100, 1200, 1300, 1400, 1500, 1600), - CPU_DVFS("cpu_g", 4, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500), CPU_DVFS("cpu_g", 5, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), From 14125c6e00de75f381ad682b00fa7dccd6dcb2f0 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 11 Jan 2013 23:49:17 -0500 Subject: [PATCH 291/678] cpufreq: touchdemand: add touch factor, a down differential multiplier tweak some values as well --- drivers/cpufreq/cpufreq_touchdemand.c | 45 ++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq_touchdemand.c b/drivers/cpufreq/cpufreq_touchdemand.c index f8fb6d29536..c66a4698744 100644 --- a/drivers/cpufreq/cpufreq_touchdemand.c +++ b/drivers/cpufreq/cpufreq_touchdemand.c @@ -41,12 +41,12 @@ * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) -#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5) +#define DEF_FREQUENCY_UP_THRESHOLD (85) #define DEF_SAMPLING_DOWN_FACTOR (2) #define MAX_SAMPLING_DOWN_FACTOR (100000) -#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (10) -#define MICRO_FREQUENCY_UP_THRESHOLD (80) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (5) +#define MICRO_FREQUENCY_UP_THRESHOLD (85) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) @@ -134,6 +134,7 @@ static struct dbs_tuners { unsigned int io_is_busy; unsigned int touch_floor_freq; unsigned int touch_floor_time; + unsigned int touch_factor; unsigned int touch_poke; unsigned int origin_sampling_rate; } dbs_tuners_ins = { @@ -142,8 +143,9 @@ static struct dbs_tuners { .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, .ignore_nice = 0, .powersave_bias = 0, - .touch_floor_freq = 860000, + .touch_floor_freq = 666000, .touch_floor_time = 2000, + .touch_factor = 2, .touch_poke = 1, }; @@ -286,6 +288,7 @@ show_one(ignore_nice_load, ignore_nice); show_one(powersave_bias, powersave_bias); show_one(touch_floor_freq, touch_floor_freq); show_one(touch_floor_time, touch_floor_time); +show_one(touch_factor, touch_factor); show_one(touch_poke, touch_poke); static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, @@ -329,7 +332,21 @@ static ssize_t store_touch_floor_time(struct kobject *a, struct attribute *b, return count; } -static unsigned int Touch_poke_attr[4] = {1500000, 1100000, 0, 0}; +static ssize_t store_touch_factor(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, 
"%u", &input); + if (ret != 1) + return -EINVAL; + + dbs_tuners_ins.touch_factor = input; + + return count; +} + +static unsigned int Touch_poke_attr[4] = {1200000, 1000000, 0, 0}; static unsigned int Touch_poke_boost = 1; static unsigned long Touch_poke_boost_till_jiffies = 0; @@ -475,6 +492,7 @@ define_one_global_rw(ignore_nice_load); define_one_global_rw(powersave_bias); define_one_global_rw(touch_floor_freq); define_one_global_rw(touch_floor_time); +define_one_global_rw(touch_factor); define_one_global_rw(touch_poke); static struct attribute *dbs_attributes[] = { @@ -488,6 +506,7 @@ static struct attribute *dbs_attributes[] = { &io_is_busy.attr, &touch_floor_freq.attr, &touch_floor_time.attr, + &touch_factor.attr, &touch_poke.attr, NULL }; @@ -522,6 +541,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) unsigned int debug_freq; unsigned int debug_load; unsigned int debug_iowait; + unsigned int down_diff; struct cpufreq_policy *policy; unsigned int j; @@ -640,14 +660,20 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * can support the current CPU usage without triggering the up * policy. To be safe, we focus 10 points under the threshold. */ + if (Touch_poke_boost_till_jiffies > jiffies) { + down_diff = (dbs_tuners_ins.down_differential * dbs_tuners_ins.touch_factor); + } else { + down_diff = dbs_tuners_ins.down_differential; + } + if (max_load_freq < - (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * + (dbs_tuners_ins.up_threshold - down_diff) * policy->cur) { unsigned int freq_next; unsigned int freq_min; freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - - dbs_tuners_ins.down_differential); + down_diff); /* No longer fully busy, reset rate_mult */ this_dbs_info->rate_mult = 1; @@ -791,6 +817,9 @@ static void dbs_chown(void) if (ret) pr_err("sys_chown touch_floor_time error: %d", ret); + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/touch_factor", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown touch_factor error: %d", ret); ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/up_threshold", low2highuid(AID_SYSTEM), low2highgid(0)); if (ret) From 98b6e31e1391be3570ff761771547d7494099d5d Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 11 Jan 2013 23:50:48 -0500 Subject: [PATCH 292/678] ARM: tegra: Add Tegra AHB driver --- arch/arm/configs/metallice_grouper_defconfig | 5 +- arch/arm/mach-tegra/Kconfig | 8 + drivers/Makefile | 2 +- drivers/amba/Makefile | 4 +- drivers/amba/tegra-ahb.c | 261 +++++++++++++++++++ 5 files changed, 276 insertions(+), 4 deletions(-) create mode 100644 drivers/amba/tegra-ahb.c diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 01ca40768ab..ac11d32a7e2 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -295,6 +295,7 @@ CONFIG_ARCH_TEGRA_HAS_DUAL_CPU_CLUSTERS=y CONFIG_ARCH_TEGRA_HAS_PCIE=y CONFIG_ARCH_TEGRA_HAS_SATA=y CONFIG_TEGRA_PCI=y +CONFIG_TEGRA_AHB=y # # Tegra board type @@ -526,13 +527,15 @@ CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND=y # CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE is not set 
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_GOV_USERSPACE is not set CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_TOUCHDEMAND=y CONFIG_CPU_FREQ_GOV_INTERACTIVE=y # CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_GOV_LULZACTIVE is not set diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index cc4137531bf..6fdaaff051e 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -71,6 +71,14 @@ config TEGRA_PCI help Adds PCIe Host controller driver for tegra based systems +config TEGRA_AHB + bool "Enable AHB driver for NVIDIA Tegra SoCs" + default y + help + Adds AHB configuration functionality for NVIDIA Tegra SoCs, + which controls AHB bus master arbitration and some + perfomance parameters(priority, prefech size). + comment "Tegra board type" config MACH_HARMONY diff --git a/drivers/Makefile b/drivers/Makefile index 24e48fc3526..bad062b86c6 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -17,7 +17,7 @@ obj-$(CONFIG_SFI) += sfi/ # PnP must come after ACPI since it will eventually need to check if acpi # was used and do nothing if so obj-$(CONFIG_PNP) += pnp/ -obj-$(CONFIG_ARM_AMBA) += amba/ +obj-$(CONFIG_ARM) += amba/ # Many drivers will want to use DMA so this has to be made available # really early. obj-$(CONFIG_DMA_ENGINE) += dma/ diff --git a/drivers/amba/Makefile b/drivers/amba/Makefile index 40fe74097be..66e81c2f1e3 100644 --- a/drivers/amba/Makefile +++ b/drivers/amba/Makefile @@ -1,2 +1,2 @@ -obj-y += bus.o - +obj-$(CONFIG_ARM_AMBA) += bus.o +obj-$(CONFIG_TEGRA_AHB) += tegra-ahb.o diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c new file mode 100644 index 00000000000..106a780d29a --- /dev/null +++ b/drivers/amba/tegra-ahb.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * Copyright (C) 2011 Google, Inc. + * + * Author: + * Jay Cheng + * James Wylder + * Benoit Goby + * Colin Cross + * Hiroshi DOYU + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include +#include +#include + +#define DRV_NAME "tegra-ahb" + +#define AHB_ARBITRATION_DISABLE 0x00 +#define AHB_ARBITRATION_PRIORITY_CTRL 0x04 +#define AHB_PRIORITY_WEIGHT(x) (((x) & 0x7) << 29) +#define PRIORITY_SELECT_USB BIT(6) +#define PRIORITY_SELECT_USB2 BIT(18) +#define PRIORITY_SELECT_USB3 BIT(17) + +#define AHB_GIZMO_AHB_MEM 0x0c +#define ENB_FAST_REARBITRATE BIT(2) +#define DONT_SPLIT_AHB_WR BIT(7) + +#define AHB_GIZMO_APB_DMA 0x10 +#define AHB_GIZMO_IDE 0x18 +#define AHB_GIZMO_USB 0x1c +#define AHB_GIZMO_AHB_XBAR_BRIDGE 0x20 +#define AHB_GIZMO_CPU_AHB_BRIDGE 0x24 +#define AHB_GIZMO_COP_AHB_BRIDGE 0x28 +#define AHB_GIZMO_XBAR_APB_CTLR 0x2c +#define AHB_GIZMO_VCP_AHB_BRIDGE 0x30 +#define AHB_GIZMO_NAND 0x3c +#define AHB_GIZMO_SDMMC4 0x44 +#define AHB_GIZMO_XIO 0x48 +#define AHB_GIZMO_BSEV 0x60 +#define AHB_GIZMO_BSEA 0x70 +#define AHB_GIZMO_NOR 0x74 +#define AHB_GIZMO_USB2 0x78 +#define AHB_GIZMO_USB3 0x7c +#define IMMEDIATE BIT(18) + +#define AHB_GIZMO_SDMMC1 0x80 +#define AHB_GIZMO_SDMMC2 0x84 +#define AHB_GIZMO_SDMMC3 0x88 +#define AHB_MEM_PREFETCH_CFG_X 0xd8 +#define AHB_ARBITRATION_XBAR_CTRL 0xdc +#define AHB_MEM_PREFETCH_CFG3 0xe0 +#define AHB_MEM_PREFETCH_CFG4 0xe4 +#define AHB_MEM_PREFETCH_CFG1 0xec +#define AHB_MEM_PREFETCH_CFG2 0xf0 +#define PREFETCH_ENB BIT(31) +#define MST_ID(x) (((x) & 0x1f) << 26) +#define AHBDMA_MST_ID MST_ID(5) +#define USB_MST_ID MST_ID(6) +#define USB2_MST_ID MST_ID(18) +#define USB3_MST_ID MST_ID(17) +#define ADDR_BNDRY(x) (((x) & 0xf) << 21) +#define INACTIVITY_TIMEOUT(x) (((x) & 0xffff) << 0) + +#define AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID 0xf8 + +static const u32 tegra_ahb_gizmo[] = { + AHB_ARBITRATION_DISABLE, + AHB_ARBITRATION_PRIORITY_CTRL, + AHB_GIZMO_AHB_MEM, + AHB_GIZMO_APB_DMA, + AHB_GIZMO_IDE, + AHB_GIZMO_USB, + AHB_GIZMO_AHB_XBAR_BRIDGE, + AHB_GIZMO_CPU_AHB_BRIDGE, + AHB_GIZMO_COP_AHB_BRIDGE, + AHB_GIZMO_XBAR_APB_CTLR, + AHB_GIZMO_VCP_AHB_BRIDGE, + AHB_GIZMO_NAND, + AHB_GIZMO_SDMMC4, + AHB_GIZMO_XIO, + AHB_GIZMO_BSEV, + AHB_GIZMO_BSEA, + AHB_GIZMO_NOR, + AHB_GIZMO_USB2, + AHB_GIZMO_USB3, + AHB_GIZMO_SDMMC1, + AHB_GIZMO_SDMMC2, + AHB_GIZMO_SDMMC3, + AHB_MEM_PREFETCH_CFG_X, + AHB_ARBITRATION_XBAR_CTRL, + AHB_MEM_PREFETCH_CFG3, + AHB_MEM_PREFETCH_CFG4, + AHB_MEM_PREFETCH_CFG1, + AHB_MEM_PREFETCH_CFG2, + AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID, +}; + +struct tegra_ahb { + void __iomem *regs; + struct device *dev; + u32 ctx[0]; +}; + +static inline u32 gizmo_readl(struct tegra_ahb *ahb, u32 offset) +{ + return readl(ahb->regs + offset); +} + +static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) +{ + writel(value, ahb->regs + offset); +} + +static int tegra_ahb_suspend(struct device *dev) +{ + int i; + struct tegra_ahb *ahb = dev_get_drvdata(dev); + + for (i = 0; i < ARRAY_SIZE(tegra_ahb_gizmo); i++) + ahb->ctx[i] = gizmo_readl(ahb, tegra_ahb_gizmo[i]); + return 0; +} + +static int tegra_ahb_resume(struct device *dev) +{ + int i; + struct tegra_ahb *ahb = dev_get_drvdata(dev); + + for (i = 0; i < ARRAY_SIZE(tegra_ahb_gizmo); i++) + gizmo_writel(ahb, ahb->ctx[i], tegra_ahb_gizmo[i]); + return 0; +} + +static UNIVERSAL_DEV_PM_OPS(tegra_ahb_pm, + tegra_ahb_suspend, + tegra_ahb_resume, NULL); + +static void tegra_ahb_gizmo_init(struct tegra_ahb *ahb) +{ + u32 val; + + val = gizmo_readl(ahb, AHB_GIZMO_AHB_MEM); + val |= ENB_FAST_REARBITRATE | IMMEDIATE | DONT_SPLIT_AHB_WR; + gizmo_writel(ahb, val, AHB_GIZMO_AHB_MEM); + + val = gizmo_readl(ahb, AHB_GIZMO_USB); + val |= IMMEDIATE; + gizmo_writel(ahb, 
val, AHB_GIZMO_USB); + + val = gizmo_readl(ahb, AHB_GIZMO_USB2); + val |= IMMEDIATE; + gizmo_writel(ahb, val, AHB_GIZMO_USB2); + + val = gizmo_readl(ahb, AHB_GIZMO_USB3); + val |= IMMEDIATE; + gizmo_writel(ahb, val, AHB_GIZMO_USB3); + + val = gizmo_readl(ahb, AHB_ARBITRATION_PRIORITY_CTRL); + val |= PRIORITY_SELECT_USB | + PRIORITY_SELECT_USB2 | + PRIORITY_SELECT_USB3 | + AHB_PRIORITY_WEIGHT(7); + gizmo_writel(ahb, val, AHB_ARBITRATION_PRIORITY_CTRL); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG1); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + AHBDMA_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG1); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG2); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG2); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG3); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB3_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG3); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG4); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB2_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG4); +} + +static int __devinit tegra_ahb_probe(struct platform_device *pdev) +{ + struct resource *res; + struct tegra_ahb *ahb; + size_t bytes; + + bytes = sizeof(*ahb) + sizeof(u32) * ARRAY_SIZE(tegra_ahb_gizmo); + ahb = devm_kzalloc(&pdev->dev, bytes, GFP_KERNEL); + if (!ahb) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + ahb->regs = devm_request_and_ioremap(&pdev->dev, res); + if (!ahb->regs) + return -EBUSY; + + ahb->dev = &pdev->dev; + platform_set_drvdata(pdev, ahb); + tegra_ahb_gizmo_init(ahb); + return 0; +} + +static int __devexit tegra_ahb_remove(struct platform_device *pdev) +{ + return 0; +} + +static const struct of_device_id tegra_ahb_of_match[] __devinitconst = { + { .compatible = "nvidia,tegra30-ahb", }, + { .compatible = "nvidia,tegra20-ahb", }, + {}, +}; + +static struct platform_driver tegra_ahb_driver = { + .probe = tegra_ahb_probe, + .remove = __devexit_p(tegra_ahb_remove), + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + .of_match_table = tegra_ahb_of_match, + .pm = &tegra_ahb_pm, + }, +}; +module_platform_driver(tegra_ahb_driver); + +MODULE_AUTHOR("Hiroshi DOYU "); +MODULE_DESCRIPTION("Tegra AHB driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:" DRV_NAME); From b3b34c01a789e0d4716554a0c066603787fa398b Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 11 Jan 2013 23:53:00 -0500 Subject: [PATCH 293/678] missing file from ahb commit --- .../bindings/arm/tegra/nvidia,tegra20-ahb.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt new file mode 100644 index 00000000000..234406d41c1 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt @@ -0,0 +1,11 @@ +NVIDIA Tegra AHB + +Required properties: +- compatible : "nvidia,tegra20-ahb" or "nvidia,tegra30-ahb" +- reg : Should contain 1 register ranges(address and length) + +Example: + ahb: ahb@6000c004 { + compatible = "nvidia,tegra20-ahb"; + reg = <0x6000c004 0x10c>; /* AHB Arbitration + Gizmo 
Controller */ + }; From 33c9ecb45e41274777207bfcb1a24a22eb2ae0e3 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 11 Jan 2013 23:54:42 -0500 Subject: [PATCH 294/678] ARM: tegra: Add SMMU enabler in AHB --- drivers/amba/tegra-ahb.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index 106a780d29a..aa0b1f16052 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -76,6 +76,10 @@ #define AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID 0xf8 +#define AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE BIT(17) + +static struct platform_driver tegra_ahb_driver; + static const u32 tegra_ahb_gizmo[] = { AHB_ARBITRATION_DISABLE, AHB_ARBITRATION_PRIORITY_CTRL, @@ -124,6 +128,34 @@ static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) writel(value, ahb->regs + offset); } +#ifdef CONFIG_ARCH_TEGRA_3x_SOC +static int tegra_ahb_match_by_smmu(struct device *dev, void *data) +{ + struct tegra_ahb *ahb = dev_get_drvdata(dev); + struct device_node *dn = data; + + return (ahb->dev->of_node == dn) ? 1 : 0; +} + +int tegra_ahb_enable_smmu(struct device_node *dn) +{ + struct device *dev; + u32 val; + struct tegra_ahb *ahb; + + dev = driver_find_device(&tegra_ahb_driver.driver, NULL, dn, + tegra_ahb_match_by_smmu); + if (!dev) + return -EPROBE_DEFER; + ahb = dev_get_drvdata(dev); + val = gizmo_readl(ahb, AHB_ARBITRATION_XBAR_CTRL); + val |= AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE; + gizmo_writel(ahb, val, AHB_ARBITRATION_XBAR_CTRL); + return 0; +} +EXPORT_SYMBOL(tegra_ahb_enable_smmu); +#endif + static int tegra_ahb_suspend(struct device *dev) { int i; From 73be60573c0d1c7ee6edc31a778db8ce49e64077 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 12 Jan 2013 00:38:21 -0500 Subject: [PATCH 295/678] amba: tegra-ahb: Remove empty *_remove() --- drivers/amba/tegra-ahb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index aa0b1f16052..0b6f0b28a48 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -264,11 +264,6 @@ static int __devinit tegra_ahb_probe(struct platform_device *pdev) return 0; } -static int __devexit tegra_ahb_remove(struct platform_device *pdev) -{ - return 0; -} - static const struct of_device_id tegra_ahb_of_match[] __devinitconst = { { .compatible = "nvidia,tegra30-ahb", }, { .compatible = "nvidia,tegra20-ahb", }, @@ -277,7 +272,6 @@ static const struct of_device_id tegra_ahb_of_match[] __devinitconst = { static struct platform_driver tegra_ahb_driver = { .probe = tegra_ahb_probe, - .remove = __devexit_p(tegra_ahb_remove), .driver = { .name = DRV_NAME, .owner = THIS_MODULE, From 94b7246c25c18b8434f9dcae65f9a65110eb6db0 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 12 Jan 2013 00:41:02 -0500 Subject: [PATCH 296/678] iommu/tegra: smmu: Refrain from accessing to AHB --- drivers/iommu/tegra-smmu.c | 63 +++++++++++++++----------------------- include/linux/errno.h | 2 ++ 2 files changed, 27 insertions(+), 38 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 5f020f6a7d7..5eb0772e51e 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -30,13 +30,15 @@ #include #include #include +#include #include #include #include #include -#include +#include +//#include /* bitmap of the page sizes currently supported */ #define SMMU_IOMMU_PGSIZES (SZ_4K) @@ -112,11 +114,6 @@ #define SMMU_PDE_NEXT_SHIFT 28 -/* AHB Arbiter Registers */ -#define AHB_XBAR_CTRL 0xe0 
-#define AHB_XBAR_CTRL_SMMU_INIT_DONE_DONE 1 -#define AHB_XBAR_CTRL_SMMU_INIT_DONE_SHIFT 17 - #define SMMU_NUM_ASIDS 4 #define SMMU_TLB_FLUSH_VA_SECTION__MASK 0xffc00000 #define SMMU_TLB_FLUSH_VA_SECTION__SHIFT 12 /* right shift */ @@ -236,7 +233,7 @@ struct smmu_as { * Per SMMU device - IOMMU device */ struct smmu_device { - void __iomem *regs, *regs_ahbarb; + void __iomem *regs; unsigned long iovmm_base; /* remappable base address */ unsigned long page_count; /* total remappable size */ spinlock_t lock; @@ -253,12 +250,14 @@ struct smmu_device { unsigned long translation_enable_1; unsigned long translation_enable_2; unsigned long asid_security; + + struct device_node *ahb; }; static struct smmu_device *smmu_handle; /* unique for a system */ /* - * SMMU/AHB register accessors + * SMMU register accessors */ static inline u32 smmu_read(struct smmu_device *smmu, size_t offs) { @@ -269,15 +268,6 @@ static inline void smmu_write(struct smmu_device *smmu, u32 val, size_t offs) writel(val, smmu->regs + offs); } -static inline u32 ahb_read(struct smmu_device *smmu, size_t offs) -{ - return readl(smmu->regs_ahbarb + offs); -} -static inline void ahb_write(struct smmu_device *smmu, u32 val, size_t offs) -{ - writel(val, smmu->regs_ahbarb + offs); -} - #define VA_PAGE_TO_PA(va, page) \ (page_to_phys(page) + ((unsigned long)(va) & ~PAGE_MASK)) @@ -371,9 +361,9 @@ static void smmu_flush_regs(struct smmu_device *smmu, int enable) FLUSH_SMMU_REGS(smmu); } -static void smmu_setup_regs(struct smmu_device *smmu) +static int smmu_setup_regs(struct smmu_device *smmu) { - int i; + int i, err; u32 val; for (i = 0; i < smmu->num_as; i++) { @@ -399,10 +389,8 @@ static void smmu_setup_regs(struct smmu_device *smmu) smmu_flush_regs(smmu, 1); - val = ahb_read(smmu, AHB_XBAR_CTRL); - val |= AHB_XBAR_CTRL_SMMU_INIT_DONE_DONE << - AHB_XBAR_CTRL_SMMU_INIT_DONE_SHIFT; - ahb_write(smmu, val, AHB_XBAR_CTRL); + err = tegra_ahb_enable_smmu(smmu->ahb); + return err; } static void flush_ptc_and_tlb(struct smmu_device *smmu, @@ -874,18 +862,18 @@ static int tegra_smmu_resume(struct device *dev) { struct smmu_device *smmu = dev_get_drvdata(dev); unsigned long flags; + int err; spin_lock_irqsave(&smmu->lock, flags); - smmu_setup_regs(smmu); + err = smmu_setup_regs(smmu); spin_unlock_irqrestore(&smmu->lock, flags); - return 0; + return err; } static int tegra_smmu_probe(struct platform_device *pdev) { struct smmu_device *smmu; - struct resource *regs, *regs2; - struct tegra_smmu_window *window; + struct resource *regs, *window; struct device *dev = &pdev->dev; int i, err = 0; @@ -895,9 +883,8 @@ static int tegra_smmu_probe(struct platform_device *pdev) BUILD_BUG_ON(PAGE_SHIFT != SMMU_PAGE_SHIFT); regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - regs2 = platform_get_resource(pdev, IORESOURCE_MEM, 1); - window = tegra_smmu_window(0); - if (!regs || !regs2 || !window) { + window = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!regs || !window) { dev_err(dev, "No SMMU resources\n"); return -ENODEV; } @@ -913,14 +900,16 @@ static int tegra_smmu_probe(struct platform_device *pdev) smmu->iovmm_base = (unsigned long)window->start; smmu->page_count = (window->end + 1 - window->start) >> SMMU_PAGE_SHIFT; smmu->regs = devm_ioremap(dev, regs->start, resource_size(regs)); - smmu->regs_ahbarb = devm_ioremap(dev, regs2->start, - resource_size(regs2)); - if (!smmu->regs || !smmu->regs_ahbarb) { + if (!smmu->regs) { dev_err(dev, "failed to remap SMMU registers\n"); err = -ENXIO; goto fail; } + smmu->ahb = 
of_parse_phandle(pdev->dev.of_node, "ahb", 0); + if (!smmu->ahb) + return -ENODEV; + smmu->translation_enable_0 = ~0; smmu->translation_enable_1 = ~0; smmu->translation_enable_2 = ~0; @@ -947,7 +936,9 @@ static int tegra_smmu_probe(struct platform_device *pdev) INIT_LIST_HEAD(&as->client); } spin_lock_init(&smmu->lock); - smmu_setup_regs(smmu); + err = smmu_setup_regs(smmu); + if (err) + goto fail; platform_set_drvdata(pdev, smmu); smmu->avp_vector_page = alloc_page(GFP_KERNEL); @@ -962,8 +953,6 @@ static int tegra_smmu_probe(struct platform_device *pdev) __free_page(smmu->avp_vector_page); if (smmu->regs) devm_iounmap(dev, smmu->regs); - if (smmu->regs_ahbarb) - devm_iounmap(dev, smmu->regs_ahbarb); if (smmu && smmu->as) { for (i = 0; i < smmu->num_as; i++) { if (smmu->as[i].pdir_page) { @@ -995,8 +984,6 @@ static int tegra_smmu_remove(struct platform_device *pdev) __free_page(smmu->avp_vector_page); if (smmu->regs) devm_iounmap(dev, smmu->regs); - if (smmu->regs_ahbarb) - devm_iounmap(dev, smmu->regs_ahbarb); devm_kfree(dev, smmu); smmu_handle = NULL; return 0; diff --git a/include/linux/errno.h b/include/linux/errno.h index 46685832ed9..e02de468105 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -16,6 +16,8 @@ #define ERESTARTNOHAND 514 /* restart if no handler.. */ #define ENOIOCTLCMD 515 /* No ioctl command */ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ +#define EPROBE_DEFER 517 /* Driver requests probe retry */ +#define EOPENSTALE 518 /* open found a stale dentry */ /* Defined for the NFSv3 protocol */ #define EBADHANDLE 521 /* Illegal NFS file handle */ From cdebd6530d89e7cee4c3f0283dc0fbba55c6d7de Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 13 Jan 2013 16:01:01 -0500 Subject: [PATCH 297/678] cpufreq: touchdemand: more balanced defaults --- drivers/cpufreq/cpufreq_touchdemand.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_touchdemand.c b/drivers/cpufreq/cpufreq_touchdemand.c index c66a4698744..1599c3254b9 100644 --- a/drivers/cpufreq/cpufreq_touchdemand.c +++ b/drivers/cpufreq/cpufreq_touchdemand.c @@ -42,11 +42,11 @@ */ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define DEF_FREQUENCY_UP_THRESHOLD (85) +#define DEF_FREQUENCY_UP_THRESHOLD (90) #define DEF_SAMPLING_DOWN_FACTOR (2) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define MICRO_FREQUENCY_UP_THRESHOLD (85) +#define MICRO_FREQUENCY_UP_THRESHOLD (90) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) @@ -144,7 +144,7 @@ static struct dbs_tuners { .ignore_nice = 0, .powersave_bias = 0, .touch_floor_freq = 666000, - .touch_floor_time = 2000, + .touch_floor_time = 1000, .touch_factor = 2, .touch_poke = 1, }; From 2d7003f20f51d84d9b1bd1e5cd9e475e1fac4508 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 18 Jan 2013 16:14:05 -0500 Subject: [PATCH 298/678] sound: soc: tegra: tegra_pcm.c: fix audio perflock init --- sound/soc/tegra/tegra_pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index 934cf06f48a..873ce0a7219 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -448,7 +448,7 @@ static int __init snd_tegra_pcm_init(void) #ifdef CONFIG_AUDIO_MIN_PERFLOCK pm_qos_add_request(&playback_cpu_freq_req, PM_QOS_CPU_FREQ_MIN, - (s32)PLAYBACK_CPU_FREQ_MIN); + PM_QOS_DEFAULT_VALUE); #endif return 
platform_driver_register(&tegra_pcm_driver); } From 597839a54b00b440c9e33018bd1556162dff088d Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 18 Jan 2013 16:15:33 -0500 Subject: [PATCH 299/678] mach-tegra: cpu-tegra3.c: increase runnable threads threshold 3 --- arch/arm/mach-tegra/cpu-tegra3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index 016674b4ad9..ca252d8d6e2 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -194,7 +194,7 @@ enum { #define NR_FSHIFT 2 static unsigned int nr_run_thresholds[] = { /* 1, 2, 3, 4 - on-line cpus target */ - 5, 9, 10, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ + 5, 9, 13, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ }; static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ static unsigned int nr_run_last; From 816a5538650bf13644f764f98c42b7f9c307b9bd Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 18 Jan 2013 16:17:26 -0500 Subject: [PATCH 300/678] mach-tegra: edp.c: lower edp freqs to T30L defaults, were upped by SOC hack --- arch/arm/mach-tegra/edp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/edp.c b/arch/arm/mach-tegra/edp.c index b8d3fd64d7f..1ba0e5f1530 100644 --- a/arch/arm/mach-tegra/edp.c +++ b/arch/arm/mach-tegra/edp.c @@ -362,10 +362,10 @@ void __init tegra_init_cpu_edp_limits(unsigned int regulator_mA) for (j = 0; j < edp_limits_size; j++) { e[j].temperature = (int)t[i+j].temperature; - e[j].freq_limits[0] = (unsigned int)(t[i+j].freq_limits[0]-10) * 10000; - e[j].freq_limits[1] = (unsigned int)t[i+j].freq_limits[1] * 10000; - e[j].freq_limits[2] = (unsigned int)t[i+j].freq_limits[2] * 10000; - e[j].freq_limits[3] = (unsigned int)t[i+j].freq_limits[3] * 10000; + e[j].freq_limits[0] = (unsigned int)(t[i+j].freq_limits[0]-40) * 10000; + e[j].freq_limits[1] = (unsigned int)(t[i+j].freq_limits[1]-30) * 10000; + e[j].freq_limits[2] = (unsigned int)(t[i+j].freq_limits[2]-30) * 10000; + e[j].freq_limits[3] = (unsigned int)(t[i+j].freq_limits[3]-30) * 10000; } if (edp_limits != edp_default_limits) From 0a6946a973d23b48f02e6d0814e1c4960f1c637f Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 18 Jan 2013 16:18:39 -0500 Subject: [PATCH 301/678] mach-tegra: tegra3_clocks.c: misc changes --- arch/arm/mach-tegra/tegra3_clocks.c | 49 +++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 29cbde74795..43bc38f9847 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -3622,6 +3622,13 @@ static struct clk_pll_freq_table tegra_pll_x_freq_table[] = { { 19200000, 1000000000, 625, 12, 1, 8}, { 26000000, 1000000000, 1000, 26, 1, 8}, + /* 666 MHz */ + { 12000000, 666000000, 555, 10, 1, 8}, + { 13000000, 666000000, 666, 13, 1, 8}, + { 16800000, 666000000, 555, 14, 1, 8}, /* actual: 999.6 MHz */ + { 19200000, 666000000, 555, 16, 1, 8}, + { 26000000, 666000000, 333, 13, 1, 8}, + { 0, 0, 0, 0, 0, 0 }, }; @@ -3925,7 +3932,7 @@ static struct clk tegra_clk_cclk_lp = { .inputs = mux_cclk_lp, .reg = 0x370, .ops = &tegra_super_ops, - .max_rate = 720000000, + .max_rate = 740000000, }; static struct clk tegra_clk_sclk = { @@ -3953,7 +3960,7 @@ static struct clk tegra_clk_virtual_cpu_lp = { .name = "cpu_lp", .parent = &tegra_clk_cclk_lp, .ops = &tegra_cpu_ops, - .max_rate = 720000000, + .max_rate = 
740000000, .u.cpu = { .main = &tegra_pll_x, .backup = &tegra_pll_p, @@ -4700,7 +4707,26 @@ static struct cpufreq_frequency_table freq_table_1p7GHz[] = { { 2, 204000 }, { 3, 340000 }, { 4, 475000 }, +#ifdef CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_740 + { 5, 740000 }, +#endif +#ifdef CONFIG_LP_OC_700 + { 5, 700000 }, +#endif +#ifdef CONFIG_LP_OC_666 { 5, 666000 }, +#endif +#ifdef CONFIG_LP_OC_620 + { 5, 620000 }, +#endif +#ifdef CONFIG_LP_OC_555 + { 5, 555000 }, +#endif +#else + { 5, 620000 }, + +#endif { 6, 860000 }, { 7, 1000000 }, { 8, 1100000 }, @@ -4719,7 +4745,26 @@ static struct cpufreq_frequency_table freq_table_1p8GHz[] = { { 2, 204000 }, { 3, 340000 }, { 4, 475000 }, +#ifdef CONFIG_LP_OVERCLOCK +#ifdef CONFIG_LP_OC_740 + { 5, 740000 }, +#endif +#ifdef CONFIG_LP_OC_700 + { 5, 700000 }, +#endif +#ifdef CONFIG_LP_OC_666 { 5, 666000 }, +#endif +#ifdef CONFIG_LP_OC_620 + { 5, 620000 }, +#endif +#ifdef CONFIG_LP_OC_555 + { 5, 555000 }, +#endif +#else + { 5, 620000 }, + +#endif { 6, 860000 }, { 7, 1000000 }, { 8, 1100000 }, From 61fede1f6908ecd5e2baa392bb4d6c4637d3f427 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 18 Jan 2013 16:18:58 -0500 Subject: [PATCH 302/678] cpufreq: touchdemand: add touch minimum core number lock and tweak values --- drivers/cpufreq/cpufreq_touchdemand.c | 82 ++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/cpufreq_touchdemand.c b/drivers/cpufreq/cpufreq_touchdemand.c index 1599c3254b9..de5df595b90 100644 --- a/drivers/cpufreq/cpufreq_touchdemand.c +++ b/drivers/cpufreq/cpufreq_touchdemand.c @@ -36,6 +36,10 @@ #include "../../arch/arm/mach-tegra/clock.h" #include "../../arch/arm/mach-tegra/pm.h" +#include +static struct pm_qos_request_list touch_min_cpu_req; +unsigned int min_cpus_lock; + /* * dbs is used in this file as a shortform for demandbased switching * It helps to keep variable names smaller, simpler @@ -50,7 +54,7 @@ #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) -#define DEF_SAMPLING_RATE (50000) +#define DEF_SAMPLING_RATE (40000) #define DEF_IO_IS_BUSY (1) /* @@ -115,7 +119,7 @@ struct cpu_dbs_info_s { struct mutex timer_mutex; }; -static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); +static DEFINE_PER_CPU(struct cpu_dbs_info_s, td_cpu_dbs_info); static unsigned int dbs_enable; /* number of CPUs using this policy */ @@ -134,6 +138,7 @@ static struct dbs_tuners { unsigned int io_is_busy; unsigned int touch_floor_freq; unsigned int touch_floor_time; + unsigned int touch_min_cores; unsigned int touch_factor; unsigned int touch_poke; unsigned int origin_sampling_rate; @@ -144,8 +149,9 @@ static struct dbs_tuners { .ignore_nice = 0, .powersave_bias = 0, .touch_floor_freq = 666000, - .touch_floor_time = 1000, - .touch_factor = 2, + .touch_floor_time = 2000, + .touch_min_cores = 0, + .touch_factor = 4, .touch_poke = 1, }; @@ -205,7 +211,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, unsigned int freq_hi, freq_lo; unsigned int index = 0; unsigned int jiffies_total, jiffies_hi, jiffies_lo; - struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + struct cpu_dbs_info_s *dbs_info = &per_cpu(td_cpu_dbs_info, policy->cpu); if (!dbs_info->freq_table) { @@ -249,7 +255,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, static void touchdemand_powersave_bias_init_cpu(int cpu) { - struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + struct 
cpu_dbs_info_s *dbs_info = &per_cpu(td_cpu_dbs_info, cpu); dbs_info->freq_table = cpufreq_frequency_get_table(cpu); dbs_info->freq_lo = 0; } @@ -288,6 +294,7 @@ show_one(ignore_nice_load, ignore_nice); show_one(powersave_bias, powersave_bias); show_one(touch_floor_freq, touch_floor_freq); show_one(touch_floor_time, touch_floor_time); +show_one(touch_min_cores, touch_min_cores); show_one(touch_factor, touch_factor); show_one(touch_poke, touch_poke); @@ -332,6 +339,28 @@ static ssize_t store_touch_floor_time(struct kobject *a, struct attribute *b, return count; } +static ssize_t store_touch_min_cores(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 4) + input = 4; + + dbs_tuners_ins.touch_min_cores = input; + + /* Make sure touch lock gets reset */ + pm_qos_update_request(&touch_min_cpu_req, + (s32)PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); + min_cpus_lock = 0; + + return count; +} + static ssize_t store_touch_factor(struct kobject *a, struct attribute *b, const char *buf, size_t count) { @@ -426,7 +455,7 @@ static ssize_t store_sampling_down_factor(struct kobject *a, /* Reset down sampling multiplier in case it was active */ for_each_online_cpu(j) { struct cpu_dbs_info_s *dbs_info; - dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info = &per_cpu(td_cpu_dbs_info, j); dbs_info->rate_mult = 1; } return count; @@ -455,7 +484,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { struct cpu_dbs_info_s *dbs_info; - dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info = &per_cpu(td_cpu_dbs_info, j); dbs_info->prev_cpu_idle = get_cpu_idle_time(j, &dbs_info->prev_cpu_wall); if (dbs_tuners_ins.ignore_nice) @@ -492,6 +521,7 @@ define_one_global_rw(ignore_nice_load); define_one_global_rw(powersave_bias); define_one_global_rw(touch_floor_freq); define_one_global_rw(touch_floor_time); +define_one_global_rw(touch_min_cores); define_one_global_rw(touch_factor); define_one_global_rw(touch_poke); @@ -506,6 +536,7 @@ static struct attribute *dbs_attributes[] = { &io_is_busy.attr, &touch_floor_freq.attr, &touch_floor_time.attr, + &touch_min_cores.attr, &touch_factor.attr, &touch_poke.attr, NULL @@ -555,6 +586,12 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) // if (Touch_poke_boost_till_jiffies > jiffies) // return; + if ((dbs_tuners_ins.touch_min_cores >= 2) && (Touch_poke_boost_till_jiffies < jiffies) && (min_cpus_lock == 1)) { + min_cpus_lock = 0; + pm_qos_update_request(&touch_min_cpu_req, + (s32)PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); + } + /* * Every sampling_rate, we check, if current idle time is less * than 20% (default), then we try to increase frequency @@ -577,7 +614,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) unsigned int load, load_freq; int freq_avg; - j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + j_dbs_info = &per_cpu(td_cpu_dbs_info, j); cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); @@ -682,7 +719,12 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) if (is_lp_cluster()) { freq_min = idle_top_freq; } else { - freq_min = dbs_tuners_ins.touch_floor_freq; } + if (dbs_tuners_ins.touch_floor_freq >= policy->min) { + freq_min = dbs_tuners_ins.touch_floor_freq; + } else { + freq_min = policy->min; + } + } } else { freq_min = policy->min; } @@ 
-817,6 +859,10 @@ static void dbs_chown(void) if (ret) pr_err("sys_chown touch_floor_time error: %d", ret); + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/touch_min_cores", low2highuid(AID_SYSTEM), low2highgid(0)); + if (ret) + pr_err("sys_chown touch_min_cores error: %d", ret); + ret = sys_chown("/sys/devices/system/cpu/cpufreq/touchdemand/touch_factor", low2highuid(AID_SYSTEM), low2highgid(0)); if (ret) pr_err("sys_chown touch_factor error: %d", ret); @@ -845,7 +891,7 @@ static void dbs_refresh_callback(struct work_struct *unused) if (lock_policy_rwsem_write(cpu) < 0) return; - this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + this_dbs_info = &per_cpu(td_cpu_dbs_info, cpu); policy = this_dbs_info->cur_policy; if (Touch_poke_boost) @@ -870,6 +916,12 @@ static void dbs_refresh_callback(struct work_struct *unused) this_dbs_info->prev_cpu_idle = get_cpu_idle_time(cpu, &this_dbs_info->prev_cpu_wall); + if ((dbs_tuners_ins.touch_min_cores >= 2) && (!is_lp_cluster()) && (min_cpus_lock == 0)) { + pm_qos_update_request(&touch_min_cpu_req, + (s32)dbs_tuners_ins.touch_min_cores); + min_cpus_lock = 1; + } + unlock_policy_rwsem_write(cpu); } @@ -958,7 +1010,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int j; int rc; - this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + this_dbs_info = &per_cpu(td_cpu_dbs_info, cpu); switch (event) { case CPUFREQ_GOV_START: @@ -970,7 +1022,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, dbs_enable++; for_each_cpu(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; - j_dbs_info = &per_cpu(od_cpu_dbs_info, j); + j_dbs_info = &per_cpu(td_cpu_dbs_info, j); j_dbs_info->cur_policy = policy; j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, @@ -1062,6 +1114,9 @@ static int __init cpufreq_gov_dbs_init(void) cpu_lp_clk = clk_get_sys(NULL, "cpu_lp"); idle_top_freq = clk_get_max_rate(cpu_lp_clk) / 1000; + pm_qos_add_request(&touch_min_cpu_req, PM_QOS_MIN_ONLINE_CPUS, + PM_QOS_DEFAULT_VALUE); + idle_time = get_cpu_idle_time_us(cpu, &wall); put_cpu(); if (idle_time != -1ULL) { @@ -1087,6 +1142,7 @@ static int __init cpufreq_gov_dbs_init(void) static void __exit cpufreq_gov_dbs_exit(void) { + pm_qos_remove_request(&touch_min_cpu_req); cpufreq_unregister_governor(&cpufreq_gov_touchdemand); } From 04f46fdae40969cd69ece12ffaf62c801988aa64 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 18 Jan 2013 16:23:28 -0500 Subject: [PATCH 303/678] arm: tegra: Fix modem_reset_flag assignment The modem_reset_flag should be assigned to 0 (not checked for equality to 0) patch courtesy of Jeremy C. Andrus --- arch/arm/mach-tegra/baseband-xmm-power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/baseband-xmm-power.c b/arch/arm/mach-tegra/baseband-xmm-power.c index 11d9b3f309c..40ef4e1d2da 100755 --- a/arch/arm/mach-tegra/baseband-xmm-power.c +++ b/arch/arm/mach-tegra/baseband-xmm-power.c @@ -807,7 +807,7 @@ static void baseband_xmm_power_init2_work(struct work_struct *work) } else pr_err("%s: hsic_register is missing\n", __func__); register_hsic_device = false; - modem_reset_flag == 0; + modem_reset_flag = 0; } } From 16c76b8d40cc521498e97687a0bfe5db196264a7 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 18 Jan 2013 17:00:20 -0500 Subject: [PATCH 304/678] tegra: Minor bugfixes for Tegra kernels Jeremy C. 
Andrus --- arch/arm/mach-tegra/sleep.S | 9 +++++++++ drivers/input/lid.c | 4 +++- drivers/input/touchscreen/rmi4/rmi_f09.c | 7 +------ drivers/power/smb347-charger.c | 2 +- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S index 18b8799ea32..4e898b81f36 100644 --- a/arch/arm/mach-tegra/sleep.S +++ b/arch/arm/mach-tegra/sleep.S @@ -49,6 +49,15 @@ #define TEGRA_PMC_VIRT (TEGRA_PMC_BASE - IO_APB_PHYS + IO_APB_VIRT) #define TEGRA_CLK_RESET_VIRT (TEGRA_CLK_RESET_BASE - IO_PPSB_PHYS + IO_PPSB_VIRT) +/* + * ARM security extensions are required when compiling TRUSTED_FOUNDATIONS code, + * and this explicit arch_extension line fixes the build on compilers where the + * as-instr Makefile function fails. The Android cross-compiler is an example. + */ +#ifdef CONFIG_TRUSTED_FOUNDATIONS +.arch_extension sec +#endif + /* * tegra_pen_lock * diff --git a/drivers/input/lid.c b/drivers/input/lid.c index 1d27609c187..26e3d6490e0 100644 --- a/drivers/input/lid.c +++ b/drivers/input/lid.c @@ -163,7 +163,9 @@ static int __init lid_init(void) return -ENOMEM; } - sysfs_create_group((struct kobject*)&lid_dev->dev.kobj, &lid_attr_group); + err_code = sysfs_create_group((struct kobject*)&lid_dev->dev.kobj, &lid_attr_group); + if (err_code != 0) + return err_code; err_code = lid_input_device_create(); if(err_code != 0) diff --git a/drivers/input/touchscreen/rmi4/rmi_f09.c b/drivers/input/touchscreen/rmi4/rmi_f09.c index 0ec980d7db0..1c93451ca17 100644 --- a/drivers/input/touchscreen/rmi4/rmi_f09.c +++ b/drivers/input/touchscreen/rmi4/rmi_f09.c @@ -107,7 +107,7 @@ static struct device_attribute attrs[] = { __ATTR(HostTestEn, RMI_RW_ATTR, rmi_f09_HostTestEn_show, rmi_f09_HostTestEn_store), __ATTR(InternalLimits, RMI_RO_ATTR, - rmi_f09_Limit_Register_Count_show, rmi_store_error), + rmi_f09_InternalLimits_show, rmi_store_error), __ATTR(Result_Register_Count, RMI_RO_ATTR, rmi_f09_Result_Register_Count_show, rmi_store_error), }; @@ -169,11 +169,6 @@ static int rmi_f09_init(struct rmi_function_container *fc) static void rmi_f09_remove(struct rmi_function_container *fc) { - struct rmi_fn_09_data *data = fc->data; - if (data) { - kfree(data->query.Limit_Register_Count); - kfree(data->query.f09_bist_query1); - } kfree(fc->data); } diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index 499b1724e32..e466bd548c5 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -1025,7 +1025,7 @@ static void dockin_isr_work_function(struct work_struct *dat) static ssize_t smb347_reg_show(struct device *dev, struct device_attribute *attr, char *buf) { struct i2c_client *client = charger->client; - uint8_t config_reg[14], cmd_reg[1], status_reg[10]; + uint8_t config_reg[14], cmd_reg[1], status_reg[11]; int i, ret = 0; ret += i2c_smbus_read_i2c_block_data(client, smb347_CHARGE, 15, config_reg) From 5ad9f7ecc4e60c52f650ed31a3341c81562499ee Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 18 Jan 2013 17:01:19 -0500 Subject: [PATCH 305/678] mach-tegra: include: mach: add missing tegra-ahb.h file --- arch/arm/mach-tegra/include/mach/tegra-ahb.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 arch/arm/mach-tegra/include/mach/tegra-ahb.h diff --git a/arch/arm/mach-tegra/include/mach/tegra-ahb.h b/arch/arm/mach-tegra/include/mach/tegra-ahb.h new file mode 100644 index 00000000000..e0f8c84b1d8 --- /dev/null +++ b/arch/arm/mach-tegra/include/mach/tegra-ahb.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 
2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __MACH_TEGRA_AHB_H__ +#define __MACH_TEGRA_AHB_H__ + +extern int tegra_ahb_enable_smmu(struct device_node *ahb); + +#endif /* __MACH_TEGRA_AHB_H__ */ From 8dda7737e4bab3c09eb7c0a7c44faf1578b05bf8 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Sat, 15 Sep 2012 15:23:35 +0200 Subject: [PATCH 306/678] Input: Send events one packet at a time On heavy event loads, such as a multitouch driver, the irqsoff latency can be as high as 250 us. By accumulating a frame worth of data before passing it on, the latency can be dramatically reduced. As a side effect, the special EV_SYN handling can be removed, since the frame is now atomic. This patch adds the events() handler callback and uses it if it exists. The latency is improved by 50 us even without the callback. Change-Id: Iebd9b1868ae6300a922a45b6d104e7c2b38e4cf5 Cc: Daniel Kurtz Tested-by: Benjamin Tissoires Tested-by: Ping Cheng Tested-by: Sedat Dilek Acked-by: Dmitry Torokhov Signed-off-by: Henrik Rydberg Input: Improve the events-per-packet estimate The events-per-packet estimate has so far been used by MT devices only. This patch adjusts the packet buffer size to also accomodate the KEY and MSC events. Keyboards normally send one or two keys at a time. MT devices normally send a number of button keys along with the MT information. The buffer size chosen here covers those cases, and matches the default buffer size in evdev. Since the input estimate is now preferred, remove the special input-mt estimate. 
Reviewed-and-tested-by: Ping Cheng Tested-by: Benjamin Tissoires Acked-by: Dmitry Torokhov Signed-off-by: Henrik Rydberg --- drivers/input/input-mt.c | 1 - drivers/input/input.c | 213 +++++++++++++++++++++++++++------------ include/linux/input.h | 24 ++++- 3 files changed, 169 insertions(+), 69 deletions(-) diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index c48c81f0308..b5993acd48b 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -40,7 +40,6 @@ int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots) dev->mtsize = num_slots; input_set_abs_params(dev, ABS_MT_SLOT, 0, num_slots - 1, 0, 0); input_set_abs_params(dev, ABS_MT_TRACKING_ID, 0, TRKID_MAX, 0, 0); - input_set_events_per_packet(dev, 6 * num_slots); /* Mark slots as 'unused' */ for (i = 0; i < num_slots; i++) diff --git a/drivers/input/input.c b/drivers/input/input.c index da38d97a51b..e7c716baead 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -47,6 +47,8 @@ static DEFINE_MUTEX(input_mutex); static struct input_handler *input_table[8]; +static const struct input_value input_value_sync = { EV_SYN, SYN_REPORT, 1 }; + static inline int is_event_supported(unsigned int code, unsigned long *bm, unsigned int max) { @@ -69,42 +71,102 @@ static int input_defuzz_abs_event(int value, int old_val, int fuzz) return value; } +static void input_start_autorepeat(struct input_dev *dev, int code) +{ + if (test_bit(EV_REP, dev->evbit) && + dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && + dev->timer.data) { + dev->repeat_key = code; + mod_timer(&dev->timer, + jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); + } +} + +static void input_stop_autorepeat(struct input_dev *dev) +{ + del_timer(&dev->timer); +} + /* * Pass event first through all filters and then, if event has not been * filtered out, through all open handles. This function is called with * dev->event_lock held and interrupts disabled. */ -static void input_pass_event(struct input_dev *dev, - unsigned int type, unsigned int code, int value) +static unsigned int input_to_handler(struct input_handle *handle, + struct input_value *vals, unsigned int count) +{ + struct input_handler *handler = handle->handler; + struct input_value *end = vals; + struct input_value *v; + + for (v = vals; v != vals + count; v++) { + if (handler->filter && + handler->filter(handle, v->type, v->code, v->value)) + continue; + if (end != v) + *end = *v; + end++; + } + + count = end - vals; + if (!count) + return 0; + + if (handler->events) + handler->events(handle, vals, count); + else if (handler->event) + for (v = vals; v != end; v++) + handler->event(handle, v->type, v->code, v->value); + + return count; +} + +/* + * Pass values first through all filters and then, if event has not been + * filtered out, through all open handles. This function is called with + * dev->event_lock held and interrupts disabled. 
+ */ +static void input_pass_values(struct input_dev *dev, + struct input_value *vals, unsigned int count) { - struct input_handler *handler; struct input_handle *handle; + struct input_value *v; + + if (!count) + return; rcu_read_lock(); handle = rcu_dereference(dev->grab); - if (handle) - handle->handler->event(handle, type, code, value); - else { - bool filtered = false; - - list_for_each_entry_rcu(handle, &dev->h_list, d_node) { - if (!handle->open) - continue; + if (handle) { + count = input_to_handler(handle, vals, count); + } else { + list_for_each_entry_rcu(handle, &dev->h_list, d_node) + if (handle->open) + count = input_to_handler(handle, vals, count); + } - handler = handle->handler; - if (!handler->filter) { - if (filtered) - break; + rcu_read_unlock(); - handler->event(handle, type, code, value); + add_input_randomness(vals->type, vals->code, vals->value); - } else if (handler->filter(handle, type, code, value)) - filtered = true; + /* trigger auto repeat for key events */ + for (v = vals; v != vals + count; v++) { + if (v->type == EV_KEY && v->value != 2) { + if (v->value) + input_start_autorepeat(dev, v->code); + else + input_stop_autorepeat(dev); } } +} - rcu_read_unlock(); +static void input_pass_event(struct input_dev *dev, + unsigned int type, unsigned int code, int value) +{ + struct input_value vals[] = { { type, code, value } }; + + input_pass_values(dev, vals, ARRAY_SIZE(vals)); } /* @@ -121,18 +183,12 @@ static void input_repeat_key(unsigned long data) if (test_bit(dev->repeat_key, dev->key) && is_event_supported(dev->repeat_key, dev->keybit, KEY_MAX)) { + struct input_value vals[] = { + { EV_KEY, dev->repeat_key, 2 }, + input_value_sync + }; - input_pass_event(dev, EV_KEY, dev->repeat_key, 2); - - if (dev->sync) { - /* - * Only send SYN_REPORT if we are not in a middle - * of driver parsing a new hardware packet. - * Otherwise assume that the driver will send - * SYN_REPORT once it's done. 
- */ - input_pass_event(dev, EV_SYN, SYN_REPORT, 1); - } + input_pass_values(dev, vals, ARRAY_SIZE(vals)); if (dev->rep[REP_PERIOD]) mod_timer(&dev->timer, jiffies + @@ -142,25 +198,11 @@ static void input_repeat_key(unsigned long data) spin_unlock_irqrestore(&dev->event_lock, flags); } -static void input_start_autorepeat(struct input_dev *dev, int code) -{ - if (test_bit(EV_REP, dev->evbit) && - dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && - dev->timer.data) { - dev->repeat_key = code; - mod_timer(&dev->timer, - jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); - } -} - -static void input_stop_autorepeat(struct input_dev *dev) -{ - del_timer(&dev->timer); -} - #define INPUT_IGNORE_EVENT 0 #define INPUT_PASS_TO_HANDLERS 1 #define INPUT_PASS_TO_DEVICE 2 +#define INPUT_SLOT 4 +#define INPUT_FLUSH 8 #define INPUT_PASS_TO_ALL (INPUT_PASS_TO_HANDLERS | INPUT_PASS_TO_DEVICE) static int input_handle_abs_event(struct input_dev *dev, @@ -207,14 +249,14 @@ static int input_handle_abs_event(struct input_dev *dev, /* Flush pending "slot" event */ if (is_mt_event && dev->slot != input_abs_get_val(dev, ABS_MT_SLOT)) { input_abs_set_val(dev, ABS_MT_SLOT, dev->slot); - input_pass_event(dev, EV_ABS, ABS_MT_SLOT, dev->slot); + return INPUT_PASS_TO_HANDLERS | INPUT_SLOT; } return INPUT_PASS_TO_HANDLERS; } -static void input_handle_event(struct input_dev *dev, - unsigned int type, unsigned int code, int value) +static int input_get_disposition(struct input_dev *dev, + unsigned int type, unsigned int code, int value) { int disposition = INPUT_IGNORE_EVENT; @@ -227,13 +269,9 @@ static void input_handle_event(struct input_dev *dev, break; case SYN_REPORT: - if (!dev->sync) { - dev->sync = true; - disposition = INPUT_PASS_TO_HANDLERS; - } + disposition = INPUT_PASS_TO_HANDLERS | INPUT_FLUSH; break; case SYN_MT_REPORT: - dev->sync = false; disposition = INPUT_PASS_TO_HANDLERS; break; } @@ -317,14 +355,48 @@ static void input_handle_event(struct input_dev *dev, break; } - if (disposition != INPUT_IGNORE_EVENT && type != EV_SYN) - dev->sync = false; + return disposition; +} + +static void input_handle_event(struct input_dev *dev, + unsigned int type, unsigned int code, int value) +{ + int disposition; + + disposition = input_get_disposition(dev, type, code, value); if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event) dev->event(dev, type, code, value); - if (disposition & INPUT_PASS_TO_HANDLERS) - input_pass_event(dev, type, code, value); + if (!dev->vals) + return; + + if (disposition & INPUT_PASS_TO_HANDLERS) { + struct input_value *v; + + if (disposition & INPUT_SLOT) { + v = &dev->vals[dev->num_vals++]; + v->type = EV_ABS; + v->code = ABS_MT_SLOT; + v->value = dev->slot; + } + + v = &dev->vals[dev->num_vals++]; + v->type = type; + v->code = code; + v->value = value; + } + + if (disposition & INPUT_FLUSH) { + if (dev->num_vals >= 2) + input_pass_values(dev, dev->vals, dev->num_vals); + dev->num_vals = 0; + } else if (dev->num_vals >= dev->max_vals - 2) { + dev->vals[dev->num_vals++] = input_value_sync; + input_pass_values(dev, dev->vals, dev->num_vals); + dev->num_vals = 0; + } + } /** @@ -352,7 +424,6 @@ void input_event(struct input_dev *dev, if (is_event_supported(type, dev->evbit, EV_MAX)) { spin_lock_irqsave(&dev->event_lock, flags); - add_input_randomness(type, code, value); input_handle_event(dev, type, code, value); spin_unlock_irqrestore(&dev->event_lock, flags); } @@ -831,10 +902,12 @@ int input_set_keycode(struct input_dev *dev, if (test_bit(EV_KEY, dev->evbit) && !is_event_supported(old_keycode, 
dev->keybit, KEY_MAX) && __test_and_clear_bit(old_keycode, dev->key)) { + struct input_value vals[] = { + { EV_KEY, old_keycode, 0 }, + input_value_sync + }; - input_pass_event(dev, EV_KEY, old_keycode, 0); - if (dev->sync) - input_pass_event(dev, EV_SYN, SYN_REPORT, 1); + input_pass_values(dev, vals, ARRAY_SIZE(vals)); } out: @@ -1416,6 +1489,7 @@ static void input_dev_release(struct device *device) input_ff_destroy(dev); input_mt_destroy_slots(dev); kfree(dev->absinfo); + kfree(dev->vals); kfree(dev); module_put(THIS_MODULE); @@ -1778,6 +1852,9 @@ static unsigned int input_estimate_events_per_packet(struct input_dev *dev) if (test_bit(i, dev->relbit)) events++; + /* Make room for KEY and MSC events */ + events += 7; + return events; } @@ -1816,6 +1893,7 @@ int input_register_device(struct input_dev *dev) { static atomic_t input_no = ATOMIC_INIT(0); struct input_handler *handler; + unsigned int packet_size; const char *path; int error; @@ -1828,9 +1906,14 @@ int input_register_device(struct input_dev *dev) /* Make sure that bitmasks not mentioned in dev->evbit are clean. */ input_cleanse_bitmasks(dev); - if (!dev->hint_events_per_packet) - dev->hint_events_per_packet = - input_estimate_events_per_packet(dev); + packet_size = input_estimate_events_per_packet(dev); + if (dev->hint_events_per_packet < packet_size) + dev->hint_events_per_packet = packet_size; + + dev->max_vals = max(dev->hint_events_per_packet, packet_size) + 2; + dev->vals = kcalloc(dev->max_vals, sizeof(*dev->vals), GFP_KERNEL); + if (!dev->vals) + return -ENOMEM; /* * If delay and period are pre-set by the driver, then autorepeating diff --git a/include/linux/input.h b/include/linux/input.h index 48857fddf9a..cf61dc05f57 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1139,6 +1139,18 @@ struct ff_effect { #include #include +/** + * struct input_value - input value representation + * @type: type of value (EV_KEY, EV_ABS, etc) + * @code: the value code + * @value: the value + */ +struct input_value { + __u16 type; + __u16 code; + __s32 value; +}; + /** * struct input_dev - represents an input device * @name: name of the device @@ -1215,7 +1227,6 @@ struct ff_effect { * last user closes the device * @going_away: marks devices that are in a middle of unregistering and * causes input_open_device*() fail with -ENODEV. - * @sync: set to %true when there were no new events since last EV_SYN * @dev: driver model's view of this device * @h_list: list of input handles associated with the device. When * accessing the list dev->mutex must be held @@ -1283,12 +1294,14 @@ struct input_dev { unsigned int users; bool going_away; - bool sync; - struct device dev; struct list_head h_list; struct list_head node; + + unsigned int num_vals; + unsigned int max_vals; + struct input_value *vals; }; #define to_input_dev(d) container_of(d, struct input_dev, dev) @@ -1349,6 +1362,9 @@ struct input_handle; * @event: event handler. This method is being called by input core with * interrupts disabled and dev->event_lock spinlock held and so * it may not sleep + * @events: event sequence handler. This method is being called by + * input core with interrupts disabled and dev->event_lock + * spinlock held and so it may not sleep * @filter: similar to @event; separates normal event handlers from * "filters". 
* @match: called after comparing device's id with handler's id_table @@ -1385,6 +1401,8 @@ struct input_handler { void *private; void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value); + void (*events)(struct input_handle *handle, + const struct input_value *vals, unsigned int count); bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value); bool (*match)(struct input_handler *handler, struct input_dev *dev); int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id); From bcb27d916040fbbcad3fe198f11aefc854de7209 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 18 Jan 2013 22:04:34 -0500 Subject: [PATCH 307/678] defconfig: update --- arch/arm/configs/metallice_grouper_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index ac11d32a7e2..42c1084720b 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -88,6 +88,7 @@ CONFIG_RESOURCE_COUNTERS=y # CONFIG_CGROUP_PERF is not set CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y # CONFIG_BLK_CGROUP is not set # CONFIG_NAMESPACES is not set From 9b389cc5956d0c4a8dcce8cf76af58b2eab39494 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 19 Jan 2013 17:57:37 -0500 Subject: [PATCH 308/678] tegra: lower G min and touch floor min --- arch/arm/mach-tegra/tegra3_clocks.c | 2 +- drivers/cpufreq/cpufreq_touchdemand.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 43bc38f9847..9e53ff0e4de 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -4847,8 +4847,8 @@ static int clip_cpu_rate_limits( /* force idx for max LP*/ idx=5; - cpu_clk_lp->max_rate = freq_table[idx].frequency * 1000; + idx=4; cpu_clk_g->min_rate = freq_table[idx-1].frequency * 1000; data->suspend_index = idx; return 0; diff --git a/drivers/cpufreq/cpufreq_touchdemand.c b/drivers/cpufreq/cpufreq_touchdemand.c index de5df595b90..9cc22a71649 100644 --- a/drivers/cpufreq/cpufreq_touchdemand.c +++ b/drivers/cpufreq/cpufreq_touchdemand.c @@ -148,8 +148,8 @@ static struct dbs_tuners { .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, .ignore_nice = 0, .powersave_bias = 0, - .touch_floor_freq = 666000, - .touch_floor_time = 2000, + .touch_floor_freq = 475000, + .touch_floor_time = 1000, .touch_min_cores = 0, .touch_factor = 4, .touch_poke = 1, From c6bbf242d43c147f21db10eee80b39f6ccfb43cf Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 19 Jan 2013 20:04:49 -0500 Subject: [PATCH 309/678] [PATCH] Restrict slave mountspace so Dalvik apps can mount system-wide volumes Android 4.2 implements multi-user storage using per-process mount namespaces. Everything under "/" (the entire filesystem hierarchy) is marked as a recursive-slave mountspace for all zygote instances. This is done so that user-storage sandbox mounts under /storage/emulated are hidden from other apps and users. Unfortunately this means that any Dalvik app (actually, any program whose clone/fork ancestry includes a Dalvik zygote, which is everything except services spawned directly from init) cannot mount system-wide volumes. Thus, apps like CifsManager are effectively broken in Android 4.2, since its cifs mounts are only visible to the CifsManager app itself. 
All other apps see empty mountpoints instead of the mounted volume. Furthermore, Linux provides no provision for a process to "escape" a recursive-slave mountspace in versions prior to Linux 3.8 (setns syscall). Here, we restrict rootfs-slave calls to /storage (and, due to a possible kernel bug, /mnt/shell/emulated) so that Dalvik apps can mount system-wide volumes elsewhere (with appropriate permission, as in earlier versions of Android), while retaining full multi-user storage compatibility. It is made available as a kernel-based workaround for instances where libdvm can not be modified. This change requires that a tmpfs volume is mounted as /storage in init.rc. If this volume is unavailable, then we falls back to the previous behavior of marking the entire filesystem hierarchy as slave. --- fs/Kconfig | 25 +++++++++++++++++++++++++ fs/namespace.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/fs/Kconfig b/fs/Kconfig index 99453badf45..f49df69b3dd 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -281,4 +281,29 @@ endif source "fs/nls/Kconfig" source "fs/dlm/Kconfig" +config RESTRICT_ROOTFS_SLAVE + bool "Android: Restrict rootfs slave mountspace to /storage" + help + Restrict slave mountspace so Dalvik apps can mount system-wide volumes + + Android 4.2 implements multi-user storage using per-process mount + namespaces. Everything under "/" (the entire filesystem hierarchy) is + marked as a recursive-slave mountspace for all zygote instances. This is + done so that user-storage sandbox mounts under /storage/emulated are hidden + from other apps and users. Unfortunately this means that any Dalvik app + (actually, any program whose clone/fork ancestry includes a Dalvik zygote, + which is everything except services spawned directly from init) cannot + mount system-wide volumes. + + This option restricts rootfs-slave calls to /storage (and + /mnt/shell/emulated) so that Dalvik apps can mount system-wide volumes + elsewhere (with appropriate permission, as in earlier versions of Android), + while retaining full multi-user storage compatibility. It is made + available as a kernel-based workaround for instances where libdvm can not + be modified. + + This option requires that a tmpfs volume is mounted as /storage in + Android init.rc. If this volume is unavailable, then we fall back to the + previous behavior of marking the entire filesystem hierarchy as slave. + endmenu diff --git a/fs/namespace.c b/fs/namespace.c index 5e7f2e9a11c..197ff79b00b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2302,6 +2302,36 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; +#ifdef CONFIG_RESTRICT_ROOTFS_SLAVE + /* Check if this is an attempt to mark "/" as recursive-slave. */ + if (strcmp(dir_name, "/") == 0 && flags == (MS_SLAVE | MS_REC)) { + static const char storage[] = "/storage"; + static const char source[] = "/mnt/shell/emulated"; + long res; + + /* Mark /storage as recursive-slave instead. */ + if ((res = do_mount(NULL, (char *)storage, NULL, (MS_SLAVE | MS_REC), NULL)) == 0) { + /* Unfortunately bind mounts from outside /storage may retain the + * recursive-shared property (bug?). This means any additional + * namespace-specific bind mounts (e.g., /storage/emulated/0/Android/obb) + * will also appear, shared in all namespaces, at their respective source + * paths (e.g., /mnt/shell/emulated/0/Android/obb), possibly leading to + * hundreds of /proc/mounts-visible bind mounts. 
As a workaround, mark + * /mnt/shell/emulated also as recursive-slave so that subsequent bind + * mounts are confined to their namespaces. */ + if ((res = do_mount(NULL, (char *)source, NULL, (MS_SLAVE | MS_REC), NULL)) == 0) + /* Both paths successfully marked as slave, leave the rest of the + * filesystem hierarchy alone. */ + return 0; + else + pr_warn("Failed to mount %s as MS_SLAVE: %ld\n", source, res); + } else { + pr_warn("Failed to mount %s as MS_SLAVE: %ld\n", storage, res); + } + /* Fallback: Mark rootfs as recursive-slave as requested. */ + } +#endif + /* ... and get the mountpoint */ retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); if (retval) From 632521d0c3494af633f30a99255c432c31e2bf96 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 19 Jan 2013 20:08:08 -0500 Subject: [PATCH 310/678] Revert "[PATCH] Restrict slave mountspace so Dalvik apps can mount" This reverts commit 917259881256aaed1b78cf9d844f93ac5aba4874. --- fs/Kconfig | 25 ------------------------- fs/namespace.c | 30 ------------------------------ 2 files changed, 55 deletions(-) diff --git a/fs/Kconfig b/fs/Kconfig index f49df69b3dd..99453badf45 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -281,29 +281,4 @@ endif source "fs/nls/Kconfig" source "fs/dlm/Kconfig" -config RESTRICT_ROOTFS_SLAVE - bool "Android: Restrict rootfs slave mountspace to /storage" - help - Restrict slave mountspace so Dalvik apps can mount system-wide volumes - - Android 4.2 implements multi-user storage using per-process mount - namespaces. Everything under "/" (the entire filesystem hierarchy) is - marked as a recursive-slave mountspace for all zygote instances. This is - done so that user-storage sandbox mounts under /storage/emulated are hidden - from other apps and users. Unfortunately this means that any Dalvik app - (actually, any program whose clone/fork ancestry includes a Dalvik zygote, - which is everything except services spawned directly from init) cannot - mount system-wide volumes. - - This option restricts rootfs-slave calls to /storage (and - /mnt/shell/emulated) so that Dalvik apps can mount system-wide volumes - elsewhere (with appropriate permission, as in earlier versions of Android), - while retaining full multi-user storage compatibility. It is made - available as a kernel-based workaround for instances where libdvm can not - be modified. - - This option requires that a tmpfs volume is mounted as /storage in - Android init.rc. If this volume is unavailable, then we fall back to the - previous behavior of marking the entire filesystem hierarchy as slave. - endmenu diff --git a/fs/namespace.c b/fs/namespace.c index 197ff79b00b..5e7f2e9a11c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2302,36 +2302,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; -#ifdef CONFIG_RESTRICT_ROOTFS_SLAVE - /* Check if this is an attempt to mark "/" as recursive-slave. */ - if (strcmp(dir_name, "/") == 0 && flags == (MS_SLAVE | MS_REC)) { - static const char storage[] = "/storage"; - static const char source[] = "/mnt/shell/emulated"; - long res; - - /* Mark /storage as recursive-slave instead. */ - if ((res = do_mount(NULL, (char *)storage, NULL, (MS_SLAVE | MS_REC), NULL)) == 0) { - /* Unfortunately bind mounts from outside /storage may retain the - * recursive-shared property (bug?). 
This means any additional - * namespace-specific bind mounts (e.g., /storage/emulated/0/Android/obb) - * will also appear, shared in all namespaces, at their respective source - * paths (e.g., /mnt/shell/emulated/0/Android/obb), possibly leading to - * hundreds of /proc/mounts-visible bind mounts. As a workaround, mark - * /mnt/shell/emulated also as recursive-slave so that subsequent bind - * mounts are confined to their namespaces. */ - if ((res = do_mount(NULL, (char *)source, NULL, (MS_SLAVE | MS_REC), NULL)) == 0) - /* Both paths successfully marked as slave, leave the rest of the - * filesystem hierarchy alone. */ - return 0; - else - pr_warn("Failed to mount %s as MS_SLAVE: %ld\n", source, res); - } else { - pr_warn("Failed to mount %s as MS_SLAVE: %ld\n", storage, res); - } - /* Fallback: Mark rootfs as recursive-slave as requested. */ - } -#endif - /* ... and get the mountpoint */ retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); if (retval) From 2b622c1e58d49e44e932d43773c2b09ea77bba1c Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 20 Jan 2013 17:09:01 -0500 Subject: [PATCH 311/678] Remove 1.9GHz as pll_x was being capped at 1.7GHz anyway. Need to look into later. --- arch/arm/mach-tegra/tegra3_clocks.c | 2 +- arch/arm/mach-tegra/tegra3_dvfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_clocks.c b/arch/arm/mach-tegra/tegra3_clocks.c index 9e53ff0e4de..3392bc7d155 100644 --- a/arch/arm/mach-tegra/tegra3_clocks.c +++ b/arch/arm/mach-tegra/tegra3_clocks.c @@ -3656,7 +3656,7 @@ static struct clk tegra_pll_x_out0 = { .ops = &tegra_pll_div_ops, .flags = DIV_2 | PLLX, .parent = &tegra_pll_x, - .max_rate = 850000000, + .max_rate = 950000000, }; diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index cfbdc261dc4..1d37c4c4a20 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -176,7 +176,7 @@ static struct dvfs cpu_dvfs_table[] = { /* Nexus 7 - faking speedo id = 4, process id =2*/ /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ - CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600, 1700, 1700, 1800, 1900), + CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600, 1700), CPU_DVFS("cpu_g", 4, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500), CPU_DVFS("cpu_g", 5, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), From 7692134d29f50c547e374daf869108ff9f113584 Mon Sep 17 00:00:00 2001 From: imoseyon Date: Sat, 5 Jan 2013 12:09:43 -0800 Subject: [PATCH 312/678] block: Add support for reinsert a dispatched req Add support for reinserting a dispatched request back to the scheduler's internal data structures. This capability is used by the device driver when it chooses to interrupt the current request transmission and execute another (more urgent) pending request. For example: interrupting long write in order to handle pending read. The device driver re-inserts the remaining write request back to the scheduler, to be rescheduled for transmission later on. Add API for verifying whether the current scheduler supports reinserting requests mechanism. If reinsert mechanism isn't supported by the scheduler, this code path will never be activated. 
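As a rough illustration of the intended driver-side usage (hypothetical code, not taken from this patch; names other than blk_reinsert_req_sup()/blk_reinsert_request() are made up), a driver that decides to preempt the request currently being transferred could do something like:

        /* Sketch: hand the interrupted request back to the scheduler. */
        static void example_preempt_active_req(struct request_queue *q,
                                               struct request *active_rq)
        {
                if (!blk_reinsert_req_sup(q))
                        return;         /* scheduler cannot take requests back */

                /* ... stop the ongoing transfer in hardware first (not shown) ... */

                if (blk_reinsert_request(q, active_rq))
                        pr_err("reinsert failed, completing the request normally\n");
        }

On the next queue run the scheduler can then hand out the more urgent request before the reinserted one.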
Signed-off-by: Tatyana Brokhman Conflicts: include/linux/elevator.h --- block/blk-core.c | 44 ++++++++++++++++++++++++++++++++++++++++ block/elevator.c | 35 ++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 2 ++ include/linux/elevator.h | 4 ++++ 4 files changed, 85 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 8fc4ae28a19..0fbf3d9cffe 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -929,6 +929,50 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL(blk_requeue_request); +/** + * blk_reinsert_request() - Insert a request back to the scheduler + * @q: request queue + * @rq: request to be inserted + * + * This function inserts the request back to the scheduler as if + * it was never dispatched. + * + * Return: 0 on success, error code on fail + */ +int blk_reinsert_request(struct request_queue *q, struct request *rq) +{ + if (unlikely(!rq) || unlikely(!q)) + return -EIO; + + blk_delete_timer(rq); + blk_clear_rq_complete(rq); + trace_block_rq_requeue(q, rq); + + if (blk_rq_tagged(rq)) + blk_queue_end_tag(q, rq); + + BUG_ON(blk_queued_rq(rq)); + + return elv_reinsert_request(q, rq); +} +EXPORT_SYMBOL(blk_reinsert_request); + +/** + * blk_reinsert_req_sup() - check whether the scheduler supports + * reinsertion of requests + * @q: request queue + * + * Returns true if the current scheduler supports reinserting + * request. False otherwise + */ +bool blk_reinsert_req_sup(struct request_queue *q) +{ + if (unlikely(!q)) + return false; + return q->elevator->type->ops.elevator_reinsert_req_fn ? true : false; +} +EXPORT_SYMBOL(blk_reinsert_req_sup); + static void add_acct_request(struct request_queue *q, struct request *rq, int where) { diff --git a/block/elevator.c b/block/elevator.c index 979bab9bf35..05bacea35a9 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -611,6 +611,41 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE); } +/** + * elv_reinsert_request() - Insert a request back to the scheduler + * @q: request queue where request should be inserted + * @rq: request to be inserted + * + * This function returns the request back to the scheduler to be + * inserted as if it was never dispatched + * + * Return: 0 on success, error code on failure + */ +int elv_reinsert_request(struct request_queue *q, struct request *rq) +{ + int res; + + if (!q->elevator->type->ops.elevator_reinsert_req_fn) + return -EPERM; + + res = q->elevator->type->ops.elevator_reinsert_req_fn(q, rq); + if (!res) { + /* + * it already went through dequeue, we need to decrement the + * in_flight count again + */ + if (blk_account_rq(rq)) { + q->in_flight[rq_is_sync(rq)]--; + if (rq->cmd_flags & REQ_SORTED) + elv_deactivate_rq(q, rq); + } + rq->cmd_flags &= ~REQ_STARTED; + q->nr_sorted++; + } + + return res; +} + void elv_drain_elevator(struct request_queue *q) { static int printed; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5e30b45d3d6..9d81d764503 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -662,6 +662,8 @@ extern struct request *blk_make_request(struct request_queue *, struct bio *, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); +extern int blk_reinsert_request(struct request_queue *q, struct request *rq); +extern bool blk_reinsert_req_sup(struct request_queue *q); extern void 
blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index d800d514218..d838c844396 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -20,6 +20,7 @@ typedef void (elevator_bio_merged_fn) (struct request_queue *, typedef int (elevator_dispatch_fn) (struct request_queue *, int); typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); +typedef int (elevator_reinsert_req_fn) (struct request_queue *, struct request *); typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *); typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); typedef int (elevator_may_queue_fn) (struct request_queue *, int); @@ -42,6 +43,8 @@ struct elevator_ops elevator_dispatch_fn *elevator_dispatch_fn; elevator_add_req_fn *elevator_add_req_fn; + elevator_reinsert_req_fn *elevator_reinsert_req_fn; + elevator_activate_req_fn *elevator_activate_req_fn; elevator_deactivate_req_fn *elevator_deactivate_req_fn; @@ -109,6 +112,7 @@ extern void elv_merged_request(struct request_queue *, struct request *, int); extern void elv_bio_merged(struct request_queue *q, struct request *, struct bio *); extern void elv_requeue_request(struct request_queue *, struct request *); +extern int elv_reinsert_request(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); extern int elv_register_queue(struct request_queue *q); From 07cf961796f892fcaba25f88184c42ccf126d940 Mon Sep 17 00:00:00 2001 From: imoseyon Date: Sat, 5 Jan 2013 12:11:37 -0800 Subject: [PATCH 313/678] block: Add API for urgent request handling This patch add support in block & elevator layers for handling urgent requests. The decision if a request is urgent or not is taken by the scheduler. Urgent request notification is passed to the underlying block device driver (eMMC for example). Block device driver may decide to interrupt the currently running low priority request to serve the new urgent request. By doing so READ latency is greatly reduced in read&write collision scenarios. Note that if the current scheduler doesn't implement the urgent request mechanism, this code path is never activated. Signed-off-by: Tatyana Brokhman Conflicts: include/linux/elevator.h --- block/blk-core.c | 26 ++++++++++++++++++++++++-- block/blk-settings.c | 12 ++++++++++++ block/blk.h | 11 +++++++++++ block/elevator.c | 5 +++++ include/linux/blkdev.h | 4 ++++ include/linux/elevator.h | 2 ++ 6 files changed, 58 insertions(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 0fbf3d9cffe..1000fc82d4d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -296,13 +296,26 @@ EXPORT_SYMBOL(blk_sync_queue); * Description: * See @blk_run_queue. This variant must be called with the queue lock * held and interrupts disabled. + * Device driver will be notified of an urgent request + * pending under the following conditions: + * 1. The driver and the current scheduler support urgent reques handling + * 2. There is an urgent request pending in the scheduler + * 3. 
There isn't already an urgent request in flight, meaning previously + * notified urgent request completed (!q->notified_urgent) */ void __blk_run_queue(struct request_queue *q) { if (unlikely(blk_queue_stopped(q))) return; - q->request_fn(q); + if (!q->notified_urgent && + q->elevator->type->ops.elevator_is_urgent_fn && + q->urgent_request_fn && + q->elevator->type->ops.elevator_is_urgent_fn(q)) { + q->notified_urgent = true; + q->urgent_request_fn(q); + } else + q->request_fn(q); } EXPORT_SYMBOL(__blk_run_queue); @@ -2023,8 +2036,17 @@ struct request *blk_fetch_request(struct request_queue *q) struct request *rq; rq = blk_peek_request(q); - if (rq) + if (rq) { + /* + * Assumption: the next request fetched from scheduler after we + * notified "urgent request pending" - will be the urgent one + */ + if (q->notified_urgent && !q->dispatched_urgent) { + q->dispatched_urgent = true; + (void)blk_mark_rq_urgent(rq); + } blk_start_request(rq); + } return rq; } EXPORT_SYMBOL(blk_fetch_request); diff --git a/block/blk-settings.c b/block/blk-settings.c index fa1eb0449a0..7d3ee7fa50d 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -99,6 +99,18 @@ void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) } EXPORT_SYMBOL_GPL(blk_queue_lld_busy); +/** + * blk_urgent_request() - Set an urgent_request handler function for queue + * @q: queue + * @fn: handler for urgent requests + * + */ +void blk_urgent_request(struct request_queue *q, request_fn_proc *fn) +{ + q->urgent_request_fn = fn; +} +EXPORT_SYMBOL(blk_urgent_request); + /** * blk_set_default_limits - reset limits to default values * @lim: the queue_limits structure to reset diff --git a/block/blk.h b/block/blk.h index 20b900a377c..7837cec9097 100644 --- a/block/blk.h +++ b/block/blk.h @@ -30,6 +30,7 @@ void __generic_unplug_device(struct request_queue *); */ enum rq_atomic_flags { REQ_ATOM_COMPLETE = 0, + REQ_ATOM_URGENT = 1, }; /* @@ -46,6 +47,16 @@ static inline void blk_clear_rq_complete(struct request *rq) clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); } +static inline int blk_mark_rq_urgent(struct request *rq) +{ + return test_and_set_bit(REQ_ATOM_URGENT, &rq->atomic_flags); +} + +static inline void blk_clear_rq_urgent(struct request *rq) +{ + clear_bit(REQ_ATOM_URGENT, &rq->atomic_flags); +} + /* * Internal elevator interface */ diff --git a/block/elevator.c b/block/elevator.c index 05bacea35a9..e48b0ff01dd 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -850,6 +850,11 @@ void elv_completed_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; + if (test_bit(REQ_ATOM_URGENT, &rq->atomic_flags)) { + q->notified_urgent = false; + q->dispatched_urgent = false; + blk_clear_rq_urgent(rq); + } /* * request is released from the driver, io must be done */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9d81d764503..94d1b95c69f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -277,6 +277,7 @@ struct request_queue { struct request_list rq; request_fn_proc *request_fn; + request_fn_proc *urgent_request_fn; make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unprep_rq_fn *unprep_rq_fn; @@ -352,6 +353,8 @@ struct request_queue { struct list_head timeout_list; struct queue_limits limits; + bool notified_urgent; + bool dispatched_urgent; /* * sg stuff @@ -808,6 +811,7 @@ extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); extern struct 
request_queue *blk_init_allocated_queue(struct request_queue *, request_fn_proc *, spinlock_t *); +extern void blk_urgent_request(struct request_queue *q, request_fn_proc *fn); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index d838c844396..858993b1f83 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -21,6 +21,7 @@ typedef int (elevator_dispatch_fn) (struct request_queue *, int); typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); typedef int (elevator_reinsert_req_fn) (struct request_queue *, struct request *); +typedef bool (elevator_is_urgent_fn) (struct request_queue *); typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *); typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); typedef int (elevator_may_queue_fn) (struct request_queue *, int); @@ -44,6 +45,7 @@ struct elevator_ops elevator_dispatch_fn *elevator_dispatch_fn; elevator_add_req_fn *elevator_add_req_fn; elevator_reinsert_req_fn *elevator_reinsert_req_fn; + elevator_is_urgent_fn *elevator_is_urgent_fn; elevator_activate_req_fn *elevator_activate_req_fn; elevator_deactivate_req_fn *elevator_deactivate_req_fn; From 3f8d0f99f66f762c5e677d279361c2db67b38135 Mon Sep 17 00:00:00 2001 From: imoseyon Date: Sat, 5 Jan 2013 12:15:21 -0800 Subject: [PATCH 314/678] row: Adding support for reinsert already dispatched req Add support for reinserting already dispatched request back to the schedulers internal data structures. The request will be reinserted back to the queue (head) it was dispatched from as if it was never dispatched. Signed-off-by: Tatyana Brokhman --- block/row-iosched.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/block/row-iosched.c b/block/row-iosched.c index a76cad30084..e3ad8acd6af 100644 --- a/block/row-iosched.c +++ b/block/row-iosched.c @@ -274,7 +274,39 @@ static void row_add_request(struct request_queue *q, row_log_rowq(rd, rqueue->prio, "added request"); } -/* +/** + * row_reinsert_req() - Reinsert request back to the scheduler + * @q: requests queue + * @rq: request to add + * + * Reinsert the given request back to the queue it was + * dispatched from as if it was never dispatched. 
+ * + * Returns 0 on success, error code otherwise + */ +static int row_reinsert_req(struct request_queue *q, + struct request *rq) +{ + struct row_data *rd = q->elevator->elevator_data; + struct row_queue *rqueue = RQ_ROWQ(rq); + + /* Verify rqueue is legitimate */ + if (rqueue->prio >= ROWQ_MAX_PRIO) { + pr_err("\n\nROW BUG: row_reinsert_req() rqueue->prio = %d\n", + rqueue->prio); + blk_dump_rq_flags(rq, ""); + return -EIO; + } + + list_add(&rq->queuelist, &rqueue->fifo); + rd->nr_reqs[rq_data_dir(rq)]++; + + row_log_rowq(rd, rqueue->prio, "request reinserted"); + + return 0; +} + +/** * row_remove_request() - Remove given request from scheduler * @q: requests queue * @rq: request to remove @@ -664,6 +696,7 @@ static struct elevator_type iosched_row = { .elevator_merge_req_fn = row_merged_requests, .elevator_dispatch_fn = row_dispatch_requests, .elevator_add_req_fn = row_add_request, + .elevator_reinsert_req_fn = row_reinsert_req, .elevator_former_req_fn = elv_rb_former_request, .elevator_latter_req_fn = elv_rb_latter_request, .elevator_set_req_fn = row_set_request, From a2bcc9e0b2febde3743853fb366278452ba1a99c Mon Sep 17 00:00:00 2001 From: imoseyon Date: Sat, 5 Jan 2013 12:16:01 -0800 Subject: [PATCH 315/678] row: Add support for urgent request handling This patch add support for handling urgent requests. ROW queue can be marked as "urgent". If an urgent queue was un-served in a previous dispatch cycle and a request was added to it - it will trigger issuing urgent request to the device driver. Signed-off-by: Tatyana Brokhman --- block/row-iosched.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/block/row-iosched.c b/block/row-iosched.c index e3ad8acd6af..56d326919c5 100644 --- a/block/row-iosched.c +++ b/block/row-iosched.c @@ -58,6 +58,17 @@ static const bool queue_idling_enabled[] = { false, /* ROWQ_PRIO_LOW_SWRITE */ }; +/* Flags indicating whether the queue can notify on urgent requests */ +static const bool urgent_queues[] = { + true, /* ROWQ_PRIO_HIGH_READ */ + true, /* ROWQ_PRIO_REG_READ */ + false, /* ROWQ_PRIO_HIGH_SWRITE */ + false, /* ROWQ_PRIO_REG_SWRITE */ + false, /* ROWQ_PRIO_REG_WRITE */ + false, /* ROWQ_PRIO_LOW_READ */ + false, /* ROWQ_PRIO_LOW_SWRITE */ +}; + /* Default values for row queues quantums in each dispatch cycle */ static const int queue_quantum[] = { 100, /* ROWQ_PRIO_HIGH_READ */ @@ -271,7 +282,13 @@ static void row_add_request(struct request_queue *q, rqueue->idle_data.last_insert_time = ktime_get(); } - row_log_rowq(rd, rqueue->prio, "added request"); + if (urgent_queues[rqueue->prio] && + row_rowq_unserved(rd, rqueue->prio)) { + row_log_rowq(rd, rqueue->prio, + "added urgent req curr_queue = %d", + rd->curr_queue); + } else + row_log_rowq(rd, rqueue->prio, "added request"); } /** @@ -306,6 +323,29 @@ static int row_reinsert_req(struct request_queue *q, return 0; } +/* + * row_urgent_pending() - Return TRUE if there is an urgent + * request on scheduler + * @q: requests queue + * + */ +static bool row_urgent_pending(struct request_queue *q) +{ + struct row_data *rd = q->elevator->elevator_data; + int i; + + for (i = 0; i < ROWQ_MAX_PRIO; i++) + if (urgent_queues[i] && row_rowq_unserved(rd, i) && + !list_empty(&rd->row_queues[i].rqueue.fifo)) { + row_log_rowq(rd, i, + "Urgent request pending (curr=%i)", + rd->curr_queue); + return true; + } + + return false; +} + /** * row_remove_request() - Remove given request from scheduler * @q: requests queue @@ -697,6 +737,7 @@ static struct 
elevator_type iosched_row = { .elevator_dispatch_fn = row_dispatch_requests, .elevator_add_req_fn = row_add_request, .elevator_reinsert_req_fn = row_reinsert_req, + .elevator_is_urgent_fn = row_urgent_pending, .elevator_former_req_fn = elv_rb_former_request, .elevator_latter_req_fn = elv_rb_latter_request, .elevator_set_req_fn = row_set_request, From 3c5777375aad856a7b5c8a7c90ea1436b8701405 Mon Sep 17 00:00:00 2001 From: franciscofranco Date: Sat, 19 Jan 2013 01:39:06 -0800 Subject: [PATCH 316/678] block: fixes required to make the kernel compile with ROW. Signed-off-by: franciscofranco --- block/blk-core.c | 6 +++--- block/elevator.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 1000fc82d4d..745dc60aa84 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -309,9 +309,9 @@ void __blk_run_queue(struct request_queue *q) return; if (!q->notified_urgent && - q->elevator->type->ops.elevator_is_urgent_fn && + q->elevator->elevator_type->ops.elevator_is_urgent_fn && q->urgent_request_fn && - q->elevator->type->ops.elevator_is_urgent_fn(q)) { + q->elevator->elevator_type->ops.elevator_is_urgent_fn(q)) { q->notified_urgent = true; q->urgent_request_fn(q); } else @@ -982,7 +982,7 @@ bool blk_reinsert_req_sup(struct request_queue *q) { if (unlikely(!q)) return false; - return q->elevator->type->ops.elevator_reinsert_req_fn ? true : false; + return q->elevator->elevator_type->ops.elevator_reinsert_req_fn ? true : false; } EXPORT_SYMBOL(blk_reinsert_req_sup); diff --git a/block/elevator.c b/block/elevator.c index e48b0ff01dd..eb3056e0ee9 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -625,10 +625,10 @@ int elv_reinsert_request(struct request_queue *q, struct request *rq) { int res; - if (!q->elevator->type->ops.elevator_reinsert_req_fn) + if (!q->elevator->elevator_type->ops.elevator_reinsert_req_fn) return -EPERM; - res = q->elevator->type->ops.elevator_reinsert_req_fn(q, rq); + res = q->elevator->elevator_type->ops.elevator_reinsert_req_fn(q, rq); if (!res) { /* * it already went through dequeue, we need to decrement the From 6fea6572b0c15f10123bebdb860168b6887ac201 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 21 Jan 2013 13:53:38 -0500 Subject: [PATCH 317/678] cpufreq: interactive: update and tweak --- drivers/cpufreq/cpufreq_interactive.c | 28 ++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 93cc5fa10ff..7678d48ab4c 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -35,7 +35,7 @@ #define CREATE_TRACE_POINTS #include -static atomic_t active_count = ATOMIC_INIT(0); +static int active_count; struct cpufreq_interactive_cpuinfo { struct timer_list cpu_timer; @@ -61,16 +61,17 @@ static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo); static struct task_struct *speedchange_task; static cpumask_t speedchange_cpumask; static spinlock_t speedchange_cpumask_lock; +static struct mutex gov_lock; /* Hi speed to bump to from lo speed when load burst (default max) */ -static unsigned int hispeed_freq; +static unsigned int hispeed_freq = 1200; /* Go to hi speed when CPU load at or above this value. */ #define DEFAULT_GO_HISPEED_LOAD 99 static unsigned long go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; /* Target load. Lower values result in higher CPU speeds. 
*/ -#define DEFAULT_TARGET_LOAD 90 +#define DEFAULT_TARGET_LOAD 95 static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; static spinlock_t target_loads_lock; static unsigned int *target_loads = default_target_loads; @@ -98,7 +99,7 @@ static unsigned long above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY; /* * Boost pulse to hispeed on touchscreen input. */ -static int input_boost_val = 1; +static int input_boost_val; struct cpufreq_interactive_inputopen { struct input_handle *handle; @@ -1039,6 +1040,8 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, if (!cpu_online(policy->cpu)) return -EINVAL; + mutex_lock(&gov_lock); + freq_table = cpufreq_frequency_get_table(policy->cpu); if (!hispeed_freq) @@ -1073,13 +1076,17 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, * Do not register the idle hook and create sysfs * entries if we have already done so. */ - if (atomic_inc_return(&active_count) > 1) + if (++active_count > 1) { + mutex_unlock(&gov_lock); return 0; + } rc = sysfs_create_group(cpufreq_global_kobject, &interactive_attr_group); - if (rc) + if (rc) { + mutex_unlock(&gov_lock); return rc; + } rc = input_register_handler(&cpufreq_interactive_input_handler); if (rc) @@ -1089,9 +1096,11 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, idle_notifier_register(&cpufreq_interactive_idle_nb); cpufreq_register_notifier( &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); + mutex_unlock(&gov_lock); break; case CPUFREQ_GOV_STOP: + mutex_lock(&gov_lock); for_each_cpu(j, policy->cpus) { pcpu = &per_cpu(cpuinfo, j); down_write(&pcpu->enable_sem); @@ -1102,8 +1111,11 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, } flush_work(&inputopen.inputopen_work); - if (atomic_dec_return(&active_count) > 0) + + if (--active_count > 0) { + mutex_unlock(&gov_lock); return 0; + } cpufreq_unregister_notifier( &cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); @@ -1111,6 +1123,7 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *policy, input_unregister_handler(&cpufreq_interactive_input_handler); sysfs_remove_group(cpufreq_global_kobject, &interactive_attr_group); + mutex_unlock(&gov_lock); break; @@ -1150,6 +1163,7 @@ static int __init cpufreq_interactive_init(void) spin_lock_init(&target_loads_lock); spin_lock_init(&speedchange_cpumask_lock); + mutex_init(&gov_lock); speedchange_task = kthread_create(cpufreq_interactive_speedchange_task, NULL, "cfinteractive"); From e92797000b5082518eb252588d62e07339a7b9fb Mon Sep 17 00:00:00 2001 From: faux123 Date: Sun, 16 Dec 2012 22:19:47 -0800 Subject: [PATCH 318/678] arm: tegra: Set Core to 0.95V in LP1 When the device prepares for LP1, the Core voltage is set to the highest value(1.2V for Enterprise and Kai, and 1.3V for AP37 and Cardhu). This is to set for some of the driver suspend along the sequence need a higher emc frequency and thus a higher Core voltage. Since the sequence of drivers suspend depends on the sequence of their registration in the table, which in turn is platform-dependent, there is no right place in the LP1 entry path where the Core voltage can be set to a lower voltage. Hence, the Core voltage remains high in LP1 resulting in higher power. Thus, the only safe location where the Core voltage can be lowered is once all the drivers are suspended and the DRAM is set in self-refresh, at the final point just before the system is suspended in the IRAM code. 
This location in the assembly code ensures that no other module will be running and thus that nothing will require a higher Core voltage. The Core is set to the lowest possible value since nothing requires more. It is then restored to the highest voltage as soon as the LP1 resume code starts, so that all drivers are resumed safely. At this execution point in IRAM on the LP1 suspend path, even the I2C clocks are gated. They must be reset first, and then the I2C transaction is performed. The I2C transaction involves 4 bytes of data: the slave address, the Core voltage register address, and 2 bytes of data carrying the value to set the voltage (the second byte is not required for this transaction). Once these registers are set, the transaction is performed by writing 0xA02 to the I2C transaction register. After sending the I2C transaction, we wait for about 250us before checking the status of the transaction and, if it has not completed, wait and check again. If the transaction has not gone through after 2ms, it is aborted and the device is allowed to enter LP1 at the higher Core voltage. Since the failure rate of the I2C transaction is very low at this point in execution, where there are no conflicts on the bus, it is acceptable for the Core to stay high for some of the LP1 cycles. However, it is unacceptable for the I2C transaction to fail on the LP1 resume path, since the device cannot come up with a lower Core voltage. In that case, the transaction is retried again and again until it succeeds; there is no alternative but to keep trying, as the device would fail to resume with the Core at 0.95V. Each platform (or each PMU) has different values for the I2C transaction, i.e. the slave address, the Core voltage register and the value to set the voltage. Since the code running from IRAM cannot access anything in SDRAM, these values need to be pushed into IRAM before the device starts executing there. This is done during suspend-code initialization, which picks the values from the board files and copies them into the IRAM part of the code before the whole block is copied to IRAM. This feature is controlled by the Kconfig option TEGRA_LP1_950, which should be enabled only once the board file of the device has been updated with the right values; the device hangs if it does not have the right values for the I2C transaction. With this change, LP1 power is reduced by 12mW on Enterprise, 20mW on AP37 and about 24mW on Kai.
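Expressed as C-style pseudocode for clarity (illustrative only -- the real implementation is the ARM assembly in sleep-t3.S below, and the local variable names here are made up), the suspend-path transaction is roughly:

        unsigned int waited;

        writel(pmu_slave_addr, i2c_base + I2C_ADDR0);        /* PMU slave address           */
        writel(0x2, i2c_base + I2C_CNFG);                    /* configure the controller    */
        writel((core_volt_low << 8) | core_reg_addr,
               i2c_base + I2C_DATA1);                        /* register + voltage value    */
        writel(0, i2c_base + I2C_DATA2);
        writel(0xA02, i2c_base + I2C_CNFG);                  /* trigger the transaction     */

        for (waited = 0; waited < 2000; waited += 250) {
                udelay(250);                                 /* poll every 250us, up to 2ms */
                if (readl(i2c_base + I2C_STATUS) == 0)
                        break;                               /* transfer completed          */
        }
        /* on timeout the suspend path simply enters LP1 at the higher voltage */

The resume path issues the same sequence with the high-voltage value and, as described above, retries until the transfer succeeds.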
Bug 1035684 Change-Id: I4318c66fd70ab227ef0786d6a13286e020e4541d Signed-off-by: Karthik Ramakrishnan (cherry picked from commit ab476f287376fd0ae51a9f298659f5eba19f0296) Reviewed-on: http://git-master/r/124779 Reviewed-by: Lokesh Pathak Tested-by: Lokesh Pathak adapted to use for Nexus Grouper from NV reference Signed-off-by: faux123 Conflicts: arch/arm/mach-tegra/Kconfig --- arch/arm/mach-tegra/Kconfig | 14 +-- arch/arm/mach-tegra/pm.c | 15 ++++ arch/arm/mach-tegra/pm.h | 8 ++ arch/arm/mach-tegra/sleep-t3.S | 150 ++++++++++++++++++++++++++++++++- arch/arm/mach-tegra/sleep.h | 40 +++++++++ 5 files changed, 221 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 6fdaaff051e..d3b5e5fd94e 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -45,8 +45,7 @@ config ARCH_TEGRA_3x_SOC select PCI_MSI if TEGRA_PCI select ARM_ERRATA_754322 select ARM_ERRATA_764369 - select TEGRA_LP2_ARM_TWD if HAVE_ARM_TWD -# select TEGRA_LP2_ARM_TWD if HAVE_ARM_TWD && !TEGRA_RAIL_OFF_MULTIPLE_CPUS + select TEGRA_LP2_ARM_TWD if HAVE_ARM_TWD && !TEGRA_RAIL_OFF_MULTIPLE_CPUS select CPA help Support for NVIDIA Tegra 3 family of SoCs, based upon the @@ -508,6 +507,13 @@ config TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND help Also will restore to original cpu frequency governor when device is resumed +config TEGRA_LP1_950 + bool "LP1 low core voltage" + default n + depends on ARCH_TEGRA_3x_SOC + help + Enable support for LP1 Core voltage to set to lowest + config TEGRA_RUNNABLE_THREAD bool "Tegra3 runnable thread hotplug" depends on ARCH_TEGRA_3x_SOC @@ -600,9 +606,7 @@ config TEGRA_LP2_ARM_TWD bool config TEGRA_RAIL_OFF_MULTIPLE_CPUS - bool "Tegra Rail Off Multiple CPUs" - depends on TEGRA_SILICON_PLATFORM - default n + bool config TEGRA_SLOW_CSITE bool "lower csite clock to 1 Mhz to reduce its power consumption" diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c index 7d2d1016843..dfe13e28745 100644 --- a/arch/arm/mach-tegra/pm.c +++ b/arch/arm/mach-tegra/pm.c @@ -1106,6 +1106,21 @@ void __init tegra_init_suspend(struct tegra_suspend_platform_data *plat) plat->suspend_mode = TEGRA_SUSPEND_LP2; } +#ifdef CONFIG_TEGRA_LP1_950 + if (pdata->lp1_lowvolt_support) { + u32 lp1_core_lowvolt, lp1_core_highvolt; + memcpy(tegra_lp1_register_pmuslave_addr(), &pdata->pmuslave_addr, 4); + memcpy(tegra_lp1_register_i2c_base_addr(), &pdata->i2c_base_addr, 4); + + lp1_core_lowvolt = 0; + lp1_core_lowvolt = (pdata->lp1_core_volt_low << 8) | pdata->core_reg_addr; + memcpy(tegra_lp1_register_core_lowvolt(), &lp1_core_lowvolt, 4); + + lp1_core_highvolt = 0; + lp1_core_highvolt = (pdata->lp1_core_volt_high << 8) | pdata->core_reg_addr; + memcpy(tegra_lp1_register_core_highvolt(), &lp1_core_highvolt, 4); + } +#endif /* !!!FIXME!!! 
THIS IS TEGRA2 ONLY */ /* Initialize scratch registers used for CPU LP2 synchronization */ writel(0, pmc + PMC_SCRATCH37); diff --git a/arch/arm/mach-tegra/pm.h b/arch/arm/mach-tegra/pm.h index 421b21ac934..8a90ac993fa 100644 --- a/arch/arm/mach-tegra/pm.h +++ b/arch/arm/mach-tegra/pm.h @@ -65,6 +65,14 @@ struct tegra_suspend_platform_data { /* lp_state = 0 for LP0 state, 1 for LP1 state, 2 for LP2 state */ void (*board_resume)(int lp_state, enum resume_stage stg); unsigned int cpu_resume_boost; /* CPU frequency resume boost in kHz */ +#ifdef CONFIG_TEGRA_LP1_950 + bool lp1_lowvolt_support; + unsigned int i2c_base_addr; + unsigned int pmuslave_addr; + unsigned int core_reg_addr; + unsigned int lp1_core_volt_low; + unsigned int lp1_core_volt_high; +#endif }; /* Tegra io dpd entry - for each supported driver */ diff --git a/arch/arm/mach-tegra/sleep-t3.S b/arch/arm/mach-tegra/sleep-t3.S index caabeb75139..ab4bfc999cd 100644 --- a/arch/arm/mach-tegra/sleep-t3.S +++ b/arch/arm/mach-tegra/sleep-t3.S @@ -94,6 +94,16 @@ #define PMC_PLLP_WB0_OVERRIDE 0xf8 #define CLK_RESET_CLK_SOURCE_MSELECT 0x3b4 +#define CLK_RESET_CLK_ENB_H_SET 0x328 +#define CLK_RESET_CLK_ENB_H_CLR 0x32c +#define CLK_RESET_CLK_RST_DEV_H_SET 0x308 +#define CLK_RESET_CLK_RST_DEV_H_CLR 0x30c + +#define I2C_CNFG 0x0 +#define I2C_ADDR0 0x4 +#define I2C_DATA1 0xc +#define I2C_DATA2 0x10 +#define I2C_STATUS 0x1c #define MSELECT_CLKM (0x3 << 30) @@ -357,6 +367,66 @@ ENTRY(tegra3_lp1_reset) mov32 r4, ((1<<28) | (8)) @ burst policy is PLLX str r4, [r0, #CLK_RESET_CCLK_BURST] +#ifdef CONFIG_TEGRA_LP1_950 +lp1_voltset: + /* Restore the Core voltage to high on LP1 resume */ + /* Reset(Enable/Disable) the DVC-I2C Controller*/ + mov r1, #(1 << 15) + str r1, [r0, #CLK_RESET_CLK_RST_DEV_H_SET] + + /* Wait for 2us */ + mov32 r7, TEGRA_TMRUS_BASE + wait_for_us r1, r7, r9 + add r1, r1, #2 + wait_until r1, r7, r9 + + mov r1, #(1 << 15) + str r1, [r0, #CLK_RESET_CLK_RST_DEV_H_CLR] + + /* Enable the DVC-I2C Controller */ + mov r1, #(1 << 15) + str r1, [r0, #CLK_RESET_CLK_ENB_H_SET] + + + /* Same I2C transaction protocol as suspend */ + ldr r1, lp1_register_pmuslave_addr + cmp r1, #0 + beq lp1_voltskip_resume + + ldr r4, lp1_register_i2c_base_addr + str r1, [r4, #I2C_ADDR0] + + mov32 r1, 0x2 + str r1, [r4, #I2C_CNFG] + + ldr r1, lp1_register_core_highvolt + str r1, [r4, #I2C_DATA1] + + mov32 r1, 0 + str r1, [r4, #I2C_DATA2] + + mov32 r1, 0xA02 + str r1, [r4, #I2C_CNFG] + + wait_for_us r1, r7, r9 + mov32 r3, 0x7D0 /* Wait for 2ms and try transaction again */ + add r0, r1, r3 +loop_i2c_status_resume: + add r1, r1, #0xFA /* Check status every 250us */ + wait_until r1, r7, r9 + cmp r0, r1 + beq lp1_voltset + + ldr r3, [r4, #I2C_STATUS] + cmp r3, #0 + bne loop_i2c_status_resume + +lp1_voltskip_resume: + /* Disable the DVC-I2C Controller */ + mov r0, #(1 << 15) + str r0, [r5, #CLK_RESET_CLK_ENB_H_CLR] +#endif + #if defined (CONFIG_CACHE_L2X0) /* power up L2 */ ldr r0, [r2, #PMC_PWRGATE_STATUS] @@ -501,6 +571,21 @@ tegra3_sdram_pad_address: tegra3_sdram_pad_size: .word tegra3_sdram_pad_address - tegra3_sdram_pad_save +#ifdef CONFIG_TEGRA_LP1_950 + .globl lp1_register_pmuslave_addr + .globl lp1_register_i2c_base_addr + .globl lp1_register_core_lowvolt + .globl lp1_register_core_highvolt +lp1_register_pmuslave_addr: + .word 0 +lp1_register_i2c_base_addr: + .word 0 +lp1_register_core_lowvolt: + .word 0 +lp1_register_core_highvolt: + .word 0 +#endif + /* * tegra3_tear_down_core * @@ -533,9 +618,72 @@ tegra3_cpu_clk32k: str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] mov 
pc, lr +lp1_clocks_prepare: + /* Prepare to set the Core to the lowest voltage if supported. + * Start by setting the I2C clocks to make the I2C transfer */ +#ifdef CONFIG_TEGRA_LP1_950 + /* Set up the PWR I2C GPIOs with the right masks*/ + + /* Reset(Set/Clr) the DVC-I2C Controller*/ + mov r0, #(1 << 15) + str r0, [r5, #CLK_RESET_CLK_RST_DEV_H_SET] + + /* Wait for 2us */ + wait_for_us r1, r7, r9 + mov32 r0, 0x7D0 + add r1, r1, r0 + wait_until r1, r7, r9 + + mov r0, #(1 << 15) + str r0, [r5, #CLK_RESET_CLK_RST_DEV_H_CLR] + + /* Enable the DVC-I2C Controller */ + mov r0, #(1 << 15) + str r0, [r5, #CLK_RESET_CLK_ENB_H_SET] + + /* I2C transfer protocol: + * 4 packets: Slaveaddr + WriteConfigure + Data1 + Data2 */ + ldr r0, lp1_register_pmuslave_addr + cmp r0, #0 + beq lp1_volt_skip + ldr r1, lp1_register_i2c_base_addr + str r0, [r1, #I2C_ADDR0] + + mov32 r0, 0x2 + str r0, [r1, #I2C_CNFG] + + ldr r0, lp1_register_core_lowvolt + str r0, [r1, #I2C_DATA1] + + mov32 r0, 0 + str r0, [r1, #I2C_DATA2] + + /* Send I2C transaction */ + mov32 r0, 0xA02 + str r0, [r1, #I2C_CNFG] + + /* Check the transaction status before proceeding */ + wait_for_us r2, r7, r9 + mov32 r3, 0x7D0 /* Wait for 2ms for I2C transaction */ + add r3, r2, r3 +loop_i2c_status_suspend: + add r2, r2, #0xFA /* Check status every 250us */ + cmp r3, r2 + beq lp1_volt_skip /* Waited for 2ms, I2C transaction didn't take place */ + wait_until r2, r7, r9 + + ldr r0, [r1, #I2C_STATUS] + cmp r0, #0 + bne loop_i2c_status_suspend +lp1_volt_skip: + + /* Disable the DVC-I2C Controller */ + mov r0, #(1 << 15) + str r0, [r5, #CLK_RESET_CLK_ENB_H_CLR] + +#endif /* start by jumping to clkm to safely disable PLLs, then jump * to clks */ -lp1_clocks_prepare: mov r0, #(1 << 28) str r0, [r5, #CLK_RESET_SCLK_BURST] str r0, [r5, #CLK_RESET_CCLK_BURST] diff --git a/arch/arm/mach-tegra/sleep.h b/arch/arm/mach-tegra/sleep.h index 59298f1efbe..ba290fdeb35 100644 --- a/arch/arm/mach-tegra/sleep.h +++ b/arch/arm/mach-tegra/sleep.h @@ -162,6 +162,10 @@ void tegra2_sleep_wfi(unsigned long v2p); #else extern void tegra3_iram_start; extern void tegra3_iram_end; +extern unsigned int lp1_register_pmuslave_addr; +extern unsigned int lp1_register_i2c_base_addr; +extern unsigned int lp1_register_core_lowvolt; +extern unsigned int lp1_register_core_highvolt; int tegra3_sleep_core_finish(unsigned long int); int tegra3_sleep_cpu_secondary_finish(unsigned long int); void tegra3_hotplug_shutdown(void); @@ -184,5 +188,41 @@ static inline void *tegra_iram_end(void) return &tegra3_iram_end; #endif } + +static inline void *tegra_lp1_register_pmuslave_addr(void) +{ +#ifdef CONFIG_ARCH_TEGRA_2x_SOC + return NULL; +#else + return &lp1_register_pmuslave_addr; +#endif +} + +static inline void *tegra_lp1_register_i2c_base_addr(void) +{ +#ifdef CONFIG_ARCH_TEGRA_2x_SOC + return NULL; +#else + return &lp1_register_i2c_base_addr; +#endif +} + +static inline void *tegra_lp1_register_core_lowvolt(void) +{ +#ifdef CONFIG_ARCH_TEGRA_2x_SOC + return NULL; +#else + return &lp1_register_core_lowvolt; +#endif +} + +static inline void *tegra_lp1_register_core_highvolt(void) +{ +#ifdef CONFIG_ARCH_TEGRA_2x_SOC + return NULL; +#else + return &lp1_register_core_highvolt; +#endif +} #endif #endif From 49ff48939126f3eaa3b1dd6dc6aba1f1bbf0c78d Mon Sep 17 00:00:00 2001 From: faux123 Date: Sun, 16 Dec 2012 22:27:12 -0800 Subject: [PATCH 319/678] arm: tegra: Board files settings for LP1 0.95V CoreV Set the register values for each of the board files to keep the Core voltage to 0.95V in LP1. 
This change is only for those platforms where LP1 is supported. Enterprise and Kai are the main platforms for this change. There is no support for Cardhu for LP1 and so is left blank and the feature will be skipped for Cardhu platforms, except for AP37. AP37 with a PM269 board needs this change and so Cardhu board file is updated with the values specific to AP37. This change is part of the feature to set VCore to 0.95V Refer to http://git-master/r/124135 for more details Bug 1035684 Change-Id: I6d1d984b0e7968b441cebbc37705c25647a4a85a Signed-off-by: Karthik Ramakrishnan (cherry picked from commit b46921e475bd95e729896a6763bc94df1e03ee4a) Reviewed-on: http://git-master/r/124780 Reviewed-by: Lokesh Pathak Tested-by: Lokesh Pathak adapted for use on Grouper from NV reference Signed-off-by: faux123 --- arch/arm/mach-tegra/board-grouper-power.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/mach-tegra/board-grouper-power.c b/arch/arm/mach-tegra/board-grouper-power.c index 9780d5f555f..2c8ff7c7fc2 100755 --- a/arch/arm/mach-tegra/board-grouper-power.c +++ b/arch/arm/mach-tegra/board-grouper-power.c @@ -615,6 +615,14 @@ static struct tegra_suspend_platform_data grouper_suspend_data = { .cpu_lp2_min_residency = 2000, .board_suspend = grouper_board_suspend, .board_resume = grouper_board_resume, +#ifdef CONFIG_TEGRA_LP1_950 + .lp1_lowvolt_support = true, + .i2c_base_addr = TEGRA_I2C5_BASE, + .pmuslave_addr = 0x24, + .core_reg_addr = 0x5B, + .lp1_core_volt_low = 0x1D, + .lp1_core_volt_high = 0x33, +#endif }; int __init grouper_suspend_init(void) From fc34277da62a2a5b4cf7a183a104b5e1d1fc8e81 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 23 Jan 2013 18:18:12 -0500 Subject: [PATCH 320/678] Revert "Revert "ARM: tegra: clock: Increase boost_up_threshold for AVP clock"" This reverts commit 55c5d9c1cb6155224ffb52b14d7df61acc3d68fc. --- arch/arm/mach-tegra/tegra3_actmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_actmon.c b/arch/arm/mach-tegra/tegra3_actmon.c index 5df6ed1fc47..a76d0a963d9 100644 --- a/arch/arm/mach-tegra/tegra3_actmon.c +++ b/arch/arm/mach-tegra/tegra3_actmon.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, NVIDIA Corporation. + * Copyright (c) 2012, NVIDIA CORPORATION. 
All rights reserved * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -532,7 +532,7 @@ static struct actmon_dev actmon_dev_avp = { .boost_freq_step = 8000, .boost_up_coef = 200, .boost_down_coef = 50, - .boost_up_threshold = 75, + .boost_up_threshold = 85, .boost_down_threshold = 50, .up_wmark_window = 1, From 67b7e94c8fb7f354d93243a9b6f1e9592ef91cef Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 23 Jan 2013 18:42:24 -0500 Subject: [PATCH 321/678] mach-tegra: cpuquiet.c: tweak delays --- arch/arm/mach-tegra/cpuquiet.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/cpuquiet.c b/arch/arm/mach-tegra/cpuquiet.c index 467320e9ec4..f212e9b632a 100644 --- a/arch/arm/mach-tegra/cpuquiet.c +++ b/arch/arm/mach-tegra/cpuquiet.c @@ -38,10 +38,10 @@ #include "clock.h" #define INITIAL_STATE TEGRA_CPQ_IDLE -#define UP2G_DELAY_MS 384 -#define UP_DELAY_MS 128 -#define DOWN2LP_DELAY_MS 4224 -#define DOWN_DELAY_MS 2112 +#define UP2G_DELAY_MS 300 +#define UP_DELAY_MS 150 +#define DOWN2LP_DELAY_MS 3000 +#define DOWN_DELAY_MS 2000 static struct mutex *tegra3_cpu_lock; static struct workqueue_struct *cpuquiet_wq; From c89fbdf8cd3203970f55bfc4ce090a2082ef02f0 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 1 Feb 2013 14:07:13 -0500 Subject: [PATCH 322/678] defconfig: update --- arch/arm/configs/metallice_grouper_defconfig | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 42c1084720b..40a10f3ce4d 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -88,7 +88,6 @@ CONFIG_RESOURCE_COUNTERS=y # CONFIG_CGROUP_PERF is not set CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y # CONFIG_BLK_CGROUP is not set # CONFIG_NAMESPACES is not set @@ -178,17 +177,15 @@ CONFIG_LBDAF=y CONFIG_IOSCHED_NOOP=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_ROW=y -CONFIG_IOSCHED_CFQ=y -CONFIG_IOSCHED_SIO=y +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_IOSCHED_SIO is not set # CONFIG_IOSCHED_VR is not set CONFIG_IOSCHED_BFQ=y CONFIG_CGROUP_BFQIO=y CONFIG_DEFAULT_DEADLINE=y # CONFIG_DEFAULT_ROW is not set -# CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_BFQ is not set # CONFIG_DEFAULT_NOOP is not set -# CONFIG_DEFAULT_SIO is not set CONFIG_DEFAULT_IOSCHED="deadline" # CONFIG_INLINE_SPIN_TRYLOCK is not set # CONFIG_INLINE_SPIN_TRYLOCK_BH is not set @@ -350,6 +347,7 @@ CONFIG_TEGRA_MC_PROFILE=y CONFIG_TEGRA_EDP_LIMITS=y CONFIG_TEGRA_EMC_TO_DDR_CLOCK=1 # CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND is not set +CONFIG_TEGRA_LP1_950=y CONFIG_TEGRA_RUNNABLE_THREAD=y CONFIG_TEGRA_VARIANT_INFO=y CONFIG_USB_HOTPLUG=y @@ -362,7 +360,6 @@ CONFIG_TEGRA_BB_XMM_POWER=y CONFIG_TEGRA_PLLM_RESTRICTED=y # CONFIG_TEGRA_WDT_RECOVERY is not set CONFIG_TEGRA_LP2_ARM_TWD=y -# CONFIG_TEGRA_RAIL_OFF_MULTIPLE_CPUS is not set CONFIG_TEGRA_SLOW_CSITE=y # CONFIG_TEGRA_PREINIT_CLOCKS is not set From 4a4717bba05cd66b1794c57bad42d1d692a1eee7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 Jan 2012 14:33:55 -0800 Subject: [PATCH 323/678] mm: fix NULL ptr dereference in __count_immobile_pages commit 687875fb7de4a95223af20ee024282fa9099f860 upstream. 
Fix the following NULL ptr dereference caused by cat /sys/devices/system/memory/memory0/removable Pid: 13979, comm: sed Not tainted 3.0.13-0.5-default #1 IBM BladeCenter LS21 -[7971PAM]-/Server Blade RIP: __count_immobile_pages+0x4/0x100 Process sed (pid: 13979, threadinfo ffff880221c36000, task ffff88022e788480) Call Trace: is_pageblock_removable_nolock+0x34/0x40 is_mem_section_removable+0x74/0xf0 show_mem_removable+0x41/0x70 sysfs_read_file+0xfe/0x1c0 vfs_read+0xc7/0x130 sys_read+0x53/0xa0 system_call_fastpath+0x16/0x1b We are crashing because we are trying to dereference NULL zone which came from pfn=0 (struct page ffffea0000000000). According to the boot log this page is marked reserved: e820 update range: 0000000000000000 - 0000000000010000 (usable) ==> (reserved) and early_node_map confirms that: early_node_map[3] active PFN ranges 1: 0x00000010 -> 0x0000009c 1: 0x00000100 -> 0x000bffa3 1: 0x00100000 -> 0x00240000 The problem is that memory_present works in PAGE_SECTION_MASK aligned blocks so the reserved range sneaks into the the section as well. This also means that free_area_init_node will not take care of those reserved pages and they stay uninitialized. When we try to read the removable status we walk through all available sections and hope that the zone is valid for all pages in the section. But this is not true in this case as the zone and nid are not initialized. We have only one node in this particular case and it is marked as node=1 (rather than 0) and that made the problem visible because page_to_nid will return 0 and there are no zones on the node. Let's check that the zone is valid and that the given pfn falls into its boundaries and mark the section not removable. This might cause some false positives, probably, but we do not have any sane way to find out whether the page is reserved by the platform or it is just not used for whatever other reasons. Signed-off-by: Michal Hocko Acked-by: Mel Gorman Cc: KAMEZAWA Hiroyuki Cc: Andrea Arcangeli Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8859578e4bd..e6c28f28ad5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5624,6 +5624,17 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count) bool is_pageblock_removable_nolock(struct page *page) { struct zone *zone = page_zone(page); + unsigned long pfn = page_to_pfn(page); + + /* + * We have to be careful here because we are iterating over memory + * sections which are not zone aware so we might end up outside of + * the zone but still within the section. + */ + if (!zone || zone->zone_start_pfn > pfn || + zone->zone_start_pfn + zone->spanned_pages <= pfn) + return false; + return __count_immobile_pages(zone, page, 0); } From 9f99fcc056d6eed3713c3bb7e47ba990b13225c3 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 24 May 2012 19:46:26 +0530 Subject: [PATCH 324/678] CPU hotplug, cpusets, suspend: Don't modify cpusets during suspend/resume commit d35be8bab9b0ce44bed4b9453f86ebf64062721e upstream. In the event of CPU hotplug, the kernel modifies the cpusets' cpus_allowed masks as and when necessary to ensure that the tasks belonging to the cpusets have some place (online CPUs) to run on. And regular CPU hotplug is destructive in the sense that the kernel doesn't remember the original cpuset configurations set by the user, across hotplug operations. 
However, suspend/resume (which uses CPU hotplug) is a special case in which the kernel has the responsibility to restore the system (during resume), to exactly the same state it was in before suspend. In order to achieve that, do the following: 1. Don't modify cpusets during suspend/resume. At all. In particular, don't move the tasks from one cpuset to another, and don't modify any cpuset's cpus_allowed mask. So, simply ignore cpusets during the CPU hotplug operations that are carried out in the suspend/resume path. 2. However, cpusets and sched domains are related. We just want to avoid altering cpusets alone. So, to keep the sched domains updated, build a single sched domain (containing all active cpus) during each of the CPU hotplug operations carried out in s/r path, effectively ignoring the cpusets' cpus_allowed masks. (Since userspace is frozen while doing all this, it will go unnoticed.) 3. During the last CPU online operation during resume, build the sched domains by looking up the (unaltered) cpusets' cpus_allowed masks. That will bring back the system to the same original state as it was in before suspend. Ultimately, this will not only solve the cpuset problem related to suspend resume (ie., restores the cpusets to exactly what it was before suspend, by not touching it at all) but also speeds up suspend/resume because we avoid running cpuset update code for every CPU being offlined/onlined. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20120524141611.3692.20155.stgit@srivatsabhat.in.ibm.com [Preeti U Murthy: Please apply this patch to the stable tree 3.0.y] Signed-off-by: Preeti U Murthy Signed-off-by: Ben Hutchings --- kernel/cpuset.c | 3 +++ kernel/sched.c | 40 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 10131fdaff7..11191ad8d7a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2085,6 +2085,9 @@ static void scan_for_empty_cpusets(struct cpuset *root) * (of no affect) on systems that are actively using CPU hotplug * but making no active use of cpusets. * + * The only exception to this is suspend/resume, where we don't + * modify cpusets at all. + * * This routine ensures that top_cpuset.cpus_allowed tracks * cpu_active_mask on each CPU hotplug (cpuhp) event. * diff --git a/kernel/sched.c b/kernel/sched.c index 7fe017c88b7..ffc48149de9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8008,34 +8008,66 @@ int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) } #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ +static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */ + /* * Update cpusets according to cpu_active mask. If cpusets are * disabled, cpuset_update_active_cpus() becomes a simple wrapper * around partition_sched_domains(). + * + * If we come here as part of a suspend/resume, don't touch cpusets because we + * want to restore it back to its original state upon resume anyway. */ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, void *hcpu) { - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { + case CPU_ONLINE_FROZEN: + case CPU_DOWN_FAILED_FROZEN: + + /* + * num_cpus_frozen tracks how many CPUs are involved in suspend + * resume sequence. As long as this is not the last online + * operation in the resume sequence, just build a single sched + * domain, ignoring cpusets. 
+ */ + num_cpus_frozen--; + if (likely(num_cpus_frozen)) { + partition_sched_domains(1, NULL, NULL); + break; + } + + /* + * This is the last CPU online operation. So fall through and + * restore the original sched domains by considering the + * cpuset configurations. + */ + case CPU_ONLINE: case CPU_DOWN_FAILED: cpuset_update_active_cpus(); - return NOTIFY_OK; + break; default: return NOTIFY_DONE; } + return NOTIFY_OK; } static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, void *hcpu) { - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { case CPU_DOWN_PREPARE: cpuset_update_active_cpus(); - return NOTIFY_OK; + break; + case CPU_DOWN_PREPARE_FROZEN: + num_cpus_frozen++; + partition_sched_domains(1, NULL, NULL); + break; default: return NOTIFY_DONE; } + return NOTIFY_OK; } static int update_runtime(struct notifier_block *nfb, From 8d56e43d45300a118deade694a5c67bfb9b22ba4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jul 2012 12:39:26 -0700 Subject: [PATCH 325/678] workqueue: perform cpu down operations from low priority cpu_notifier() commit 6575820221f7a4dd6eadecf7bf83cdd154335eda upstream. Currently, all workqueue cpu hotplug operations run off CPU_PRI_WORKQUEUE which is higher than normal notifiers. This is to ensure that workqueue is up and running while bringing up a CPU before other notifiers try to use workqueue on the CPU. Per-cpu workqueues are supposed to remain working and bound to the CPU for normal CPU_DOWN_PREPARE notifiers. This holds mostly true even with workqueue offlining running with higher priority because workqueue CPU_DOWN_PREPARE only creates a bound trustee thread which runs the per-cpu workqueue without concurrency management without explicitly detaching the existing workers. However, if the trustee needs to create new workers, it creates unbound workers which may wander off to other CPUs while CPU_DOWN_PREPARE notifiers are in progress. Furthermore, if the CPU down is cancelled, the per-CPU workqueue may end up with workers which aren't bound to the CPU. While reliably reproducible with a convoluted artificial test-case involving scheduling and flushing CPU burning work items from CPU down notifiers, this isn't very likely to happen in the wild, and, even when it happens, the effects are likely to be hidden by the following successful CPU down. Fix it by using different priorities for up and down notifiers - high priority for up operations and low priority for down operations. Workqueue cpu hotplug operations will soon go through further cleanup. Signed-off-by: Tejun Heo Acked-by: "Rafael J. 
Wysocki" Signed-off-by: Ben Hutchings --- include/linux/cpu.h | 5 +++-- kernel/workqueue.c | 38 +++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 54d948ec49a..d732be52629 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -66,8 +66,9 @@ enum { /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, CPU_PRI_MIGRATION = 10, - /* prepare workqueues for other notifiers */ - CPU_PRI_WORKQUEUE = 5, + /* bring up workqueues before normal notifiers and down after */ + CPU_PRI_WORKQUEUE_UP = 5, + CPU_PRI_WORKQUEUE_DOWN = -5, }; #define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1783aabc612..224345e63d5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3579,6 +3579,41 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, return notifier_from_errno(0); } +/* + * Workqueues should be brought up before normal priority CPU notifiers. + * This will be registered high priority CPU notifier. + */ +static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + case CPU_UP_CANCELED: + case CPU_DOWN_FAILED: + case CPU_ONLINE: + return workqueue_cpu_callback(nfb, action, hcpu); + } + return NOTIFY_OK; +} + +/* + * Workqueues should be brought down after normal priority CPU notifiers. + * This will be registered as low priority CPU notifier. + */ +static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_PREPARE: + case CPU_DYING: + case CPU_POST_DEAD: + return workqueue_cpu_callback(nfb, action, hcpu); + } + return NOTIFY_OK; +} + #ifdef CONFIG_SMP struct work_for_cpu { @@ -3772,7 +3807,8 @@ static int __init init_workqueues(void) unsigned int cpu; int i; - cpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); + cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); + cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); /* initialize gcwqs */ for_each_gcwq_cpu(cpu) { From a14d26b040a77a57779aea02290a954352bc4ed2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Sep 2012 12:48:43 -0700 Subject: [PATCH 326/678] workqueue: reimplement work_on_cpu() using system_wq commit ed48ece27cd3d5ee0354c32bbaec0f3e1d4715c3 upstream. The existing work_on_cpu() implementation is hugely inefficient. It creates a new kthread, execute that single function and then let the kthread die on each invocation. Now that system_wq can handle concurrent executions, there's no advantage of doing this. Reimplement work_on_cpu() using system_wq which makes it simpler and way more efficient. stable: While this isn't a fix in itself, it's needed to fix a workqueue related bug in cpufreq/powernow-k8. AFAICS, this shouldn't break other existing users. Signed-off-by: Tejun Heo Acked-by: Jiri Kosina Cc: Linus Torvalds Cc: Bjorn Helgaas Cc: Len Brown Cc: Rafael J. 
Wysocki Signed-off-by: Ben Hutchings --- kernel/workqueue.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 224345e63d5..8bd8a6f811c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3617,18 +3617,17 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, #ifdef CONFIG_SMP struct work_for_cpu { - struct completion completion; + struct work_struct work; long (*fn)(void *); void *arg; long ret; }; -static int do_work_for_cpu(void *_wfc) +static void work_for_cpu_fn(struct work_struct *work) { - struct work_for_cpu *wfc = _wfc; + struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work); + wfc->ret = wfc->fn(wfc->arg); - complete(&wfc->completion); - return 0; } /** @@ -3643,19 +3642,11 @@ static int do_work_for_cpu(void *_wfc) */ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) { - struct task_struct *sub_thread; - struct work_for_cpu wfc = { - .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion), - .fn = fn, - .arg = arg, - }; + struct work_for_cpu wfc = { .fn = fn, .arg = arg }; - sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu"); - if (IS_ERR(sub_thread)) - return PTR_ERR(sub_thread); - kthread_bind(sub_thread, cpu); - wake_up_process(sub_thread); - wait_for_completion(&wfc.completion); + INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); return wfc.ret; } EXPORT_SYMBOL_GPL(work_on_cpu); From 649468c58423455c821df912ad0dd91b7aea3437 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:45 -0700 Subject: [PATCH 327/678] workqueue: add missing smp_wmb() in process_one_work() commit 959d1af8cffc8fd38ed53e8be1cf4ab8782f9c00 upstream. WORK_STRUCT_PENDING is used to claim ownership of a work item and process_one_work() releases it before starting execution. When someone else grabs PENDING, all pre-release updates to the work item should be visible and all updates made by the new owner should happen afterwards. Grabbing PENDING uses test_and_set_bit() and thus has a full barrier; however, clearing doesn't have a matching wmb. Given the preceding spin_unlock and use of clear_bit, I don't believe this can be a problem on an actual machine and there hasn't been any related report but it still is theretically possible for clear_pending to permeate upwards and happen before work->entry update. Add an explicit smp_wmb() before work_clear_pending(). Signed-off-by: Tejun Heo Cc: Oleg Nesterov Signed-off-by: Ben Hutchings --- kernel/workqueue.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8bd8a6f811c..72b1181fa80 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1862,7 +1862,9 @@ __acquires(&gcwq->lock) spin_unlock_irq(&gcwq->lock); + smp_wmb(); /* paired with test_and_set_bit(PENDING) */ work_clear_pending(work); + lock_map_acquire_read(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); trace_workqueue_execute_start(work); From 13187aba712e8e9f7bbd200509f4ef2895ce6d05 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sun, 2 Sep 2012 00:28:19 +0800 Subject: [PATCH 328/678] workqueue: UNBOUND -> REBIND morphing in rebind_workers() should be atomic commit 96e65306b81351b656835c15931d1d237b252f27 upstream. The compiler may compile the following code into TWO write/modify instructions. 
worker->flags &= ~WORKER_UNBOUND; worker->flags |= WORKER_REBIND; so the other CPU may temporarily see worker->flags which doesn't have either WORKER_UNBOUND or WORKER_REBIND set and perform local wakeup prematurely. Fix it by using single explicit assignment via ACCESS_ONCE(). Because idle workers have another WORKER_NOT_RUNNING flag, this bug doesn't exist for them; however, update it to use the same pattern for consistency. tj: Applied the change to idle workers too and updated comments and patch description a bit. stable: Idle worker rebinding doesn't apply for -stable and WORKER_UNBOUND used to be WORKER_ROGUE. Updated accordingly. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 72b1181fa80..38b1c42c0b6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3432,14 +3432,17 @@ static int __cpuinit trustee_thread(void *__gcwq) for_each_busy_worker(worker, i, pos, gcwq) { struct work_struct *rebind_work = &worker->rebind_work; + unsigned long worker_flags = worker->flags; /* * Rebind_work may race with future cpu hotplug * operations. Use a separate flag to mark that - * rebinding is scheduled. + * rebinding is scheduled. The morphing should + * be atomic. */ - worker->flags |= WORKER_REBIND; - worker->flags &= ~WORKER_ROGUE; + worker_flags |= WORKER_REBIND; + worker_flags &= ~WORKER_ROGUE; + ACCESS_ONCE(worker->flags) = worker_flags; /* queue rebind_work, wq doesn't matter, use the default one */ if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, From eb787d8add9e1f3339161e3be2de9da94c281270 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 18 Sep 2012 10:40:00 -0700 Subject: [PATCH 329/678] workqueue: fix possible stall on try_to_grab_pending() of a delayed work item commit 3aa62497594430ea522050b75c033f71f2c60ee6 upstream. Currently, when try_to_grab_pending() grabs a delayed work item, it leaves its linked work items alone on the delayed_works. The linked work items are always NO_COLOR and will cause future cwq_activate_first_delayed() increase cwq->nr_active incorrectly, and may cause the whole cwq to stall. For example, state: cwq->max_active = 1, cwq->nr_active = 1 one work in cwq->pool, many in cwq->delayed_works. step1: try_to_grab_pending() removes a work item from delayed_works but leaves its NO_COLOR linked work items on it. step2: Later on, cwq_activate_first_delayed() activates the linked work item increasing ->nr_active. step3: cwq->nr_active = 1, but all activated work items of the cwq are NO_COLOR. When they finish, cwq->nr_active will not be decreased due to NO_COLOR, and no further work items will be activated from cwq->delayed_works. the cwq stalls. Fix it by ensuring the target work item is activated before stealing PENDING in try_to_grab_pending(). This ensures that all the linked work items are activated without incorrectly bumping cwq->nr_active. tj: Updated comment and description. 
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo [bwh: Backported to 3.2: adjust context] Signed-off-by: Ben Hutchings --- kernel/workqueue.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 38b1c42c0b6..20ecb978471 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1719,10 +1719,9 @@ static void move_linked_works(struct work_struct *work, struct list_head *head, *nextp = n; } -static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) +static void cwq_activate_delayed_work(struct work_struct *work) { - struct work_struct *work = list_first_entry(&cwq->delayed_works, - struct work_struct, entry); + struct cpu_workqueue_struct *cwq = get_work_cwq(work); struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); trace_workqueue_activate_work(work); @@ -1731,6 +1730,14 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) cwq->nr_active++; } +static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) +{ + struct work_struct *work = list_first_entry(&cwq->delayed_works, + struct work_struct, entry); + + cwq_activate_delayed_work(work); +} + /** * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * @cwq: cwq of interest @@ -2621,6 +2628,18 @@ static int try_to_grab_pending(struct work_struct *work) smp_rmb(); if (gcwq == get_work_gcwq(work)) { debug_work_deactivate(work); + + /* + * A delayed work item cannot be grabbed directly + * because it might have linked NO_COLOR work items + * which, if left on the delayed_list, will confuse + * cwq->nr_active management later on and cause + * stall. Make sure the work item is activated + * before grabbing. + */ + if (*work_data_bits(work) & WORK_STRUCT_DELAYED) + cwq_activate_delayed_work(work); + list_del_init(&work->entry); cwq_dec_nr_in_flight(get_work_cwq(work), get_work_color(work), From 26ceaa94bdf7ebdba951fff685dd531ef5b1bcd4 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 2 Mar 2012 10:51:00 +0100 Subject: [PATCH 330/678] Block: use a freezable workqueue for disk-event polling commit 62d3c5439c534b0e6c653fc63e6d8c67be3a57b1 upstream. This patch (as1519) fixes a bug in the block layer's disk-events polling. The polling is done by a work routine queued on the system_nrt_wq workqueue. Since that workqueue isn't freezable, the polling continues even in the middle of a system sleep transition. Obviously, polling a suspended drive for media changes and such isn't a good thing to do; in the case of USB mass-storage devices it can lead to real problems requiring device resets and even re-enumeration. The patch fixes things by creating a new system-wide, non-reentrant, freezable workqueue and using it for disk-events polling. Signed-off-by: Alan Stern Acked-by: Tejun Heo Acked-by: Rafael J. 
Wysocki Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/genhd.c | 10 +++++----- include/linux/workqueue.h | 4 ++++ kernel/workqueue.c | 7 ++++++- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index d3834710b95..60c7561aba0 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1493,9 +1493,9 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now) intv = disk_events_poll_jiffies(disk); set_timer_slack(&ev->dwork.timer, intv / 4); if (check_now) - queue_delayed_work(system_nrt_wq, &ev->dwork, 0); + queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); else if (intv) - queue_delayed_work(system_nrt_wq, &ev->dwork, intv); + queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv); out_unlock: spin_unlock_irqrestore(&ev->lock, flags); } @@ -1539,7 +1539,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask) ev->clearing |= mask; if (!ev->block) { cancel_delayed_work(&ev->dwork); - queue_delayed_work(system_nrt_wq, &ev->dwork, 0); + queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); } spin_unlock_irq(&ev->lock); } @@ -1576,7 +1576,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) /* uncondtionally schedule event check and wait for it to finish */ disk_block_events(disk); - queue_delayed_work(system_nrt_wq, &ev->dwork, 0); + queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); flush_delayed_work(&ev->dwork); __disk_unblock_events(disk, false); @@ -1613,7 +1613,7 @@ static void disk_events_workfn(struct work_struct *work) intv = disk_events_poll_jiffies(disk); if (!ev->block && intv) - queue_delayed_work(system_nrt_wq, &ev->dwork, intv); + queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv); spin_unlock_irq(&ev->lock); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0d556deb497..e228ca9e1b5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -289,12 +289,16 @@ enum { * * system_freezable_wq is equivalent to system_wq except that it's * freezable. + * + * system_nrt_freezable_wq is equivalent to system_nrt_wq except that + * it's freezable. 
*/ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_nrt_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; +extern struct workqueue_struct *system_nrt_freezable_wq; extern struct workqueue_struct * __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 20ecb978471..7a09650a64e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -253,11 +253,13 @@ struct workqueue_struct *system_long_wq __read_mostly; struct workqueue_struct *system_nrt_wq __read_mostly; struct workqueue_struct *system_unbound_wq __read_mostly; struct workqueue_struct *system_freezable_wq __read_mostly; +struct workqueue_struct *system_nrt_freezable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_wq); EXPORT_SYMBOL_GPL(system_long_wq); EXPORT_SYMBOL_GPL(system_nrt_wq); EXPORT_SYMBOL_GPL(system_unbound_wq); EXPORT_SYMBOL_GPL(system_freezable_wq); +EXPORT_SYMBOL_GPL(system_nrt_freezable_wq); #define CREATE_TRACE_POINTS #include @@ -3872,8 +3874,11 @@ static int __init init_workqueues(void) WQ_UNBOUND_MAX_ACTIVE); system_freezable_wq = alloc_workqueue("events_freezable", WQ_FREEZABLE, 0); + system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable", + WQ_NON_REENTRANT | WQ_FREEZABLE, 0); BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || - !system_unbound_wq || !system_freezable_wq); + !system_unbound_wq || !system_freezable_wq || + !system_nrt_freezable_wq); return 0; } early_initcall(init_workqueues); From 6ca92ad6372a7aae89c2fec0d343b28a516e57c2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 14 May 2012 15:04:50 -0700 Subject: [PATCH 331/678] workqueue: skip nr_running sanity check in worker_enter_idle() if trustee is active commit 544ecf310f0e7f51fa057ac2a295fc1b3b35a9d3 upstream. worker_enter_idle() has WARN_ON_ONCE() which triggers if nr_running isn't zero when every worker is idle. This can trigger spuriously while a cpu is going down due to the way trustee sets %WORKER_ROGUE and zaps nr_running. It first sets %WORKER_ROGUE on all workers without updating nr_running, releases gcwq->lock, schedules, regrabs gcwq->lock and then zaps nr_running. If the last running worker enters idle inbetween, it would see stale nr_running which hasn't been zapped yet and trigger the WARN_ON_ONCE(). Fix it by performing the sanity check iff the trustee is idle. Signed-off-by: Tejun Heo Reported-by: "Paul E. McKenney" Signed-off-by: Ben Hutchings --- kernel/workqueue.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7a09650a64e..9948537be0d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1215,8 +1215,13 @@ static void worker_enter_idle(struct worker *worker) } else wake_up_all(&gcwq->trustee_wait); - /* sanity check nr_running */ - WARN_ON_ONCE(gcwq->nr_workers == gcwq->nr_idle && + /* + * Sanity check nr_running. Because trustee releases gcwq->lock + * between setting %WORKER_ROGUE and zapping nr_running, the + * warning may trigger spuriously. Check iff trustee is idle. 
+ */ + WARN_ON_ONCE(gcwq->trustee_state == TRUSTEE_DONE && + gcwq->nr_workers == gcwq->nr_idle && atomic_read(get_gcwq_nr_running(gcwq->cpu))); } From ae595d4ef38a7c00b05339b9577a96b018ed3280 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 18 Jan 2013 19:47:19 -0500 Subject: [PATCH 332/678] Revert "sched: Folding nohz load accounting more accurate" This reverts commit 807cae75e15b7be8470237f967d6809c21970725. --- include/linux/sched.h | 2 +- kernel/sched.c | 84 +-------------------------------------- kernel/time/timekeeping.c | 1 - 3 files changed, 3 insertions(+), 84 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index f85dc843da7..9311b2cf681 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -146,7 +146,7 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(unsigned long ticks); -extern void prepare_idle_mask(unsigned long ticks); + extern unsigned long get_parent_ip(unsigned long addr); struct seq_file; diff --git a/kernel/sched.c b/kernel/sched.c index ffc48149de9..8549f866be5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3353,7 +3353,6 @@ unsigned long this_cpu_load(void) /* Variables and functions for calc_load */ static atomic_long_t calc_load_tasks; static unsigned long calc_load_update; -static unsigned long idle_mask_update; unsigned long avenrun[3]; EXPORT_SYMBOL(avenrun); @@ -3389,37 +3388,13 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) */ static atomic_long_t calc_load_tasks_idle; -/* - * Those cpus whose load alread has been calculated in this LOAD_FREQ - * period will be masked. - */ -struct cpumask cpu_load_update_mask; - -/* - * Fold unmask cpus' idle load - */ -static atomic_long_t calc_unmask_cpu_load_idle; - static void calc_load_account_idle(struct rq *this_rq) { long delta; - int cpu = smp_processor_id(); delta = calc_load_fold_active(this_rq); - if (delta) { + if (delta) atomic_long_add(delta, &calc_load_tasks_idle); - /* - * calc_unmask_cpu_load_idle is only used between the first - * cpu load accounting - * and the last cpu load accounting in every LOAD_FREQ period, - * and records idle load on - * those unmask cpus. - */ - if (!cpumask_empty(&cpu_load_update_mask) && - !cpumask_test_cpu(cpu, &cpu_load_update_mask)) { - atomic_long_add(delta, &calc_unmask_cpu_load_idle); - } - } } static long calc_load_fold_idle(void) @@ -3435,18 +3410,6 @@ static long calc_load_fold_idle(void) return delta; } -static long calc_load_fold_unmask_idle(void) -{ - long delta = 0; - - if (atomic_long_read(&calc_unmask_cpu_load_idle)) { - delta = atomic_long_xchg(&calc_unmask_cpu_load_idle, 0); - atomic_long_sub(delta, &calc_load_tasks_idle); - } - - return delta; -} - /** * fixed_power_int - compute: x^n, in O(log n) time * @@ -3541,9 +3504,6 @@ static void calc_global_nohz(unsigned long ticks) if (delta) atomic_long_add(delta, &calc_load_tasks); - cpumask_clear(&cpu_load_update_mask); - atomic_long_xchg(&calc_unmask_cpu_load_idle, 0); - /* * If we were idle for multiple load cycles, apply them. 
*/ @@ -3601,26 +3561,6 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) loads[2] = (avenrun[2] + offset) << shift; } -/* -+ * Prepare cpu_load_update_mask for the comming per-cpu load calculating -+ */ -void prepare_idle_mask(unsigned long ticks) -{ - if (time_before(jiffies, idle_mask_update - 10)) - return; - - cpumask_clear(&cpu_load_update_mask); - /* - * calc_unmask_cpu_load_idle is part of calc_load_tasks_idle, - * and calc_load_tasks_ide will be folded into calc_load_tasks - * immediately. - * So no need to keep this now. - */ - atomic_long_xchg(&calc_unmask_cpu_load_idle, 0); - - idle_mask_update += LOAD_FREQ; -} - /* * calc_load - update the avenrun load estimates 10 ticks after the * CPUs have updated calc_load_tasks. @@ -3651,30 +3591,12 @@ void calc_global_load(unsigned long ticks) static void calc_load_account_active(struct rq *this_rq) { long delta; - int cpu = smp_processor_id(); if (time_before(jiffies, this_rq->calc_load_update)) return; - /* - * cpu_load_update_mask empty means the first cpu - * doing load calculating. Global idle should be - * folded into calc_load_tasks, so we just push it - * to calc_unmask_cpu_load_idle. - */ - if (cpumask_empty(&cpu_load_update_mask)) - atomic_long_set(&calc_unmask_cpu_load_idle, - atomic_long_read(&calc_load_tasks_idle)); - /* - * Mask this cpu as load calculated, - * then go-idle in this cpu won't take effect - * to calc_load_tasks. - */ - cpumask_set_cpu(cpu, &cpu_load_update_mask); - delta = calc_load_fold_active(this_rq); - /* Fold unmask cpus' load into calc_load_tasks */ - delta += calc_load_fold_unmask_idle(); + delta += calc_load_fold_idle(); if (delta) atomic_long_add(delta, &calc_load_tasks); @@ -8396,8 +8318,6 @@ void __init sched_init(void) calc_load_update = jiffies + LOAD_FREQ; - idle_mask_update = jiffies + LOAD_FREQ; - /* * During early bootup we pretend to be a normal task: */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ec66f787a74..00231db0eae 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1115,7 +1115,6 @@ void do_timer(unsigned long ticks) jiffies_64 += ticks; update_wall_time(); calc_global_load(ticks); - prepare_idle_mask(ticks); } /** From 01626660febb4866bb73f2de1a4c921f4646946d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 24 Aug 2011 09:37:48 +0200 Subject: [PATCH 333/678] nohz: Fix update_ts_time_stat idle accounting update_ts_time_stat currently updates idle time even if we are in iowait loop at the moment. The only real users of the idle counter (via get_cpu_idle_time_us) are CPU governors and they expect to get cumulative time for both idle and iowait times. The value (idle_sleeptime) is also printed to userspace by print_cpu but it prints both idle and iowait times so the idle part is misleading. Let's clean this up and fix update_ts_time_stat to account both counters properly and update consumers of idle to consider iowait time as well. If we do this we might use get_cpu_{idle,iowait}_time_us from other contexts as well and we will get expected values. 
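(Illustrative aside, not part of the patch.) To make the governors' expectation concrete, here is a minimal sketch of the per-window load estimate that ondemand/conservative effectively derive from these counters. The function name and the percentage arithmetic are invented for this note; only the idle-plus-iowait summing mirrors the change in the hunks below.

    #include <stdio.h>

    /*
     * Both idle and iowait time count as "not busy" for frequency scaling:
     * a CPU waiting on I/O gains nothing from a higher clock.  All deltas
     * are over one sampling window, in microseconds.
     */
    static unsigned int window_load_pct(unsigned long long wall_delta,
                                        unsigned long long idle_delta,
                                        unsigned long long iowait_delta)
    {
            unsigned long long not_busy = idle_delta + iowait_delta;

            if (!wall_delta || not_busy >= wall_delta)
                    return 0;
            return (unsigned int)(100 * (wall_delta - not_busy) / wall_delta);
    }

    int main(void)
    {
            /* 100 ms window: 10 ms running, 60 ms idle, 30 ms in iowait -> 10% */
            printf("%u%%\n", window_load_pct(100000, 60000, 30000));
            return 0;
    }

If the iowait component were dropped from the idle figure, a CPU sitting in iowait would look busy to this calculation and the governor would raise the frequency for no benefit, which is what the hunks below guard against.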
Signed-off-by: Michal Hocko Cc: Dave Jones Cc: Arnd Bergmann Cc: Alexey Dobriyan Link: http://lkml.kernel.org/r/e9c909c221a8da402c4da07e4cd968c3218f8eb1.1314172057.git.mhocko@suse.cz Signed-off-by: Thomas Gleixner --- drivers/cpufreq/cpufreq_conservative.c | 4 +++- drivers/cpufreq/cpufreq_ondemand.c | 4 +++- kernel/time/tick-sched.c | 8 ++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 33b56e5c5c1..c97b468ee9f 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -120,10 +120,12 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) { - u64 idle_time = get_cpu_idle_time_us(cpu, wall); + u64 idle_time = get_cpu_idle_time_us(cpu, NULL); if (idle_time == -1ULL) return get_cpu_idle_time_jiffy(cpu, wall); + else + idle_time += get_cpu_iowait_time_us(cpu, wall); return idle_time; } diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 7e0cc1c88bf..76f73de99c4 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -171,10 +171,12 @@ static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) { - u64 idle_time = get_cpu_idle_time_us(cpu, wall); + u64 idle_time = get_cpu_idle_time_us(cpu, NULL); if (idle_time == -1ULL) return get_cpu_idle_time_jiffy(cpu, wall); + else + idle_time += get_cpu_iowait_time_us(cpu, wall); return idle_time; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d5f59e37f82..596e34593df 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -159,9 +159,10 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda if (ts->idle_active) { delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); if (nr_iowait_cpu(cpu) > 0) ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); + else + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; } @@ -200,8 +201,7 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) * @last_update_time: variable to store update time in * * Return the cummulative idle time (since boot) for a given - * CPU, in microseconds. The idle time returned includes - * the iowait time (unlike what "top" and co report). + * CPU, in microseconds. * * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. @@ -221,7 +221,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); -/* +/** * get_cpu_iowait_time_us - get the total iowait time of a cpu * @cpu: CPU number to query * @last_update_time: variable to store update time in From bc9e877058e4e100fd9a712defbce5b9869d15b9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 24 Aug 2011 09:39:30 +0200 Subject: [PATCH 334/678] nohz: Make idle/iowait counter update conditional get_cpu_{idle,iowait}_time_us update idle/iowait counters unconditionally if the given CPU is in the idle loop. This doesn't work well outside of CPU governors which are singletons so nobody (except for IRQ) can race with them. We will need to use both functions from /proc/stat handler to properly handle nohz idle/iowait times. 
Make the update depend on a non NULL last_update_time argument. Signed-off-by: Michal Hocko Cc: Dave Jones Cc: Arnd Bergmann Cc: Alexey Dobriyan Link: http://lkml.kernel.org/r/11f23179472635ce52e78921d47a20216b872f23.1314172057.git.mhocko@suse.cz Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 41 ++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 596e34593df..86972ccea1f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -198,7 +198,8 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) /** * get_cpu_idle_time_us - get the total idle time of a cpu * @cpu: CPU number to query - * @last_update_time: variable to store update time in + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. * * Return the cummulative idle time (since boot) for a given * CPU, in microseconds. @@ -211,20 +212,35 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, idle; if (!tick_nohz_enabled) return -1; - update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); + now = ktime_get(); + if (last_update_time) { + update_ts_time_stats(cpu, ts, now, last_update_time); + idle = ts->idle_sleeptime; + } else { + if (ts->idle_active && !nr_iowait_cpu(cpu)) { + ktime_t delta = ktime_sub(now, ts->idle_entrytime); + + idle = ktime_add(ts->idle_sleeptime, delta); + } else { + idle = ts->idle_sleeptime; + } + } + + return ktime_to_us(idle); - return ktime_to_us(ts->idle_sleeptime); } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); /** * get_cpu_iowait_time_us - get the total iowait time of a cpu * @cpu: CPU number to query - * @last_update_time: variable to store update time in + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. * * Return the cummulative iowait time (since boot) for a given * CPU, in microseconds. @@ -237,13 +253,26 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, iowait; if (!tick_nohz_enabled) return -1; - update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); + now = ktime_get(); + if (last_update_time) { + update_ts_time_stats(cpu, ts, now, last_update_time); + iowait = ts->iowait_sleeptime; + } else { + if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { + ktime_t delta = ktime_sub(now, ts->idle_entrytime); + + iowait = ktime_add(ts->iowait_sleeptime, delta); + } else { + iowait = ts->iowait_sleeptime; + } + } - return ktime_to_us(ts->iowait_sleeptime); + return ktime_to_us(iowait); } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); From 581f532724bd0ec1090d40e54132211920723d09 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 23 Aug 2011 13:20:46 +0200 Subject: [PATCH 335/678] nohz: Remove "Switched to NOHz mode" debugging messages When performing cpu hotplug tests the kernel printk log buffer gets flooded with pointless "Switched to NOHz mode..." messages. Especially when afterwards analyzing a dump this might have removed more interesting stuff out of the buffer. Assuming that switching to NOHz mode simply works just remove the printk. 
Signed-off-by: Heiko Carstens Link: http://lkml.kernel.org/r/20110823112046.GB2540@osiris.boeblingen.de.ibm.com Signed-off-by: Thomas Gleixner Conflicts: kernel/time/tick-sched.c --- kernel/time/tick-sched.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 86972ccea1f..7e2e0817cbf 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -669,8 +669,6 @@ static void tick_nohz_switch_to_nohz(void) next = ktime_add(next, tick_period); } local_irq_enable(); - - pr_debug(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); } /* @@ -822,10 +820,8 @@ void tick_setup_sched_timer(void) } #ifdef CONFIG_NO_HZ - if (tick_nohz_enabled) { + if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; - pr_debug(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); - } #endif } #endif /* HIGH_RES_TIMERS */ From 3dd8d52784b41c147b542ca0d36386c6e9f3c249 Mon Sep 17 00:00:00 2001 From: "Shi, Alex" Date: Thu, 28 Jul 2011 14:56:12 +0800 Subject: [PATCH 336/678] nohz: Remove nohz_cpu_mask RCU no longer uses this global variable, nor does anyone else. This commit therefore removes this variable. This reduces memory footprint and also removes some atomic instructions and memory barriers from the dyntick-idle path. Signed-off-by: Alex Shi Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 1 - kernel/sched.c | 11 ----------- kernel/time/tick-sched.c | 6 ------ 3 files changed, 18 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 9311b2cf681..b1e33ef275b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -272,7 +272,6 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(int cpu); -extern cpumask_var_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern void select_nohz_load_balancer(int stop_tick); extern int get_nohz_timer_target(void); diff --git a/kernel/sched.c b/kernel/sched.c index 8549f866be5..095abac593f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6064,15 +6064,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) ftrace_graph_init_idle_task(idle, cpu); } -/* - * In a system that switches off the HZ timer nohz_cpu_mask - * indicates which cpus entered this state. This is used - * in the rcu update to wait only for active cpus. For system - * which do not switch off the HZ timer nohz_cpu_mask should - * always be CPU_BITS_NONE. 
- */ -cpumask_var_t nohz_cpu_mask; - /* * Increase the granularity value when there are more CPUs, * because with more CPUs the 'effective latency' as visible @@ -8323,8 +8314,6 @@ void __init sched_init(void) */ current->sched_class = &fair_sched_class; - /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ - zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); #ifdef CONFIG_SMP zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); #ifdef CONFIG_NO_HZ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7e2e0817cbf..40420644d0b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -139,7 +139,6 @@ static void tick_nohz_update_jiffies(ktime_t now) struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long flags; - cpumask_clear_cpu(cpu, nohz_cpu_mask); ts->idle_waketime = now; local_irq_save(flags); @@ -418,9 +417,6 @@ void tick_nohz_stop_sched_tick(int inidle) else expires.tv64 = KTIME_MAX; - if (delta_jiffies > 1) - cpumask_set_cpu(cpu, nohz_cpu_mask); - /* Skip reprogram of event if its not changed */ if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) goto out; @@ -470,7 +466,6 @@ void tick_nohz_stop_sched_tick(int inidle) * softirq. */ tick_do_update_jiffies64(ktime_get()); - cpumask_clear_cpu(cpu, nohz_cpu_mask); } raise_softirq_irqoff(TIMER_SOFTIRQ); out: @@ -553,7 +548,6 @@ void tick_nohz_restart_sched_tick(void) /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); - cpumask_clear_cpu(cpu, nohz_cpu_mask); #ifndef CONFIG_VIRT_CPU_ACCOUNTING /* From f499c8441866be6671a7673833438759e820f278 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 5 Aug 2011 22:39:02 -0700 Subject: [PATCH 337/678] rcu: Eliminate in_irq() checks in rcu_enter_nohz() The in_irq() check in rcu_enter_nohz() is redundant because if we really are in an interrupt, the attempt to re-enter dyntick-idle mode will invoke rcu_needs_cpu() in any case, which will force the check for RCU callbacks. So this commit removes the check along with the set_need_resched(). Suggested-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ba06207b1dd..bfa24b08686 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -360,13 +360,6 @@ void rcu_enter_nohz(void) smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); local_irq_restore(flags); - - /* If the interrupt queued a callback, get out of dyntick mode. */ - if (in_irq() && - (__get_cpu_var(rcu_sched_data).nxtlist || - __get_cpu_var(rcu_bh_data).nxtlist || - rcu_preempt_needs_cpu(smp_processor_id()))) - set_need_resched(); } /* From e51ddbf592cb9058bf494d0b17e9dea6150749b1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 Sep 2011 13:06:17 +0200 Subject: [PATCH 338/678] sched: Convert to struct llist Use the generic llist primitives. We had a private lockless list implementation in the scheduler in the wake-list code, now that we have a generic llist implementation that provides all required operations, switch to it. This patch is not expected to change any behavior. 
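(Illustrative aside, not part of the patch.) For readers unfamiliar with the generic primitives being adopted, here is a minimal, self-contained sketch of the same lockless producer/consumer pattern. struct pending_item, pending_list and the function names are invented for the example; the llist_add()/llist_del_all()/llist_entry()/llist_next() calls are the ones used in the hunks below.

    #include <linux/kernel.h>
    #include <linux/llist.h>

    struct pending_item {
            int payload;
            struct llist_node node;         /* plays the role of p->wake_entry */
    };

    /* Zero-initialised static storage is a valid empty llist_head. */
    static struct llist_head pending_list;  /* plays the role of rq->wake_list */

    /* Producer: may run from any context, takes no lock. */
    static void queue_item(struct pending_item *item)
    {
            /*
             * llist_add() reports whether the list was empty beforehand,
             * i.e. whether the consumer needs a kick (the scheduler sends
             * a reschedule IPI at this point).
             */
            if (llist_add(&item->node, &pending_list)) {
                    /* kick the consumer here, e.g. smp_send_reschedule() */
            }
    }

    /* Consumer: detach the whole list atomically, then walk it lock-free. */
    static void drain_items(void)
    {
            struct llist_node *llist = llist_del_all(&pending_list);
            struct pending_item *item;

            while (llist) {
                    item = llist_entry(llist, struct pending_item, node);
                    llist = llist_next(llist);
                    /* process item->payload ... */
            }
    }

In the scheduler the producer is ttwu_queue_remote() and the consumer is sched_ttwu_pending(), as the hunks below show.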
Signed-off-by: Peter Zijlstra Cc: Huang Ying Cc: Andrew Morton Link: http://lkml.kernel.org/r/1315836353.26517.42.camel@twins Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 ++- kernel/sched.c | 48 +++++++++---------------------------------- 2 files changed, 12 insertions(+), 39 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b1e33ef275b..aabc9a7972b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -90,6 +90,7 @@ struct sched_param { #include #include #include +#include #include @@ -1226,7 +1227,7 @@ struct task_struct { unsigned int ptrace; #ifdef CONFIG_SMP - struct task_struct *wake_entry; + struct llist_node wake_entry; int on_cpu; #endif int on_rq; diff --git a/kernel/sched.c b/kernel/sched.c index 095abac593f..945908b0a8f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -576,7 +576,7 @@ struct rq { #endif #ifdef CONFIG_SMP - struct task_struct *wake_list; + struct llist_head wake_list; #endif }; @@ -2600,42 +2600,26 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) } #ifdef CONFIG_SMP -static void sched_ttwu_do_pending(struct task_struct *list) +static void sched_ttwu_pending(void) { struct rq *rq = this_rq(); + struct llist_node *llist = llist_del_all(&rq->wake_list); + struct task_struct *p; raw_spin_lock(&rq->lock); - while (list) { - struct task_struct *p = list; - list = list->wake_entry; + while (llist) { + p = llist_entry(llist, struct task_struct, wake_entry); + llist = llist_next(llist); ttwu_do_activate(rq, p, 0); } raw_spin_unlock(&rq->lock); } -#ifdef CONFIG_HOTPLUG_CPU - -static void sched_ttwu_pending(void) -{ - struct rq *rq = this_rq(); - struct task_struct *list = xchg(&rq->wake_list, NULL); - - if (!list) - return; - - sched_ttwu_do_pending(list); -} - -#endif /* CONFIG_HOTPLUG_CPU */ - void scheduler_ipi(void) { - struct rq *rq = this_rq(); - struct task_struct *list = xchg(&rq->wake_list, NULL); - - if (!list) + if (llist_empty(&this_rq()->wake_list)) return; /* @@ -2652,25 +2636,13 @@ void scheduler_ipi(void) * somewhat pessimize the simple resched case. */ irq_enter(); - sched_ttwu_do_pending(list); + sched_ttwu_pending(); irq_exit(); } static void ttwu_queue_remote(struct task_struct *p, int cpu) { - struct rq *rq = cpu_rq(cpu); - struct task_struct *next = rq->wake_list; - - for (;;) { - struct task_struct *old = next; - - p->wake_entry = next; - next = cmpxchg(&rq->wake_list, old, p); - if (next == old) - break; - } - - if (!next) + if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) smp_send_reschedule(cpu); } From 82ff97f4f5669b5a2cc3c49e3a3d496194c27656 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Sep 2011 15:32:06 +0200 Subject: [PATCH 339/678] sched: Fix idle_cpu() On -rt we observed hackbench waking all 400 tasks to a single cpu. This is because of select_idle_sibling()'s interaction with the new ipi based wakeup scheme. The existing idle_cpu() test only checks to see if the current task on that cpu is the idle task, it does not take already queued tasks into account, nor does it take queued to be woken tasks into account. If the remote wakeup IPIs come hard enough, there won't be time to schedule away from the idle task, and would thus keep thinking the cpu was in fact idle, regardless of the fact that there were already several hundred tasks runnable. We couldn't reproduce on mainline, but there's no reason it couldn't happen. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-3o30p18b2paswpc9ohy2gltp@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/sched.c b/kernel/sched.c index 945908b0a8f..865fa1e4283 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5107,7 +5107,20 @@ EXPORT_SYMBOL(task_nice); */ int idle_cpu(int cpu) { - return cpu_curr(cpu) == cpu_rq(cpu)->idle; + struct rq *rq = cpu_rq(cpu); + + if (rq->curr != rq->idle) + return 0; + + if (rq->nr_running) + return 0; + +#ifdef CONFIG_SMP + if (!llist_empty(&rq->wake_list)) + return 0; +#endif + + return 1; } /** From 8b3801e051268b7c6c85b279c52975c6d9f76eaa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 3 Oct 2011 15:09:00 -0700 Subject: [PATCH 340/678] sched: Use resched IPI to kick off the nohz idle balance Current use of smp call function to kick the nohz idle balance can deadlock in this scenario. 1. cpu-A did a generic_exec_single() to cpu-B and after queuing its call single data (csd) to the call single queue, cpu-A took a timer interrupt. Actual IPI to cpu-B to process the call single queue is not yet sent. 2. As part of the timer interrupt handler, cpu-A decided to kick cpu-B for the idle load balancing (sets cpu-B's rq->nohz_balance_kick to 1) and __smp_call_function_single() with nowait will queue the csd to the cpu-B's queue. But the generic_exec_single() won't send an IPI to cpu-B as the call single queue was not empty. 3. cpu-A is busy with lot of interrupts 4. Meanwhile cpu-B is entering and exiting idle and noticed that it has it's rq->nohz_balance_kick set to '1'. So it will go ahead and do the idle load balancer and clear its rq->nohz_balance_kick. 5. At this point, csd queued as part of the step-2 above is still locked and waiting to be serviced on cpu-B. 6. cpu-A is still busy with interrupt load and now it got another timer interrupt and as part of it decided to kick cpu-B for another idle load balancing (as it finds cpu-B's rq->nohz_balance_kick cleared in step-4 above) and does __smp_call_function_single() with the same csd that is still locked. 7. And we get a deadlock waiting for the csd_lock() in the __smp_call_function_single(). Main issue here is that cpu-B can service the idle load balancer kick request from cpu-A even with out receiving the IPI and this lead to doing multiple __smp_call_function_single() on the same csd leading to deadlock. To kick a cpu, scheduler already has the reschedule vector reserved. Use that mechanism (kick_process()) instead of using the generic smp call function mechanism to kick off the nohz idle load balancing and avoid the deadlock. [ This issue is present from 2.6.35+ kernels, but marking it -stable only from v3.0+ as the proposed fix depends on the scheduler_ipi() that is introduced recently. 
] Reported-by: Prarit Bhargava Signed-off-by: Suresh Siddha Cc: stable@kernel.org # v3.0+ Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20111003220934.834943260@sbsiddha-desk.sc.intel.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 21 +++++++++++++++++++-- kernel/sched_fair.c | 29 +++++++++-------------------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 865fa1e4283..ccfbd7308d8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1282,6 +1282,18 @@ void wake_up_idle_cpu(int cpu) smp_send_reschedule(cpu); } +static inline bool got_nohz_idle_kick(void) +{ + return idle_cpu(smp_processor_id()) && this_rq()->nohz_balance_kick; +} + +#else /* CONFIG_NO_HZ */ + +static inline bool got_nohz_idle_kick(void) +{ + return false; +} + #endif /* CONFIG_NO_HZ */ static u64 sched_avg_period(void) @@ -2619,7 +2631,7 @@ static void sched_ttwu_pending(void) void scheduler_ipi(void) { - if (llist_empty(&this_rq()->wake_list)) + if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) return; /* @@ -2637,6 +2649,12 @@ void scheduler_ipi(void) */ irq_enter(); sched_ttwu_pending(); + + /* + * Check if someone kicked us for doing the nohz idle load balance. + */ + if (unlikely(got_nohz_idle_kick() && !need_resched())) + raise_softirq_irqoff(SCHED_SOFTIRQ); irq_exit(); } @@ -8257,7 +8275,6 @@ void __init sched_init(void) rq_attach_root(rq, &def_root_domain); #ifdef CONFIG_NO_HZ rq->nohz_balance_kick = 0; - init_sched_softirq_csd(&per_cpu(remote_sched_softirq_cb, i)); #endif #endif init_rq_hrtick(rq); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 06e3c0b7c3d..5b549a828aa 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -3620,22 +3620,6 @@ static int active_load_balance_cpu_stop(void *data) } #ifdef CONFIG_NO_HZ - -static DEFINE_PER_CPU(struct call_single_data, remote_sched_softirq_cb); - -static void trigger_sched_softirq(void *data) -{ - raise_softirq_irqoff(SCHED_SOFTIRQ); -} - -static inline void init_sched_softirq_csd(struct call_single_data *csd) -{ - csd->func = trigger_sched_softirq; - csd->info = NULL; - csd->flags = 0; - csd->priv = 0; -} - /* * idle load balancing details * - One of the idle CPUs nominates itself as idle load_balancer, while @@ -3801,11 +3785,16 @@ static void nohz_balancer_kick(int cpu) } if (!cpu_rq(ilb_cpu)->nohz_balance_kick) { - struct call_single_data *cp; - cpu_rq(ilb_cpu)->nohz_balance_kick = 1; - cp = &per_cpu(remote_sched_softirq_cb, cpu); - __smp_call_function_single(ilb_cpu, cp, 0); + + smp_mb(); + /* + * Use smp_send_reschedule() instead of resched_cpu(). + * This way we generate a sched IPI on the target cpu which + * is idle. And the softirq performing nohz idle load balance + * will be run before returning from the IPI. + */ + smp_send_reschedule(ilb_cpu); } return; } From 79a0d73cf207829df90dbd822e36bcf571da4815 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 3 Oct 2011 15:09:01 -0700 Subject: [PATCH 341/678] sched: Request for idle balance during nohz idle load balance rq's idle_at_tick is set to idle/busy during the timer tick depending on the cpu was idle or not. This will be used later in the load balance that will be done in the softirq context (which is a process context in -RT kernels). For nohz kernels, for the cpu doing nohz idle load balance on behalf of all the idle cpu's, its rq->idle_at_tick might have a stale value (which is recorded when it got the timer tick presumably when it is busy). 
As the nohz idle load balancing is also being done at the same place as the regular load balancing, nohz idle load balancing was bailing out when it sees rq's idle_at_tick not set. Thus leading to poor system utilization. Rename rq's idle_at_tick to idle_balance and set it when someone requests for nohz idle balance on an idle cpu. Reported-by: Srivatsa Vaddagiri Signed-off-by: Suresh Siddha Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20111003220934.892350549@sbsiddha-desk.sc.intel.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 +++++--- kernel/sched_fair.c | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index ccfbd7308d8..eb15c890ed3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -516,7 +516,7 @@ struct rq { unsigned long cpu_power; - unsigned char idle_at_tick; + unsigned char idle_balance; /* For active balancing */ int post_schedule; int active_balance; @@ -2653,8 +2653,10 @@ void scheduler_ipi(void) /* * Check if someone kicked us for doing the nohz idle load balance. */ - if (unlikely(got_nohz_idle_kick() && !need_resched())) + if (unlikely(got_nohz_idle_kick() && !need_resched())) { + this_rq()->idle_balance = 1; raise_softirq_irqoff(SCHED_SOFTIRQ); + } irq_exit(); } @@ -4216,7 +4218,7 @@ void scheduler_tick(void) perf_event_task_tick(); #ifdef CONFIG_SMP - rq->idle_at_tick = idle_cpu(cpu); + rq->idle_balance = idle_cpu(cpu); trigger_load_balance(rq, cpu); #endif } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5b549a828aa..612dda7af5b 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -4029,7 +4029,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu) if (time_before(now, nohz.next_balance)) return 0; - if (rq->idle_at_tick) + if (idle_cpu(cpu)) return 0; first_pick_cpu = atomic_read(&nohz.first_pick_cpu); @@ -4065,7 +4065,7 @@ static void run_rebalance_domains(struct softirq_action *h) { int this_cpu = smp_processor_id(); struct rq *this_rq = cpu_rq(this_cpu); - enum cpu_idle_type idle = this_rq->idle_at_tick ? + enum cpu_idle_type idle = this_rq->idle_balance ? CPU_IDLE : CPU_NOT_IDLE; rebalance_domains(this_cpu, idle); From d5913c4009e1d5110477061fef3b4fa8c6488248 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 24 Aug 2011 09:40:25 +0200 Subject: [PATCH 342/678] proc: Consider NO_HZ when printing idle and iowait times show_stat handler of the /proc/stat file relies on kstat_cpu(cpu) statistics when priting information about idle and iowait times. This is OK if we are not using tickless kernel (CONFIG_NO_HZ) because counters are updated periodically. With NO_HZ things got more tricky because we are not doing idle/iowait accounting while we are tickless so the value might get outdated. Users of /proc/stat will notice that by unchanged idle/iowait values which is then interpreted as 0% idle/iowait time. From the user space POV this is an unexpected behavior and a change of the interface. Let's fix this by using get_cpu_{idle,iowait}_time_us which accounts the total idle/iowait time since boot and it doesn't rely on sampling or any other periodic activity. Fall back to the previous behavior if NO_HZ is disabled or not configured. 
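(Illustrative aside, not part of the patch.) This is roughly what the /proc/stat consumers described above do; the aggregate "cpu" line carries user, nice, system, idle, iowait, irq, softirq, steal. The sketch is user-space code written for this note, not anything shipped with the patch. With the pre-patch behaviour the idle and iowait columns simply stop moving on tickless CPUs, so the busy share computed this way drifts toward 100% even on an idle machine.

    #include <stdio.h>
    #include <unistd.h>

    /* One sample of the aggregate "cpu" line of /proc/stat. */
    static int read_cpu_times(unsigned long long *busy, unsigned long long *notbusy)
    {
            unsigned long long u, n, s, idle, iow, irq, sirq, steal = 0;
            FILE *f = fopen("/proc/stat", "r");

            if (!f)
                    return -1;
            if (fscanf(f, "cpu %llu %llu %llu %llu %llu %llu %llu %llu",
                       &u, &n, &s, &idle, &iow, &irq, &sirq, &steal) < 7) {
                    fclose(f);
                    return -1;
            }
            fclose(f);
            *busy = u + n + s + irq + sirq + steal;
            *notbusy = idle + iow;
            return 0;
    }

    int main(void)
    {
            unsigned long long b0, i0, b1, i1, busy, rest;

            if (read_cpu_times(&b0, &i0))
                    return 1;
            sleep(1);
            if (read_cpu_times(&b1, &i1))
                    return 1;
            busy = b1 - b0;
            rest = i1 - i0;
            if (busy + rest)
                    printf("busy: %.1f%%\n", 100.0 * busy / (busy + rest));
            return 0;
    }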
Signed-off-by: Michal Hocko Cc: Dave Jones Cc: Arnd Bergmann Cc: Alexey Dobriyan Link: http://lkml.kernel.org/r/39181366adac1b39cb6aa3cd53ff0f7c78d32676.1314172057.git.mhocko@suse.cz Signed-off-by: Thomas Gleixner Conflicts: fs/proc/stat.c --- fs/proc/stat.c | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 4b758ad5c83..509f4596f98 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -10,6 +10,7 @@ #include #include #include +#include #ifndef arch_irq_stat_cpu #define arch_irq_stat_cpu(cpu) 0 @@ -21,6 +22,35 @@ #define arch_idle_time(cpu) 0 #endif +static cputime64_t get_idle_time(int cpu) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, NULL); + cputime64_t idle; + + if (idle_time == -1ULL) { + /* !NO_HZ so we can rely on cpustat.idle */ + idle = kstat_cpu(cpu).cpustat.idle; + idle = cputime64_add(idle, arch_idle_time(cpu)); + } else + idle = usecs_to_cputime(idle_time); + + return idle; +} + +static cputime64_t get_iowait_time(int cpu) +{ + u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL); + cputime64_t iowait; + + if (iowait_time == -1ULL) + /* !NO_HZ so we can rely on cpustat.iowait */ + iowait = kstat_cpu(cpu).cpustat.iowait; + else + iowait = usecs_to_cputime(iowait_time); + + return iowait; +} + static int show_stat(struct seq_file *p, void *v) { int i, j; @@ -42,9 +72,8 @@ static int show_stat(struct seq_file *p, void *v) user = cputime64_add(user, kstat_cpu(i).cpustat.user); nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); system = cputime64_add(system, kstat_cpu(i).cpustat.system); - idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); - idle = cputime64_add(idle, arch_idle_time(i)); - iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); + idle = cputime64_add(idle, get_idle_time(i)); + iowait = cputime64_add(iowait, get_iowait_time(i)); irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); @@ -80,14 +109,12 @@ static int show_stat(struct seq_file *p, void *v) #else for_each_online_cpu(i) { #endif - /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ user = kstat_cpu(i).cpustat.user; nice = kstat_cpu(i).cpustat.nice; system = kstat_cpu(i).cpustat.system; - idle = kstat_cpu(i).cpustat.idle; - idle = cputime64_add(idle, arch_idle_time(i)); - iowait = kstat_cpu(i).cpustat.iowait; + idle = get_idle_time(i); + iowait = get_iowait_time(i); irq = kstat_cpu(i).cpustat.irq; softirq = kstat_cpu(i).cpustat.softirq; steal = kstat_cpu(i).cpustat.steal; From e1606a8ef90968c763690e7d43610be7a6d55d0b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 8 Dec 2011 14:34:32 -0800 Subject: [PATCH 343/678] procfs: do not overflow get_{idle,iowait}_time for nohz Since commit a25cac5198d4 ("proc: Consider NO_HZ when printing idle and iowait times") we are reporting idle/io_wait time also while a CPU is tickless. We rely on get_{idle,iowait}_time functions to retrieve proper data. These functions, however, use usecs_to_cputime to translate micro seconds time to cputime64_t. This is just an alias to usecs_to_jiffies which reduces the data type from u64 to unsigned int and also checks whether the given parameter overflows jiffies_to_usecs(MAX_JIFFY_OFFSET) and returns MAX_JIFFY_OFFSET in that case. When we overflow depends on CONFIG_HZ but especially for CONFIG_HZ_300 it is quite low (1431649781) so we are getting MAX_JIFFY_OFFSET for >3000s! 
until we overflow unsigned int. Just for reference CONFIG_HZ_100 has an overflow window around 20s, CONFIG_HZ_250 ~8s and CONFIG_HZ_1000 ~2s. This results in a bug when people saw [h]top going mad reporting 100% CPU usage even though there was basically no CPU load. The reason was simply that /proc/stat stopped reporting idle/io_wait changes (and reported MAX_JIFFY_OFFSET) and so the only change happening was for user system time. Let's use nsecs_to_jiffies64 instead which doesn't reduce the precision to 32b type and it is much more appropriate for cumulative time values (unlike usecs_to_jiffies which intended for timeout calculations). Signed-off-by: Michal Hocko Tested-by: Artem S. Tashkinov Cc: Dave Jones Cc: Arnd Bergmann Cc: Alexey Dobriyan Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 509f4596f98..e83506b22d9 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -32,7 +32,7 @@ static cputime64_t get_idle_time(int cpu) idle = kstat_cpu(cpu).cpustat.idle; idle = cputime64_add(idle, arch_idle_time(cpu)); } else - idle = usecs_to_cputime(idle_time); + idle = nsecs_to_jiffies64(1000 * idle_time); return idle; } @@ -46,7 +46,7 @@ static cputime64_t get_iowait_time(int cpu) /* !NO_HZ so we can rely on cpustat.iowait */ iowait = kstat_cpu(cpu).cpustat.iowait; else - iowait = usecs_to_cputime(iowait_time); + iowait = nsecs_to_jiffies64(1000 * iowait_time); return iowait; } From e244eb09a9a984876f0f7dfd24b7e55821854c00 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 27 Mar 2012 15:09:37 -0400 Subject: [PATCH 344/678] nohz: Fix stale jiffies update in tick_nohz_restart() commit 6f103929f8979d2638e58d7f7fda0beefcb8ee7e upstream. Fix tick_nohz_restart() to not use a stale ktime_t "now" value when calling tick_do_update_jiffies64(now). If we reach this point in the loop it means that we crossed a tick boundary since we grabbed the "now" timestamp, so at this point "now" refers to a time in the old jiffy, so using the old value for "now" is incorrect, and is likely to give us a stale jiffies value. In particular, the first time through the loop the tick_do_update_jiffies64(now) call is always a no-op, since the caller, tick_nohz_restart_sched_tick(), will have already called tick_do_update_jiffies64(now) with that "now" value. Note that tick_nohz_stop_sched_tick() already uses the correct approach: when we notice we cross a jiffy boundary, grab a new timestamp with ktime_get(), and *then* update jiffies. 
Signed-off-by: Neal Cardwell Cc: Ben Segall Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1332875377-23014-1-git-send-email-ncardwell@google.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 40420644d0b..c9236404aba 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -508,9 +508,9 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) hrtimer_get_expires(&ts->sched_timer), 0)) break; } - /* Update jiffies and reread time */ - tick_do_update_jiffies64(now); + /* Reread time and update jiffies */ now = ktime_get(); + tick_do_update_jiffies64(now); } } From 206258c44433a74ab6b495adbb177d32ae37e45d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 1 Mar 2012 15:04:46 +0100 Subject: [PATCH 345/678] sched: Fix nohz load accounting -- again! MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c308b56b5398779cd3da0f62ab26b0453494c3d4 upstream. Various people reported nohz load tracking still being wrecked, but Doug spotted the actual problem. We fold the nohz remainder in too soon, causing us to loose samples and under-account. So instead of playing catch-up up-front, always do a single load-fold with whatever state we encounter and only then fold the nohz remainder and play catch-up. Reported-by: Doug Smythies Reported-by: LesÃ…=82aw Kope=C4=87 Reported-by: Aman Gupta Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-4v31etnhgg9kwd6ocgx3rxl8@git.kernel.org Signed-off-by: Ingo Molnar [bwh: Backported to 3.2: change filename] Signed-off-by: Ben Hutchings --- kernel/sched.c | 53 +++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index eb15c890ed3..f1dd0de998d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3479,13 +3479,10 @@ calc_load_n(unsigned long load, unsigned long exp, * Once we've updated the global active value, we need to apply the exponential * weights adjusted to the number of cycles missed. */ -static void calc_global_nohz(unsigned long ticks) +static void calc_global_nohz(void) { long delta, active, n; - if (time_before(jiffies, calc_load_update)) - return; - /* * If we crossed a calc_load_update boundary, make sure to fold * any pending idle changes, the respective CPUs might have @@ -3497,31 +3494,25 @@ static void calc_global_nohz(unsigned long ticks) atomic_long_add(delta, &calc_load_tasks); /* - * If we were idle for multiple load cycles, apply them. + * It could be the one fold was all it took, we done! */ - if (ticks >= LOAD_FREQ) { - n = ticks / LOAD_FREQ; + if (time_before(jiffies, calc_load_update + 10)) + return; - active = atomic_long_read(&calc_load_tasks); - active = active > 0 ? active * FIXED_1 : 0; + /* + * Catch-up, fold however many we are behind still + */ + delta = jiffies - calc_load_update - 10; + n = 1 + (delta / LOAD_FREQ); - avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); - avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); - avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); + active = atomic_long_read(&calc_load_tasks); + active = active > 0 ? 
active * FIXED_1 : 0; - calc_load_update += n * LOAD_FREQ; - } + avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); + avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); + avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); - /* - * Its possible the remainder of the above division also crosses - * a LOAD_FREQ period, the regular check in calc_global_load() - * which comes after this will take care of that. - * - * Consider us being 11 ticks before a cycle completion, and us - * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will - * age us 4 cycles, and the test in calc_global_load() will - * pick up the final one. - */ + calc_load_update += n * LOAD_FREQ; } #else static void calc_load_account_idle(struct rq *this_rq) @@ -3533,7 +3524,7 @@ static inline long calc_load_fold_idle(void) return 0; } -static void calc_global_nohz(unsigned long ticks) +static void calc_global_nohz(void) { } #endif @@ -3561,8 +3552,6 @@ void calc_global_load(unsigned long ticks) { long active; - calc_global_nohz(ticks); - if (time_before(jiffies, calc_load_update + 10)) return; @@ -3574,6 +3563,16 @@ void calc_global_load(unsigned long ticks) avenrun[2] = calc_load(avenrun[2], EXP_15, active); calc_load_update += LOAD_FREQ; + + /* + * Account one period with whatever state we found before + * folding in the nohz state and ageing the entire idle period. + * + * This avoids loosing a sample when we go idle between + * calc_load_account_active() (10 ticks ago) and now and thus + * under-accounting. + */ + calc_global_nohz(); } /* From 6243e831b698171521b55662a1eb9ad0c179857f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jun 2012 15:52:09 +0200 Subject: [PATCH 346/678] sched/nohz: Rewrite and fix load-avg computation -- again commit 5167e8d5417bf5c322a703d2927daec727ea40dd upstream. Thanks to Charles Wang for spotting the defects in the current code: - If we go idle during the sample window -- after sampling, we get a negative bias because we can negate our own sample. - If we wake up during the sample window we get a positive bias because we push the sample to a known active period. So rewrite the entire nohz load-avg muck once again, now adding copious documentation to the code. 
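(Illustrative aside, not part of the patch.) The recurrence being documented, a1 = a0 * e + a * (1 - e), is easiest to see in isolation. Below is a stand-alone sketch using the kernel's fixed-point constants (FSHIFT, FIXED_1, EXP_1 as defined in include/linux/sched.h); the decay() helper restates that formula, while the driver loop and the task counts are invented for the example.

    #include <stdio.h>

    #define FSHIFT  11                      /* bits of fixed-point precision */
    #define FIXED_1 (1UL << FSHIFT)         /* 1.0 in fixed-point */
    #define EXP_1   1884                    /* 1/exp(5s/1min) in fixed-point */

    /* a1 = a0 * e + a * (1 - e), all operands in FIXED_1 fixed-point */
    static unsigned long decay(unsigned long load, unsigned long exp,
                               unsigned long active)
    {
            load *= exp;
            load += active * (FIXED_1 - exp);
            return load >> FSHIFT;
    }

    int main(void)
    {
            unsigned long avenrun = 0;      /* 1-minute average, fixed-point */
            int i;

            /* pretend three tasks stay runnable across ten 5-second windows */
            for (i = 0; i < 10; i++) {
                    avenrun = decay(avenrun, EXP_1, 3 * FIXED_1);
                    printf("window %2d: load ~ %lu.%02lu\n", i + 1,
                           avenrun >> FSHIFT,
                           ((avenrun & (FIXED_1 - 1)) * 100) >> FSHIFT);
            }
            return 0;
    }

The value climbs from 0.24 after the first window toward 3.00, the steady state for three runnable tasks; the rest of the patch is about how the per-cpu nr_active samples (and the deferred NO_HZ idle deltas) are folded together before they ever reach this recurrence.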
Reported-and-tested-by: Doug Smythies Reported-and-tested-by: Charles Wang Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1340373782.18025.74.camel@twins [ minor edits ] Signed-off-by: Ingo Molnar [bwh: Backported to 3.2: adjust filenames, context] Signed-off-by: Ben Hutchings --- include/linux/sched.h | 8 ++ kernel/sched.c | 276 ++++++++++++++++++++++++++++----------- kernel/sched_idletask.c | 1 - kernel/time/tick-sched.c | 2 + 4 files changed, 213 insertions(+), 74 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index aabc9a7972b..39edc7e5af4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1894,6 +1894,14 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif +#ifdef CONFIG_NO_HZ +void calc_load_enter_idle(void); +void calc_load_exit_idle(void); +#else +static inline void calc_load_enter_idle(void) { } +static inline void calc_load_exit_idle(void) { } +#endif /* CONFIG_NO_HZ */ + #ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { diff --git a/kernel/sched.c b/kernel/sched.c index f1dd0de998d..5dc353da6f1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1750,7 +1750,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) #endif -static void calc_load_account_idle(struct rq *this_rq); static void update_sysctl(void); static int get_update_sysctl_factor(void); @@ -3342,11 +3341,73 @@ unsigned long this_cpu_load(void) } +/* + * Global load-average calculations + * + * We take a distributed and async approach to calculating the global load-avg + * in order to minimize overhead. + * + * The global load average is an exponentially decaying average of nr_running + + * nr_uninterruptible. + * + * Once every LOAD_FREQ: + * + * nr_active = 0; + * for_each_possible_cpu(cpu) + * nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible; + * + * avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n) + * + * Due to a number of reasons the above turns in the mess below: + * + * - for_each_possible_cpu() is prohibitively expensive on machines with + * serious number of cpus, therefore we need to take a distributed approach + * to calculating nr_active. + * + * \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0 + * = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) } + * + * So assuming nr_active := 0 when we start out -- true per definition, we + * can simply take per-cpu deltas and fold those into a global accumulate + * to obtain the same result. See calc_load_fold_active(). + * + * Furthermore, in order to avoid synchronizing all per-cpu delta folding + * across the machine, we assume 10 ticks is sufficient time for every + * cpu to have completed this task. + * + * This places an upper-bound on the IRQ-off latency of the machine. Then + * again, being late doesn't loose the delta, just wrecks the sample. + * + * - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because + * this would add another cross-cpu cacheline miss and atomic operation + * to the wakeup path. Instead we increment on whatever cpu the task ran + * when it went into uninterruptible state and decrement on whatever cpu + * did the wakeup. This means that only the sum of nr_uninterruptible over + * all cpus yields the correct result. + * + * This covers the NO_HZ=n code, for extra head-aches, see the comment below. 
+ */ + /* Variables and functions for calc_load */ static atomic_long_t calc_load_tasks; static unsigned long calc_load_update; unsigned long avenrun[3]; -EXPORT_SYMBOL(avenrun); +EXPORT_SYMBOL(avenrun); /* should be removed */ + +/** + * get_avenrun - get the load average array + * @loads: pointer to dest load array + * @offset: offset to add + * @shift: shift count to shift the result left + * + * These values are estimates at best, so no need for locking. + */ +void get_avenrun(unsigned long *loads, unsigned long offset, int shift) +{ + loads[0] = (avenrun[0] + offset) << shift; + loads[1] = (avenrun[1] + offset) << shift; + loads[2] = (avenrun[2] + offset) << shift; +} static long calc_load_fold_active(struct rq *this_rq) { @@ -3363,6 +3424,9 @@ static long calc_load_fold_active(struct rq *this_rq) return delta; } +/* + * a1 = a0 * e + a * (1 - e) + */ static unsigned long calc_load(unsigned long load, unsigned long exp, unsigned long active) { @@ -3374,30 +3438,118 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) #ifdef CONFIG_NO_HZ /* - * For NO_HZ we delay the active fold to the next LOAD_FREQ update. + * Handle NO_HZ for the global load-average. + * + * Since the above described distributed algorithm to compute the global + * load-average relies on per-cpu sampling from the tick, it is affected by + * NO_HZ. + * + * The basic idea is to fold the nr_active delta into a global idle-delta upon + * entering NO_HZ state such that we can include this as an 'extra' cpu delta + * when we read the global state. + * + * Obviously reality has to ruin such a delightfully simple scheme: + * + * - When we go NO_HZ idle during the window, we can negate our sample + * contribution, causing under-accounting. + * + * We avoid this by keeping two idle-delta counters and flipping them + * when the window starts, thus separating old and new NO_HZ load. + * + * The only trick is the slight shift in index flip for read vs write. + * + * 0s 5s 10s 15s + * +10 +10 +10 +10 + * |-|-----------|-|-----------|-|-----------|-| + * r:0 0 1 1 0 0 1 1 0 + * w:0 1 1 0 0 1 1 0 0 + * + * This ensures we'll fold the old idle contribution in this window while + * accumlating the new one. + * + * - When we wake up from NO_HZ idle during the window, we push up our + * contribution, since we effectively move our sample point to a known + * busy state. + * + * This is solved by pushing the window forward, and thus skipping the + * sample, for this cpu (effectively using the idle-delta for this cpu which + * was in effect at the time the window opened). This also solves the issue + * of having to deal with a cpu having been in NOHZ idle for multiple + * LOAD_FREQ intervals. * * When making the ILB scale, we should try to pull this in as well. */ -static atomic_long_t calc_load_tasks_idle; +static atomic_long_t calc_load_idle[2]; +static int calc_load_idx; -static void calc_load_account_idle(struct rq *this_rq) +static inline int calc_load_write_idx(void) { + int idx = calc_load_idx; + + /* + * See calc_global_nohz(), if we observe the new index, we also + * need to observe the new update time. + */ + smp_rmb(); + + /* + * If the folding window started, make sure we start writing in the + * next idle-delta. 
+ */ + if (!time_before(jiffies, calc_load_update)) + idx++; + + return idx & 1; +} + +static inline int calc_load_read_idx(void) +{ + return calc_load_idx & 1; +} + +void calc_load_enter_idle(void) +{ + struct rq *this_rq = this_rq(); long delta; + /* + * We're going into NOHZ mode, if there's any pending delta, fold it + * into the pending idle delta. + */ delta = calc_load_fold_active(this_rq); - if (delta) - atomic_long_add(delta, &calc_load_tasks_idle); + if (delta) { + int idx = calc_load_write_idx(); + atomic_long_add(delta, &calc_load_idle[idx]); + } } -static long calc_load_fold_idle(void) +void calc_load_exit_idle(void) { - long delta = 0; + struct rq *this_rq = this_rq(); + + /* + * If we're still before the sample window, we're done. + */ + if (time_before(jiffies, this_rq->calc_load_update)) + return; /* - * Its got a race, we don't care... + * We woke inside or after the sample window, this means we're already + * accounted through the nohz accounting, so skip the entire deal and + * sync up for the next window. */ - if (atomic_long_read(&calc_load_tasks_idle)) - delta = atomic_long_xchg(&calc_load_tasks_idle, 0); + this_rq->calc_load_update = calc_load_update; + if (time_before(jiffies, this_rq->calc_load_update + 10)) + this_rq->calc_load_update += LOAD_FREQ; +} + +static long calc_load_fold_idle(void) +{ + int idx = calc_load_read_idx(); + long delta = 0; + + if (atomic_long_read(&calc_load_idle[idx])) + delta = atomic_long_xchg(&calc_load_idle[idx], 0); return delta; } @@ -3483,66 +3635,39 @@ static void calc_global_nohz(void) { long delta, active, n; - /* - * If we crossed a calc_load_update boundary, make sure to fold - * any pending idle changes, the respective CPUs might have - * missed the tick driven calc_load_account_active() update - * due to NO_HZ. - */ - delta = calc_load_fold_idle(); - if (delta) - atomic_long_add(delta, &calc_load_tasks); - - /* - * It could be the one fold was all it took, we done! - */ - if (time_before(jiffies, calc_load_update + 10)) - return; - - /* - * Catch-up, fold however many we are behind still - */ - delta = jiffies - calc_load_update - 10; - n = 1 + (delta / LOAD_FREQ); + if (!time_before(jiffies, calc_load_update + 10)) { + /* + * Catch-up, fold however many we are behind still + */ + delta = jiffies - calc_load_update - 10; + n = 1 + (delta / LOAD_FREQ); - active = atomic_long_read(&calc_load_tasks); - active = active > 0 ? active * FIXED_1 : 0; + active = atomic_long_read(&calc_load_tasks); + active = active > 0 ? active * FIXED_1 : 0; - avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); - avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); - avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); + avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); + avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); + avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); - calc_load_update += n * LOAD_FREQ; -} -#else -static void calc_load_account_idle(struct rq *this_rq) -{ -} + calc_load_update += n * LOAD_FREQ; + } -static inline long calc_load_fold_idle(void) -{ - return 0; + /* + * Flip the idle index... + * + * Make sure we first write the new time then flip the index, so that + * calc_load_write_idx() will see the new time when it reads the new + * index, this avoids a double flip messing things up. 
+ */ + smp_wmb(); + calc_load_idx++; } +#else /* !CONFIG_NO_HZ */ -static void calc_global_nohz(void) -{ -} -#endif +static inline long calc_load_fold_idle(void) { return 0; } +static inline void calc_global_nohz(void) { } -/** - * get_avenrun - get the load average array - * @loads: pointer to dest load array - * @offset: offset to add - * @shift: shift count to shift the result left - * - * These values are estimates at best, so no need for locking. - */ -void get_avenrun(unsigned long *loads, unsigned long offset, int shift) -{ - loads[0] = (avenrun[0] + offset) << shift; - loads[1] = (avenrun[1] + offset) << shift; - loads[2] = (avenrun[2] + offset) << shift; -} +#endif /* CONFIG_NO_HZ */ /* * calc_load - update the avenrun load estimates 10 ticks after the @@ -3550,11 +3675,18 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) */ void calc_global_load(unsigned long ticks) { - long active; + long active, delta; if (time_before(jiffies, calc_load_update + 10)) return; + /* + * Fold the 'old' idle-delta to include all NO_HZ cpus. + */ + delta = calc_load_fold_idle(); + if (delta) + atomic_long_add(delta, &calc_load_tasks); + active = atomic_long_read(&calc_load_tasks); active = active > 0 ? active * FIXED_1 : 0; @@ -3565,12 +3697,7 @@ void calc_global_load(unsigned long ticks) calc_load_update += LOAD_FREQ; /* - * Account one period with whatever state we found before - * folding in the nohz state and ageing the entire idle period. - * - * This avoids loosing a sample when we go idle between - * calc_load_account_active() (10 ticks ago) and now and thus - * under-accounting. + * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. */ calc_global_nohz(); } @@ -3587,13 +3714,16 @@ static void calc_load_account_active(struct rq *this_rq) return; delta = calc_load_fold_active(this_rq); - delta += calc_load_fold_idle(); if (delta) atomic_long_add(delta, &calc_load_tasks); this_rq->calc_load_update += LOAD_FREQ; } +/* + * End of global load-average stuff + */ + /* * The exact cpuload at various idx values, calculated at every tick would be * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 0a51882534e..be92bfe3929 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -23,7 +23,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl static struct task_struct *pick_next_task_idle(struct rq *rq) { schedstat_inc(rq, sched_goidle); - calc_load_account_idle(rq); return rq->idle; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c9236404aba..9955ebd7ab7 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -430,6 +430,7 @@ void tick_nohz_stop_sched_tick(int inidle) */ if (!ts->tick_stopped) { select_nohz_load_balancer(1); + calc_load_enter_idle(); ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; @@ -563,6 +564,7 @@ void tick_nohz_restart_sched_tick(void) account_idle_ticks(ticks); #endif + calc_load_exit_idle(); touch_softlockup_watchdog(); /* * Cancel the scheduled timer and restore the tick From a1faecabb447cd5ec7542ab31df461fa74a53dd7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 May 2012 17:15:29 +0200 Subject: [PATCH 347/678] sched/nohz: Fix rq->cpu_load calculations some more commit 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 upstream. 
Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[] calculations") since while that fixed the busy case it regressed the mostly idle case. Add a callback from the nohz exit to also age the rq->cpu_load[] array. This closes the hole where either there was no nohz load balance pass during the nohz, or there was a 'significant' amount of idle time between the last nohz balance and the nohz exit. So we'll update unconditionally from the tick to not insert any accidental 0 load periods while busy, and we try and catch up from nohz idle balance and nohz exit. Both these are still prone to missing a jiffy, but that has always been the case. Signed-off-by: Peter Zijlstra Cc: pjt@google.com Cc: Venkatesh Pallipadi Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org Signed-off-by: Ingo Molnar [bwh: Backported to 3.2: adjust filenames and context] Signed-off-by: Ben Hutchings --- include/linux/sched.h | 1 + kernel/sched.c | 53 ++++++++++++++++++++++++++++++++-------- kernel/time/tick-sched.c | 1 + 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 39edc7e5af4..c404b24397c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -147,6 +147,7 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(unsigned long ticks); +extern void update_cpu_load_nohz(void); extern unsigned long get_parent_ip(unsigned long addr); diff --git a/kernel/sched.c b/kernel/sched.c index 5dc353da6f1..7fa8bedab91 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3827,25 +3827,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, sched_avg_update(this_rq); } +#ifdef CONFIG_NO_HZ +/* + * There is no sane way to deal with nohz on smp when using jiffies because the + * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading + * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. + * + * Therefore we cannot use the delta approach from the regular tick since that + * would seriously skew the load calculation. However we'll make do for those + * updates happening while idle (nohz_idle_balance) or coming out of idle + * (tick_nohz_idle_exit). + * + * This means we might still be one tick off for nohz periods. + */ + /* * Called from nohz_idle_balance() to update the load ratings before doing the * idle balance. */ void update_idle_cpu_load(struct rq *this_rq) { - unsigned long curr_jiffies = jiffies; + unsigned long curr_jiffies = ACCESS_ONCE(jiffies); unsigned long load = this_rq->load.weight; unsigned long pending_updates; /* - * Bloody broken means of dealing with nohz, but better than nothing.. - * jiffies is updated by one cpu, another cpu can drift wrt the jiffy - * update and see 0 difference the one time and 2 the next, even though - * we ticked at roughtly the same rate. - * - * Hence we only use this from nohz_idle_balance() and skip this - * nonsense when called from the scheduler_tick() since that's - * guaranteed a stable rate. + * bail if there's load or we're actually up-to-date. */ if (load || curr_jiffies == this_rq->last_load_update_tick) return; @@ -3856,13 +3863,39 @@ void update_idle_cpu_load(struct rq *this_rq) __update_cpu_load(this_rq, load, pending_updates); } +/* + * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. 
+ */ +void update_cpu_load_nohz(void) +{ + struct rq *this_rq = this_rq(); + unsigned long curr_jiffies = ACCESS_ONCE(jiffies); + unsigned long pending_updates; + + if (curr_jiffies == this_rq->last_load_update_tick) + return; + + raw_spin_lock(&this_rq->lock); + pending_updates = curr_jiffies - this_rq->last_load_update_tick; + if (pending_updates) { + this_rq->last_load_update_tick = curr_jiffies; + /* + * We were idle, this means load 0, the current load might be + * !0 due to remote wakeups and the sort. + */ + __update_cpu_load(this_rq, 0, pending_updates); + } + raw_spin_unlock(&this_rq->lock); +} +#endif /* CONFIG_NO_HZ */ + /* * Called from scheduler_tick() */ static void update_cpu_load_active(struct rq *this_rq) { /* - * See the mess in update_idle_cpu_load(). + * See the mess around update_idle_cpu_load() / update_cpu_load_nohz(). */ this_rq->last_load_update_tick = jiffies; __update_cpu_load(this_rq, this_rq->load.weight, 1); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 9955ebd7ab7..793548cb5a9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -549,6 +549,7 @@ void tick_nohz_restart_sched_tick(void) /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); + update_cpu_load_nohz(); #ifndef CONFIG_VIRT_CPU_ACCOUNTING /* From 360a9e1faf8a55c61a7a8cb934c18d06937b1f75 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 10 Oct 2012 11:51:09 +0530 Subject: [PATCH 348/678] nohz: Fix idle ticks in cpu summary line of /proc/stat commit 7386cdbf2f57ea8cff3c9fde93f206e58b9fe13f upstream. Git commit 09a1d34f8535ecf9 "nohz: Make idle/iowait counter update conditional" introduced a bug in regard to cpu hotplug. The effect is that the number of idle ticks in the cpu summary line in /proc/stat is still counting ticks for offline cpus. Reproduction is easy, just start a workload that keeps all cpus busy, switch off one or more cpus and then watch the idle field in top. On a dual-core with one cpu 100% busy and one offline cpu you will get something like this: %Cpu(s): 48.7 us, 1.3 sy, 0.0 ni, 50.0 id, 0.0 wa, 0.0 hi, 0.0 si, %0.0 st The problem is that an offline cpu still has ts->idle_active == 1. To fix this we should make sure that the cpu is online when calling get_cpu_idle_time_us and get_cpu_iowait_time_us. [Srivatsa: Rebased to current mainline] Reported-by: Martin Schwidefsky Signed-off-by: Michal Hocko Reviewed-by: Srivatsa S. Bhat Signed-off-by: Srivatsa S. 
Bhat Link: http://lkml.kernel.org/r/20121010061820.8999.57245.stgit@srivatsabhat.in.ibm.com Cc: deepthi@linux.vnet.ibm.com Signed-off-by: Thomas Gleixner [bwh: Backported to 3.2: adjust context] Signed-off-by: Ben Hutchings --- fs/proc/stat.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/proc/stat.c b/fs/proc/stat.c index e83506b22d9..e2ef91dcf3d 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -24,11 +24,14 @@ static cputime64_t get_idle_time(int cpu) { - u64 idle_time = get_cpu_idle_time_us(cpu, NULL); + u64 idle_time = -1ULL; cputime64_t idle; + if (cpu_online(cpu)) + idle_time = get_cpu_idle_time_us(cpu, NULL); + if (idle_time == -1ULL) { - /* !NO_HZ so we can rely on cpustat.idle */ + /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ idle = kstat_cpu(cpu).cpustat.idle; idle = cputime64_add(idle, arch_idle_time(cpu)); } else @@ -39,11 +42,14 @@ static cputime64_t get_idle_time(int cpu) static cputime64_t get_iowait_time(int cpu) { - u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL); + u64 iowait_time = -1ULL; cputime64_t iowait; + if (cpu_online(cpu)) + iowait_time = get_cpu_iowait_time_us(cpu, NULL); + if (iowait_time == -1ULL) - /* !NO_HZ so we can rely on cpustat.iowait */ + /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */ iowait = kstat_cpu(cpu).cpustat.iowait; else iowait = nsecs_to_jiffies64(1000 * iowait_time); From e20ceb1709f16ea094e46e5c11ab0c3148e7e609 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 17 Aug 2011 06:58:04 -0700 Subject: [PATCH 349/678] treewide: Correct spelling of successfully in comments Signed-off-by: Joe Perches Signed-off-by: Jiri Kosina --- drivers/media/video/cx18/cx18-mailbox.h | 2 +- drivers/net/igb/e1000_mbx.c | 2 +- drivers/net/igbvf/mbx.c | 2 +- drivers/net/ixgbe/ixgbe_mbx.c | 2 +- drivers/net/ixgbevf/mbx.c | 2 +- drivers/net/phy/broadcom.c | 2 +- drivers/net/tile/tilepro.c | 2 +- drivers/target/iscsi/iscsi_target_nego.c | 2 +- drivers/target/target_core_tpg.c | 2 +- kernel/sched.c | 2 +- sound/core/memalloc.c | 4 ++-- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/media/video/cx18/cx18-mailbox.h b/drivers/media/video/cx18/cx18-mailbox.h index 05fe6bdbe06..b63fdfaac49 100644 --- a/drivers/media/video/cx18/cx18-mailbox.h +++ b/drivers/media/video/cx18/cx18-mailbox.h @@ -69,7 +69,7 @@ struct cx18_mailbox { /* Each command can have up to 6 arguments */ u32 args[MAX_MB_ARGUMENTS]; /* The return code can be one of the codes in the file cx23418.h. If the - command is completed successfuly, the error will be ERR_SYS_SUCCESS. + command is completed successfully, the error will be ERR_SYS_SUCCESS. If it is pending, the code is ERR_SYS_PENDING. If it failed, the error code would indicate the task from which the error originated and will be one of the errors in cx23418.h. 
In that case, the following diff --git a/drivers/net/igb/e1000_mbx.c b/drivers/net/igb/e1000_mbx.c index 74f2f11ac29..469d95eaa15 100644 --- a/drivers/net/igb/e1000_mbx.c +++ b/drivers/net/igb/e1000_mbx.c @@ -34,7 +34,7 @@ * @size: Length of buffer * @mbx_id: id of mailbox to read * - * returns SUCCESS if it successfuly read message from buffer + * returns SUCCESS if it successfully read message from buffer **/ s32 igb_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) { diff --git a/drivers/net/igbvf/mbx.c b/drivers/net/igbvf/mbx.c index 3d6f4cc3998..048aae248d0 100644 --- a/drivers/net/igbvf/mbx.c +++ b/drivers/net/igbvf/mbx.c @@ -288,7 +288,7 @@ static s32 e1000_write_mbx_vf(struct e1000_hw *hw, u32 *msg, u16 size) * @msg: The message buffer * @size: Length of buffer * - * returns SUCCESS if it successfuly read message from buffer + * returns SUCCESS if it successfully read message from buffer **/ static s32 e1000_read_mbx_vf(struct e1000_hw *hw, u32 *msg, u16 size) { diff --git a/drivers/net/ixgbe/ixgbe_mbx.c b/drivers/net/ixgbe/ixgbe_mbx.c index 1ff0eefcfd0..3f725d48336 100644 --- a/drivers/net/ixgbe/ixgbe_mbx.c +++ b/drivers/net/ixgbe/ixgbe_mbx.c @@ -38,7 +38,7 @@ * @size: Length of buffer * @mbx_id: id of mailbox to read * - * returns SUCCESS if it successfuly read message from buffer + * returns SUCCESS if it successfully read message from buffer **/ s32 ixgbe_read_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id) { diff --git a/drivers/net/ixgbevf/mbx.c b/drivers/net/ixgbevf/mbx.c index 7a883312577..930fa83f256 100644 --- a/drivers/net/ixgbevf/mbx.c +++ b/drivers/net/ixgbevf/mbx.c @@ -276,7 +276,7 @@ static s32 ixgbevf_write_mbx_vf(struct ixgbe_hw *hw, u32 *msg, u16 size) * @msg: The message buffer * @size: Length of buffer * - * returns 0 if it successfuly read message from buffer + * returns 0 if it successfully read message from buffer **/ static s32 ixgbevf_read_mbx_vf(struct ixgbe_hw *hw, u32 *msg, u16 size) { diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index d84c4224dd1..e8be47d6d7d 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -553,7 +553,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev) /* * There is no BCM5481 specification available, so down * here is everything we know about "register 0x18". This - * at least helps BCM5481 to successfuly receive packets + * at least helps BCM5481 to successfully receive packets * on MPC8360E-RDK board. Peter Barada * says: "This sets delay between the RXD and RXC signals * instead of using trace lengths to achieve timing". diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c index 1e2af96fc29..7b46e75deb5 100644 --- a/drivers/net/tile/tilepro.c +++ b/drivers/net/tile/tilepro.c @@ -177,7 +177,7 @@ struct tile_net_cpu { struct tile_net_stats_t stats; /* True iff NAPI is enabled. */ bool napi_enabled; - /* True if this tile has succcessfully registered with the IPP. */ + /* True if this tile has successfully registered with the IPP. */ bool registered; /* True if the link was down last time we tried to register. 
*/ bool link_down; diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 4d087ac1106..426cd4bf6a9 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -504,7 +504,7 @@ static int iscsi_target_do_authentication( break; case 1: pr_debug("iSCSI security negotiation" - " completed sucessfully.\n"); + " completed successfully.\n"); login->auth_complete = 1; if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) && (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) { diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 162b736c734..49fd0a9b0a5 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -593,7 +593,7 @@ int core_tpg_set_initiator_node_queue_depth( if (init_sess) tpg->se_tpg_tfo->close_session(init_sess); - pr_debug("Successfuly changed queue depth to: %d for Initiator" + pr_debug("Successfully changed queue depth to: %d for Initiator" " Node: %s on %s Target Portal Group: %u\n", queue_depth, initiatorname, tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg)); diff --git a/kernel/sched.c b/kernel/sched.c index 7fa8bedab91..86efa99b7ca 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1759,7 +1759,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) #ifdef CONFIG_SMP /* * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be - * successfuly executed on another CPU. We must ensure that updates of + * successfully executed on another CPU. We must ensure that updates of * per-task data have been completed by this moment. */ smp_wmb(); diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 16bd9c03679..69156923843 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -176,7 +176,7 @@ static void snd_free_dev_pages(struct device *dev, size_t size, void *ptr, * Calls the memory-allocator function for the corresponding * buffer type. * - * Returns zero if the buffer with the given size is allocated successfuly, + * Returns zero if the buffer with the given size is allocated successfully, * other a negative value at error. */ int snd_dma_alloc_pages(int type, struct device *device, size_t size, @@ -230,7 +230,7 @@ int snd_dma_alloc_pages(int type, struct device *device, size_t size, * tries to allocate again. The size actually allocated is stored in * res_size argument. * - * Returns zero if the buffer with the given size is allocated successfuly, + * Returns zero if the buffer with the given size is allocated successfully, * other a negative value at error. */ int snd_dma_alloc_pages_fallback(int type, struct device *device, size_t size, From ca56bec3046a0e02f729d2e5599fd29cd43081fb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 May 2011 08:31:09 -0700 Subject: [PATCH 350/678] rcu: Restore checks for blocking in RCU read-side critical sections Long ago, using TREE_RCU with PREEMPT would result in "scheduling while atomic" diagnostics if you blocked in an RCU read-side critical section. However, PREEMPT now implies TREE_PREEMPT_RCU, which defeats this diagnostic. This commit therefore adds a replacement diagnostic based on PROVE_RCU. Because rcu_lockdep_assert() and lockdep_rcu_dereference() are now being used for things that have nothing to do with rcu_dereference(), rename lockdep_rcu_dereference() to lockdep_rcu_suspicious() and add a third argument that is a string indicating what is suspicious. 
This third argument is passed in from a new third argument to rcu_lockdep_assert(). Update all calls to rcu_lockdep_assert() to add an informative third argument. Also, add a pair of rcu_lockdep_assert() calls from within rcu_note_context_switch(), one complaining if a context switch occurs in an RCU-bh read-side critical section and another complaining if a context switch occurs in an RCU-sched read-side critical section. These are present only if the PROVE_RCU kernel parameter is enabled. Finally, fix some checkpatch whitespace complaints in lockdep.c. Again, you must enable PROVE_RCU to see these new diagnostics. But you are enabling PROVE_RCU to check out new RCU uses in any case, aren't you? Signed-off-by: Paul E. McKenney --- include/linux/lockdep.h | 2 +- include/linux/rcupdate.h | 28 +++++++++++--- kernel/lockdep.c | 84 ++++++++++++++++++++++------------------ kernel/pid.c | 4 +- kernel/sched.c | 2 + 5 files changed, 75 insertions(+), 45 deletions(-) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index ef820a3c378..b6a56e37284 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -548,7 +548,7 @@ do { \ #endif #ifdef CONFIG_PROVE_RCU -extern void lockdep_rcu_dereference(const char *file, const int line); +void lockdep_rcu_suspicious(const char *file, const int line, const char *s); #endif #endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8f4f881a0ad..8e7470d8b67 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -297,19 +297,31 @@ extern int rcu_my_thread_group_empty(void); /** * rcu_lockdep_assert - emit lockdep splat if specified condition not met * @c: condition to check + * @s: informative message */ -#define rcu_lockdep_assert(c) \ +#define rcu_lockdep_assert(c, s) \ do { \ static bool __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ __warned = true; \ - lockdep_rcu_dereference(__FILE__, __LINE__); \ + lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ } while (0) +#define rcu_sleep_check() \ + do { \ + rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ + "Illegal context switch in RCU-bh" \ + " read-side critical section"); \ + rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ + "Illegal context switch in RCU-sched"\ + " read-side critical section"); \ + } while (0) + #else /* #ifdef CONFIG_PROVE_RCU */ -#define rcu_lockdep_assert(c) do { } while (0) +#define rcu_lockdep_assert(c, s) do { } while (0) +#define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ @@ -338,14 +350,16 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_check(p, c, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \ + " usage"); \ rcu_dereference_sparse(p, space); \ smp_read_barrier_depends(); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \ + " usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -359,7 +373,9 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_index_check(p, c) \ ({ \ typeof(p) _________p1 = ACCESS_ONCE(p); \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, \ + "suspicious rcu_dereference_index_check()" \ + " usage"); \ smp_read_barrier_depends(); \ (_________p1); \ }) 
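As a quick illustration of the new two-argument form defined above (a hypothetical caller; the list name and lock are made up, whereas the pid.c conversion further down in this patch is a real example):

	/* complain under PROVE_RCU if the caller's locking is wrong */
	rcu_lockdep_assert(rcu_read_lock_held() || lockdep_is_held(&foo_lock),
			   "suspicious walk of foo_list without rcu_read_lock()"
			   " or foo_lock protection");

Without CONFIG_PROVE_RCU both rcu_lockdep_assert() and the new rcu_sleep_check() compile away to empty statements, so the informative string costs nothing in production builds.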
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 447960603fb..b7e4460a710 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -1130,10 +1130,11 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, if (debug_locks_silent) return 0; - printk("\n=======================================================\n"); - printk( "[ INFO: possible circular locking dependency detected ]\n"); + printk("\n"); + printk("======================================================\n"); + printk("[ INFO: possible circular locking dependency detected ]\n"); print_kernel_version(); - printk( "-------------------------------------------------------\n"); + printk("-------------------------------------------------------\n"); printk("%s/%d is trying to acquire lock:\n", curr->comm, task_pid_nr(curr)); print_lock(check_src); @@ -1464,11 +1465,12 @@ print_bad_irq_dependency(struct task_struct *curr, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n======================================================\n"); - printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", + printk("\n"); + printk("======================================================\n"); + printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", irqclass, irqclass); print_kernel_version(); - printk( "------------------------------------------------------\n"); + printk("------------------------------------------------------\n"); printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", curr->comm, task_pid_nr(curr), curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, @@ -1693,10 +1695,11 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n=============================================\n"); - printk( "[ INFO: possible recursive locking detected ]\n"); + printk("\n"); + printk("=============================================\n"); + printk("[ INFO: possible recursive locking detected ]\n"); print_kernel_version(); - printk( "---------------------------------------------\n"); + printk("---------------------------------------------\n"); printk("%s/%d is trying to acquire lock:\n", curr->comm, task_pid_nr(curr)); print_lock(next); @@ -2178,10 +2181,11 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n=================================\n"); - printk( "[ INFO: inconsistent lock state ]\n"); + printk("\n"); + printk("=================================\n"); + printk("[ INFO: inconsistent lock state ]\n"); print_kernel_version(); - printk( "---------------------------------\n"); + printk("---------------------------------\n"); printk("inconsistent {%s} -> {%s} usage.\n", usage_str[prev_bit], usage_str[new_bit]); @@ -2242,10 +2246,11 @@ print_irq_inversion_bug(struct task_struct *curr, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n=========================================================\n"); - printk( "[ INFO: possible irq lock inversion dependency detected ]\n"); + printk("\n"); + printk("=========================================================\n"); + printk("[ INFO: possible irq lock inversion dependency detected ]\n"); print_kernel_version(); - printk( "---------------------------------------------------------\n"); + printk("---------------------------------------------------------\n"); printk("%s/%d just changed the state of lock:\n", curr->comm, 
task_pid_nr(curr)); print_lock(this); @@ -3071,9 +3076,10 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, if (debug_locks_silent) return 0; - printk("\n=====================================\n"); - printk( "[ BUG: bad unlock balance detected! ]\n"); - printk( "-------------------------------------\n"); + printk("\n"); + printk("=====================================\n"); + printk("[ BUG: bad unlock balance detected! ]\n"); + printk("-------------------------------------\n"); printk("%s/%d is trying to release lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); @@ -3484,9 +3490,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, if (debug_locks_silent) return 0; - printk("\n=================================\n"); - printk( "[ BUG: bad contention detected! ]\n"); - printk( "---------------------------------\n"); + printk("\n"); + printk("=================================\n"); + printk("[ BUG: bad contention detected! ]\n"); + printk("---------------------------------\n"); printk("%s/%d is trying to contend lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); @@ -3845,9 +3852,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, if (debug_locks_silent) return; - printk("\n=========================\n"); - printk( "[ BUG: held lock freed! ]\n"); - printk( "-------------------------\n"); + printk("\n"); + printk("=========================\n"); + printk("[ BUG: held lock freed! ]\n"); + printk("-------------------------\n"); printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", curr->comm, task_pid_nr(curr), mem_from, mem_to-1); print_lock(hlock); @@ -3901,9 +3909,10 @@ static void print_held_locks_bug(struct task_struct *curr) if (debug_locks_silent) return; - printk("\n=====================================\n"); - printk( "[ BUG: lock held at task exit time! ]\n"); - printk( "-------------------------------------\n"); + printk("\n"); + printk("=====================================\n"); + printk("[ BUG: lock held at task exit time! ]\n"); + printk("-------------------------------------\n"); printk("%s/%d is exiting with locks still held!\n", curr->comm, task_pid_nr(curr)); lockdep_print_held_locks(curr); @@ -3997,16 +4006,17 @@ void lockdep_sys_exit(void) if (unlikely(curr->lockdep_depth)) { if (!debug_locks_off()) return; - printk("\n================================================\n"); - printk( "[ BUG: lock held when returning to user space! ]\n"); - printk( "------------------------------------------------\n"); + printk("\n"); + printk("================================================\n"); + printk("[ BUG: lock held when returning to user space! ]\n"); + printk("------------------------------------------------\n"); printk("%s/%d is leaving the kernel with locks still held!\n", curr->comm, curr->pid); lockdep_print_held_locks(curr); } } -void lockdep_rcu_dereference(const char *file, const int line) +void lockdep_rcu_suspicious(const char *file, const int line, const char *s) { struct task_struct *curr = current; @@ -4015,15 +4025,15 @@ void lockdep_rcu_dereference(const char *file, const int line) return; #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ /* Note: the following can be executed concurrently, so be careful. */ - printk("\n===================================================\n"); - printk( "[ INFO: suspicious rcu_dereference_check() usage. 
]\n"); - printk( "---------------------------------------------------\n"); - printk("%s:%d invoked rcu_dereference_check() without protection!\n", - file, line); + printk("\n"); + printk("===============================\n"); + printk("[ INFO: suspicious RCU usage. ]\n"); + printk("-------------------------------\n"); + printk("%s:%d %s!\n", file, line, s); printk("\nother info that might help us debug this:\n\n"); printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); lockdep_print_held_locks(curr); printk("\nstack backtrace:\n"); dump_stack(); } -EXPORT_SYMBOL_GPL(lockdep_rcu_dereference); +EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); diff --git a/kernel/pid.c b/kernel/pid.c index e432057f3b2..8cafe7e72ad 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -418,7 +418,9 @@ EXPORT_SYMBOL(pid_task); */ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) { - rcu_lockdep_assert(rcu_read_lock_held()); + rcu_lockdep_assert(rcu_read_lock_held(), + "find_task_by_pid_ns() needs rcu_read_lock()" + " protection"); return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); } diff --git a/kernel/sched.c b/kernel/sched.c index 86efa99b7ca..a223ac3fe6a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4477,6 +4477,7 @@ static inline void schedule_debug(struct task_struct *prev) */ if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) __schedule_bug(prev); + rcu_sleep_check(); profile_hit(SCHED_PROFILING, __builtin_return_address(0)); @@ -8518,6 +8519,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) { static unsigned long prev_jiffy; /* ratelimiting */ + rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || oops_in_progress) return; From d4acbebfe7de0eea996401beb531d18340988aba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jun 2011 12:23:22 +0200 Subject: [PATCH 351/678] sched: Wrap scheduler p->cpus_allowed access This task is preparatory for the migrate_disable() implementation, but stands on its own and provides a cleanup. It currently only converts those sites required for task-placement. Kosaki-san once mentioned replacing cpus_allowed with a proper cpumask_t instead of the NR_CPUS sized array it currently is, that would also require something like this. Signed-off-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: KOSAKI Motohiro Link: http://lkml.kernel.org/n/tip-e42skvaddos99psip0vce41o@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 ++++---- kernel/sched_fair.c | 12 ++++++------ kernel/sched_rt.c | 4 ++-- lib/smp_processor_id.c | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index a223ac3fe6a..be332836f4e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2445,11 +2445,11 @@ static int select_fallback_rq(int cpu, struct task_struct *p) /* Look for allowed, online CPU in same node. */ for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) return dest_cpu; /* Any allowed, online CPU? 
*/ - dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); + dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); if (dest_cpu < nr_cpu_ids) return dest_cpu; @@ -2486,7 +2486,7 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) * [ this allows ->select_task() to simply return task_cpu(p) and * not worry about this generic constraint ] */ - if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || + if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) || !cpu_online(cpu))) cpu = select_fallback_rq(task_cpu(p), p); @@ -6385,7 +6385,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) if (task_cpu(p) != src_cpu) goto done; /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) goto fail; /* diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 612dda7af5b..82529361c59 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1547,7 +1547,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, /* Skip over this group if it has no CPUs allowed */ if (!cpumask_intersects(sched_group_cpus(group), - &p->cpus_allowed)) + tsk_cpus_allowed(p))) continue; local_group = cpumask_test_cpu(this_cpu, @@ -1593,7 +1593,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) int i; /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { + for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { load = weighted_cpuload(i); if (load < min_load || (load == min_load && i == this_cpu)) { @@ -1637,7 +1637,7 @@ static int select_idle_sibling(struct task_struct *p, int target) if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) break; - for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { + for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) { if (idle_cpu(i)) { target = i; break; @@ -1680,7 +1680,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) int sync = wake_flags & WF_SYNC; if (sd_flag & SD_BALANCE_WAKE) { - if (cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) want_affine = 1; new_cpu = prev_cpu; } @@ -2049,7 +2049,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, * 2) cannot be migrated to this CPU due to cpus_allowed, or * 3) are cache-hot on their current CPU. 
*/ - if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { + if (!cpumask_test_cpu(this_cpu, tsk_cpus_allowed(p))) { schedstat_inc(p, se.statistics.nr_failed_migrations_affine); return 0; } @@ -3438,7 +3438,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, * moved to this_cpu */ if (!cpumask_test_cpu(this_cpu, - &busiest->curr->cpus_allowed)) { + tsk_cpus_allowed(busiest->curr))) { raw_spin_unlock_irqrestore(&busiest->lock, flags); all_pinned = 1; diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 046f429be33..25dc8ff04e5 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1199,7 +1199,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && + (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && (p->rt.nr_cpus_allowed > 1)) return 1; return 0; @@ -1344,7 +1344,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) */ if (unlikely(task_rq(task) != rq || !cpumask_test_cpu(lowest_rq->cpu, - &task->cpus_allowed) || + tsk_cpus_allowed(task)) || task_running(rq, task) || !task->on_rq)) { diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 4689cb073da..503f087382a 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -22,7 +22,7 @@ notrace unsigned int debug_smp_processor_id(void) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu))) + if (cpumask_equal(tsk_cpus_allowed(current), cpumask_of(this_cpu))) goto out; /* From 11052e05482eb572a081714abf6a1c96905bca78 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 25 Jun 2011 15:45:46 +0200 Subject: [PATCH 352/678] sched: Unify the ->cpus_allowed mask copy Currently every sched_class::set_cpus_allowed() implementation has to copy the cpumask into task_struct::cpus_allowed, this is pointless, put this copy in the generic code. Signed-off-by: Peter Zijlstra Acked-by: Thomas Gleixner Link: http://lkml.kernel.org/n/tip-jhl5s9fckd9ptw1fzbqqlrd3@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched.c | 7 +++---- kernel/sched_rt.c | 3 --- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index be332836f4e..33717fbc2d3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6284,10 +6284,9 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { if (p->sched_class && p->sched_class->set_cpus_allowed) p->sched_class->set_cpus_allowed(p, new_mask); - else { - cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); - } + + cpumask_copy(&p->cpus_allowed, new_mask); + p->rt.nr_cpus_allowed = cpumask_weight(new_mask); } /* diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 25dc8ff04e5..5bc10c1195f 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1627,9 +1627,6 @@ static void set_cpus_allowed_rt(struct task_struct *p, update_rt_migration(&rq->rt); } - - cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = weight; } /* Assumes rq->lock is held */ From ff494bc0e8a15b5f32406f273646c4e9c5aa00de Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 17 Jul 2011 20:47:54 +0200 Subject: [PATCH 353/678] sched: Don't use tasklist_lock for debug prints Avoid taking locks from debug prints, this avoids latencies on -rt, and improves reliability of the debug code. 
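The replacement pattern is the usual RCU-protected walk of the task list; roughly (a sketch only, using the classic do_each_thread/while_each_thread iterators that show_state_filter() already relies on):

	rcu_read_lock();
	do_each_thread(g, p) {
		/* inspect p here; the body must not block */
		sched_show_task(p);
	} while_each_thread(g, p);
	rcu_read_unlock();

A task exiting during the walk may be skipped or printed while dying, which is acceptable for a diagnostic dump and avoids stalling on the heavily contended tasklist_lock.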
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 33717fbc2d3..07a338c6ceb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6153,7 +6153,7 @@ void show_state_filter(unsigned long state_filter) printk(KERN_INFO " task PC stack pid father\n"); #endif - read_lock(&tasklist_lock); + rcu_read_lock(); do_each_thread(g, p) { /* * reset the NMI-timeout, listing all files on a slow @@ -6169,7 +6169,7 @@ void show_state_filter(unsigned long state_filter) #ifdef CONFIG_SCHED_DEBUG sysrq_sched_debug_show(); #endif - read_unlock(&tasklist_lock); + rcu_read_unlock(); /* * Only show locks if all tasks are dumped: */ From cb5535bc86207cd5b8287da685adef22d5617189 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Oct 2011 15:22:46 -0400 Subject: [PATCH 354/678] sched: Document wait_for_completion_*() return values The return-value convention for these functions varies depending on whether they're interruptible or can timeout. It can be a little confusing--document it. Signed-off-by: J. Bruce Fields Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20111006192246.GB28026@fieldses.org Signed-off-by: Ingo Molnar --- kernel/sched.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/sched.c b/kernel/sched.c index 07a338c6ceb..d7e636ed20b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4941,6 +4941,9 @@ EXPORT_SYMBOL(wait_for_completion); * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. The timeout is in jiffies. It is not * interruptible. + * + * The return value is 0 if timed out, and positive (at least 1, or number of + * jiffies left till timeout) if completed. */ unsigned long __sched wait_for_completion_timeout(struct completion *x, unsigned long timeout) @@ -4955,6 +4958,8 @@ EXPORT_SYMBOL(wait_for_completion_timeout); * * This waits for completion of a specific task to be signaled. It is * interruptible. + * + * The return value is -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_interruptible(struct completion *x) { @@ -4972,6 +4977,9 @@ EXPORT_SYMBOL(wait_for_completion_interruptible); * * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. It is interruptible. The timeout is in jiffies. + * + * The return value is -ERESTARTSYS if interrupted, 0 if timed out, + * positive (at least 1, or number of jiffies left till timeout) if completed. */ long __sched wait_for_completion_interruptible_timeout(struct completion *x, @@ -4987,6 +4995,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); * * This waits to be signaled for completion of a specific task. It can be * interrupted by a kill signal. + * + * The return value is -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_killable(struct completion *x) { @@ -5005,6 +5015,9 @@ EXPORT_SYMBOL(wait_for_completion_killable); * This waits for either a completion of a specific task to be * signaled or for a specified timeout to expire. It can be * interrupted by a kill signal. The timeout is in jiffies. + * + * The return value is -ERESTARTSYS if interrupted, 0 if timed out, + * positive (at least 1, or number of jiffies left till timeout) if completed. 
*/ long __sched wait_for_completion_killable_timeout(struct completion *x, From 5bf559113378b638b63a34c628b73a5c51a189ef Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Wed, 26 Oct 2011 23:14:16 +0200 Subject: [PATCH 355/678] sched: Set the command name of the idle tasks in SMP kernels In UP systems, the idle task is initialized using the init_task structure from which the command name is taken (currently "swapper"). In SMP systems, one idle task per CPU is forked by the worker thread from which the task structure is copied. The command name is, therefore, "kworker/0:0" or "kworker/0:1", if not updated. Since such update was lacking, all idle tasks in SMP systems were incorrectly named. This longtime bug was not discovered immediately, because there is no /proc/0 entry - the bug only becomes apparent when tracing is enabled. This patch sets the command name of the idle tasks in SMP systems to the name that is used in the INIT_TASK structure suffixed by a slash and the number of the CPU. Signed-off-by: Carsten Emde Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20111026211708.768925506@osadl.org Signed-off-by: Ingo Molnar Conflicts: kernel/sched.c --- include/linux/init_task.h | 4 +++- kernel/sched.c | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d14e058aaee..ef1051a8854 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -126,6 +126,8 @@ extern struct cred init_cred; # define INIT_PERF_EVENTS(tsk) #endif +#define INIT_TASK_COMM "swapper" + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -162,7 +164,7 @@ extern struct cred init_cred; .group_leader = &tsk, \ RCU_INIT_POINTER(.real_cred, &init_cred), \ RCU_INIT_POINTER(.cred, &init_cred), \ - .comm = "swapper", \ + .comm = INIT_TASK_COMM, \ .thread = INIT_THREAD, \ .fs = &init_fs, \ .files = &init_files, \ diff --git a/kernel/sched.c b/kernel/sched.c index d7e636ed20b..34ac76f464b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include @@ -6243,6 +6244,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) */ idle->sched_class = &idle_sched_class; ftrace_graph_init_idle_task(idle, cpu); +#if defined(CONFIG_SMP) + sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); +#endif } /* From c57df11095fc492752e447cea17e4d5250cb194b Mon Sep 17 00:00:00 2001 From: "he, bo" Date: Wed, 25 Apr 2012 19:59:21 +0800 Subject: [PATCH 356/678] sched: Fix OOPS when build_sched_domains() percpu allocation fails commit fb2cf2c660971bea0ad86a9a5c19ad39eab61344 upstream. Under extreme memory used up situations, percpu allocation might fail. We hit it when system goes to suspend-to-ram, causing a kworker panic: EIP: [] build_sched_domains+0x23a/0xad0 Kernel panic - not syncing: Fatal exception Pid: 3026, comm: kworker/u:3 3.0.8-137473-gf42fbef #1 Call Trace: [] panic+0x66/0x16c [...] [] partition_sched_domains+0x287/0x4b0 [] cpuset_update_active_cpus+0x1fe/0x210 [] cpuset_cpu_inactive+0x1d/0x30 [...] With this fix applied build_sched_domains() will return -ENOMEM and the suspend attempt fails. Signed-off-by: he, bo Reviewed-by: Zhang, Yanmin Reviewed-by: Srivatsa S. 
Bhat Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1335355161.5892.17.camel@hebo [ So, we fail to deallocate a CPU because we cannot allocate RAM :-/ I don't like that kind of sad behavior but nevertheless it should not crash under high memory load. ] Signed-off-by: Ingo Molnar [bwh: Backported to 3.2: change filename] Signed-off-by: Ben Hutchings --- kernel/sched.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 34ac76f464b..5438aca2839 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7717,16 +7717,26 @@ static void __sdt_free(const struct cpumask *cpu_map) struct sd_data *sdd = &tl->data; for_each_cpu(j, cpu_map) { - struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); - if (sd && (sd->flags & SD_OVERLAP)) - free_sched_groups(sd->groups, 0); - kfree(*per_cpu_ptr(sdd->sd, j)); - kfree(*per_cpu_ptr(sdd->sg, j)); - kfree(*per_cpu_ptr(sdd->sgp, j)); + struct sched_domain *sd; + + if (sdd->sd) { + sd = *per_cpu_ptr(sdd->sd, j); + if (sd && (sd->flags & SD_OVERLAP)) + free_sched_groups(sd->groups, 0); + kfree(*per_cpu_ptr(sdd->sd, j)); + } + + if (sdd->sg) + kfree(*per_cpu_ptr(sdd->sg, j)); + if (sdd->sgp) + kfree(*per_cpu_ptr(sdd->sgp, j)); } free_percpu(sdd->sd); + sdd->sd = NULL; free_percpu(sdd->sg); + sdd->sg = NULL; free_percpu(sdd->sgp); + sdd->sgp = NULL; } } From 95c783d8112c27d6473a7b00c73e9c33a049f461 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 5 Jun 2012 13:44:36 -0500 Subject: [PATCH 357/678] sched: Fix the relax_domain_level boot parameter commit a841f8cef4bb124f0f5563314d0beaf2e1249d72 upstream. It does not get processed because sched_domain_level_max is 0 at the time that setup_relax_domain_level() is run. Simply accept the value as it is, as we don't know the value of sched_domain_level_max until sched domain construction is completed. Fix sched_relax_domain_level in cpuset. The build_sched_domain() routine calls the set_domain_attribute() routine prior to setting the sd->level, however, the set_domain_attribute() routine relies on the sd->level to decide whether idle load balancing will be off/on. 
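For context, set_domain_attribute() makes its decision against sd->level roughly like this (reconstructed from the 3.x scheduler for illustration; not part of this diff):

	static void set_domain_attribute(struct sched_domain *sd,
					 struct sched_domain_attr *attr)
	{
		int request;

		if (!attr || attr->relax_domain_level < 0) {
			if (default_relax_domain_level < 0)
				return;
			request = default_relax_domain_level;
		} else {
			request = attr->relax_domain_level;
		}
		if (request < sd->level) {
			/* turn off idle balance on this domain */
			sd->flags &= ~(SD_BALANCE_WAKE | SD_BALANCE_NEWIDLE);
		} else {
			/* turn on idle balance on this domain */
			sd->flags |= (SD_BALANCE_WAKE | SD_BALANCE_NEWIDLE);
		}
	}

With sd->level still zero at the old call site, 'request < sd->level' could never observe the requested level, hence moving the call below the sd->level assignment in build_sched_domain().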
Signed-off-by: Dimitri Sivanich Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120605184436.GA15668@sgi.com Signed-off-by: Ingo Molnar [bwh: Backported to 3.2: adjust the filename] Signed-off-by: Ben Hutchings --- kernel/sched.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 5438aca2839..bfebbdbfb81 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7542,11 +7542,8 @@ int sched_domain_level_max; static int __init setup_relax_domain_level(char *str) { - unsigned long val; - - val = simple_strtoul(str, NULL, 0); - if (val < sched_domain_level_max) - default_relax_domain_level = val; + if (kstrtoint(str, 0, &default_relax_domain_level)) + pr_warn("Unable to set relax_domain_level\n"); return 1; } @@ -7749,7 +7746,6 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, if (!sd) return child; - set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); if (child) { sd->level = child->level + 1; @@ -7757,6 +7753,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, child->parent = sd; } sd->child = child; + set_domain_attribute(sd, attr); return sd; } From 9053af0dccb7fa190b6892bcef5724c035c03fd9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jun 2012 13:36:05 +0200 Subject: [PATCH 358/678] sched: Fix race in task_group() commit 8323f26ce3425460769605a6aece7a174edaa7d1 upstream Stefan reported a crash on a kernel before a3e5d1091c1 ("sched: Don't call task_group() too many times in set_task_rq()"), he found the reason to be that the multiple task_group() invocations in set_task_rq() returned different values. Looking at all that I found a lack of serialization and plain wrong comments. The below tries to fix it using an extra pointer which is updated under the appropriate scheduler locks. Its not pretty, but I can't really see another way given how all the cgroup stuff works. 
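As a rough userspace analogy (not from the patch; the types and atomics are purely illustrative), the race is that two back-to-back reads of a value derived from shared cgroup state can disagree if that state changes in between, while reads of a single snapshot that is only republished at one well-defined point, as sched_move_task() does for p->sched_task_group, cannot:

#include <stdatomic.h>
#include <stdio.h>

struct group { const char *name; };

static struct group g_a = { "A" }, g_b = { "B" };

/* "source of truth" that another subsystem may flip at any time */
static _Atomic(struct group *) css_group = &g_a;

/* snapshot republished only at one well-defined point (cf. sched_move_task()) */
static _Atomic(struct group *) cached_group = &g_a;

static struct group *task_group_racy(void)   { return atomic_load(&css_group); }
static struct group *task_group_cached(void) { return atomic_load(&cached_group); }

int main(void)
{
	struct group *first = task_group_racy();
	atomic_store(&css_group, &g_b);           /* concurrent attach happens here */
	struct group *second = task_group_racy();

	printf("racy reads:   %s then %s\n", first->name, second->name); /* may differ */
	printf("cached reads: %s then %s\n", task_group_cached()->name,
	       task_group_cached()->name);        /* always agree until the move runs */
	return 0;
}
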
Reported-and-tested-by: Stefan Bader Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1340364965.18025.71.camel@twins Signed-off-by: Ingo Molnar (backported to previous file names and layout) Signed-off-by: Stefan Bader Signed-off-by: Ben Hutchings --- include/linux/init_task.h | 12 +++++++++++- include/linux/sched.h | 5 ++++- kernel/sched.c | 32 ++++++++++++++++++-------------- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index ef1051a8854..8499db0657f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -117,8 +117,17 @@ extern struct group_info init_groups; extern struct cred init_cred; +extern struct task_group root_task_group; + +#ifdef CONFIG_CGROUP_SCHED +# define INIT_CGROUP_SCHED(tsk) \ + .sched_task_group = &root_task_group, +#else +# define INIT_CGROUP_SCHED(tsk) +#endif + #ifdef CONFIG_PERF_EVENTS -# define INIT_PERF_EVENTS(tsk) \ +# define INIT_PERF_EVENTS(tsk) \ .perf_event_mutex = \ __MUTEX_INITIALIZER(tsk.perf_event_mutex), \ .perf_event_list = LIST_HEAD_INIT(tsk.perf_event_list), @@ -155,6 +164,7 @@ extern struct cred init_cred; }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ INIT_PUSHABLE_TASKS(tsk) \ + INIT_CGROUP_SCHED(tsk) \ .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ .real_parent = &tsk, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index c404b24397c..306ec7b4305 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1238,6 +1238,9 @@ struct task_struct { const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; +#ifdef CONFIG_CGROUP_SCHED + struct task_group *sched_task_group; +#endif #ifdef CONFIG_PREEMPT_NOTIFIERS /* list of struct preempt_notifier: */ @@ -2643,7 +2646,7 @@ extern int sched_group_set_rt_period(struct task_group *tg, extern long sched_group_rt_period(struct task_group *tg); extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); #endif -#endif +#endif /* CONFIG_CGROUP_SCHED */ extern int task_can_switch_user(struct user_struct *up, struct task_struct *tsk); diff --git a/kernel/sched.c b/kernel/sched.c index bfebbdbfb81..dc6cf43b37e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -620,22 +620,19 @@ static inline int cpu_of(struct rq *rq) /* * Return the group to which this tasks belongs. * - * We use task_subsys_state_check() and extend the RCU verification with - * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each - * task it moves into the cgroup. Therefore by holding either of those locks, - * we pin the task to the current cgroup. + * We cannot use task_subsys_state() and friends because the cgroup + * subsystem changes that value before the cgroup_subsys::attach() method + * is called, therefore we cannot pin it and might observe the wrong value. + * + * The same is true for autogroup's p->signal->autogroup->tg, the autogroup + * core changes this before calling sched_move_task(). + * + * Instead we use a 'copy' which is updated from sched_move_task() while + * holding both task_struct::pi_lock and rq::lock. 
*/ static inline struct task_group *task_group(struct task_struct *p) { - struct task_group *tg; - struct cgroup_subsys_state *css; - - css = task_subsys_state_check(p, cpu_cgroup_subsys_id, - lockdep_is_held(&p->pi_lock) || - lockdep_is_held(&task_rq(p)->lock)); - tg = container_of(css, struct task_group, css); - - return autogroup_task_group(p, tg); + return p->sched_task_group; } /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ @@ -2274,7 +2271,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. * * sched_move_task() holds both and thus holding either pins the cgroup, - * see set_task_rq(). + * see task_group(). * * Furthermore, all task_rq users should acquire both locks, see * task_rq_lock(). @@ -8913,6 +8910,7 @@ void sched_destroy_group(struct task_group *tg) */ void sched_move_task(struct task_struct *tsk) { + struct task_group *tg; int on_rq, running; unsigned long flags; struct rq *rq; @@ -8927,6 +8925,12 @@ void sched_move_task(struct task_struct *tsk) if (unlikely(running)) tsk->sched_class->put_prev_task(rq, tsk); + tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id, + lockdep_is_held(&tsk->sighand->siglock)), + struct task_group, css); + tg = autogroup_task_group(tsk, tg); + tsk->sched_task_group = tg; + #ifdef CONFIG_FAIR_GROUP_SCHED if (tsk->sched_class->task_move_group) tsk->sched_class->task_move_group(tsk, on_rq); From 6ca61159d3cd165d373f09a2cf903c1ae9080067 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 8 Aug 2012 11:27:15 +0200 Subject: [PATCH 359/678] sched: fix divide by zero at {thread_group,task}_times commit bea6832cc8c4a0a9a65dd17da6aaa657fe27bc3e upstream. On architectures where cputime_t is 64 bit type, is possible to trigger divide by zero on do_div(temp, (__force u32) total) line, if total is a non zero number but has lower 32 bit's zeroed. Removing casting is not a good solution since some do_div() implementations do cast to u32 internally. 
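A small userspace demonstration of the arithmetic (illustrative only, not the kernel code): a total whose low 32 bits happen to be zero is a valid non-zero 64-bit value, but truncating it to u32 for the division, as a 32-bit do_div() would, yields a zero divisor. Performing the whole scaling utime * rtime / total at 64-bit width, as the new scale_utime() helper does via div64_u64(), avoids the trap:

#include <stdint.h>
#include <stdio.h>

static uint64_t scale_utime(uint64_t utime, uint64_t rtime, uint64_t total)
{
	return rtime * utime / total;          /* full 64-bit division */
}

int main(void)
{
	uint64_t utime = 3;
	uint64_t total = 1ULL << 32;           /* non-zero, but (uint32_t)total == 0 */
	uint64_t rtime = 1ULL << 33;

	printf("(u32)total = %lu -> a 32-bit division here would be divide by zero\n",
	       (unsigned long)(uint32_t)total);
	printf("scaled utime = %llu\n",
	       (unsigned long long)scale_utime(utime, rtime, total));
	return 0;
}
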
This problem can be triggered in practice on very long lived processes: PID: 2331 TASK: ffff880472814b00 CPU: 2 COMMAND: "oraagent.bin" #0 [ffff880472a51b70] machine_kexec at ffffffff8103214b #1 [ffff880472a51bd0] crash_kexec at ffffffff810b91c2 #2 [ffff880472a51ca0] oops_end at ffffffff814f0b00 #3 [ffff880472a51cd0] die at ffffffff8100f26b #4 [ffff880472a51d00] do_trap at ffffffff814f03f4 #5 [ffff880472a51d60] do_divide_error at ffffffff8100cfff #6 [ffff880472a51e00] divide_error at ffffffff8100be7b [exception RIP: thread_group_times+0x56] RIP: ffffffff81056a16 RSP: ffff880472a51eb8 RFLAGS: 00010046 RAX: bc3572c9fe12d194 RBX: ffff880874150800 RCX: 0000000110266fad RDX: 0000000000000000 RSI: ffff880472a51eb8 RDI: 001038ae7d9633dc RBP: ffff880472a51ef8 R8: 00000000b10a3a64 R9: ffff880874150800 R10: 00007fcba27ab680 R11: 0000000000000202 R12: ffff880472a51f08 R13: ffff880472a51f10 R14: 0000000000000000 R15: 0000000000000007 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffff880472a51f00] do_sys_times at ffffffff8108845d #8 [ffff880472a51f40] sys_times at ffffffff81088524 #9 [ffff880472a51f80] system_call_fastpath at ffffffff8100b0f2 RIP: 0000003808caac3a RSP: 00007fcba27ab6d8 RFLAGS: 00000202 RAX: 0000000000000064 RBX: ffffffff8100b0f2 RCX: 0000000000000000 RDX: 00007fcba27ab6e0 RSI: 000000000076d58e RDI: 00007fcba27ab6e0 RBP: 00007fcba27ab700 R8: 0000000000000020 R9: 000000000000091b R10: 00007fcba27ab680 R11: 0000000000000202 R12: 00007fff9ca41940 R13: 0000000000000000 R14: 00007fcba27ac9c0 R15: 00007fff9ca41940 ORIG_RAX: 0000000000000064 CS: 0033 SS: 002b Signed-off-by: Stanislaw Gruszka Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120808092714.GA3580@redhat.com Signed-off-by: Thomas Gleixner [bwh: Backported to 3.2: - Adjust filename - Most conversions in the original code are implicit] Signed-off-by: Ben Hutchings --- kernel/sched.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index dc6cf43b37e..6f9a7ea393c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4297,6 +4297,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) #endif +static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) +{ + u64 temp = (__force u64) rtime; + + temp *= (__force u64) utime; + + if (sizeof(cputime_t) == 4) + temp = div_u64(temp, (__force u32) total); + else + temp = div64_u64(temp, (__force u64) total); + + return (__force cputime_t) temp; +} + void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); @@ -4306,13 +4320,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) */ rtime = nsecs_to_cputime(p->se.sum_exec_runtime); - if (total) { - u64 temp = rtime; - - temp *= utime; - do_div(temp, total); - utime = (cputime_t)temp; - } else + if (total) + utime = scale_utime(utime, rtime, total); + else utime = rtime; /* @@ -4339,13 +4349,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) total = cputime_add(cputime.utime, cputime.stime); rtime = nsecs_to_cputime(cputime.sum_exec_runtime); - if (total) { - u64 temp = rtime; - - temp *= cputime.utime; - do_div(temp, total); - utime = (cputime_t)temp; - } else + if (total) + utime = scale_utime(cputime.utime, rtime, total); + else utime = rtime; sig->prev_utime = max(sig->prev_utime, utime); From 
841af073122e8cf7c5a11c2dfdc90713fae5788d Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Sep 2011 14:00:42 +0800 Subject: [PATCH 360/678] llist: Make some llist functions inline Because llist code will be used in performance critical scheduler code path, make llist_add() and llist_del_all() inline to avoid function calling overhead and related 'glue' overhead. Signed-off-by: Huang Ying Acked-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1315461646-1379-2-git-send-email-ying.huang@intel.com Signed-off-by: Ingo Molnar --- drivers/acpi/apei/Kconfig | 1 - include/linux/llist.h | 64 +++++++++++++++++++++++++++++++++++---- lib/Kconfig | 3 -- lib/Makefile | 4 +-- lib/llist.c | 40 ------------------------ 5 files changed, 59 insertions(+), 53 deletions(-) diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index e3f47872ec2..f0c1ce95a0e 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -14,7 +14,6 @@ config ACPI_APEI_GHES depends on ACPI_APEI && X86 select ACPI_HED select IRQ_WORK - select LLIST select GENERIC_ALLOCATOR help Generic Hardware Error Source provides a way to report diff --git a/include/linux/llist.h b/include/linux/llist.h index aa0c8b5b3cd..3eccdfd6609 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -37,8 +37,28 @@ * architectures that don't have NMI-safe cmpxchg implementation, the * list can NOT be used in NMI handler. So code uses the list in NMI * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * + * Copyright 2010,2011 Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include +#include +#include + struct llist_head { struct llist_node *first; }; @@ -113,14 +133,46 @@ static inline void init_llist_head(struct llist_head *list) * test whether the list is empty without deleting something from the * list. 
*/ -static inline int llist_empty(const struct llist_head *head) +static inline bool llist_empty(const struct llist_head *head) { return ACCESS_ONCE(head->first) == NULL; } -void llist_add(struct llist_node *new, struct llist_head *head); -void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, - struct llist_head *head); -struct llist_node *llist_del_first(struct llist_head *head); -struct llist_node *llist_del_all(struct llist_head *head); +/** + * llist_add - add a new entry + * @new: new entry to be added + * @head: the head for your lock-less list + */ +static inline void llist_add(struct llist_node *new, struct llist_head *head) +{ + struct llist_node *entry, *old_entry; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + old_entry = entry; + new->next = entry; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); +} + +/** + * llist_del_all - delete all entries from lock-less list + * @head: the head of lock-less list to delete all entries + * + * If list is empty, return NULL, otherwise, delete all entries and + * return the pointer to the first entry. The order of entries + * deleted is from the newest to the oldest added one. + */ +static inline struct llist_node *llist_del_all(struct llist_head *head) +{ +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + return xchg(&head->first, NULL); +} #endif /* LLIST_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 6c695ff9cab..32f3e5ae2be 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -276,7 +276,4 @@ config CORDIC so its calculations are in fixed point. Modules can select this when they require this function. Module will be called cordic. -config LLIST - bool - endmenu diff --git a/lib/Makefile b/lib/Makefile index 982dbf01b21..3a74ec91d05 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -22,7 +22,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ - bsearch.o find_last_bit.o find_next_bit.o + bsearch.o find_last_bit.o find_next_bit.o llist.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o @@ -115,8 +115,6 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o obj-$(CONFIG_CORDIC) += cordic.o -obj-$(CONFIG_LLIST) += llist.o - hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/llist.c b/lib/llist.c index da445724fa1..3e3fa9139c4 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -29,28 +29,6 @@ #include -/** - * llist_add - add a new entry - * @new: new entry to be added - * @head: the head for your lock-less list - */ -void llist_add(struct llist_node *new, struct llist_head *head) -{ - struct llist_node *entry, *old_entry; - -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - - entry = head->first; - do { - old_entry = entry; - new->next = entry; - cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); -} -EXPORT_SYMBOL_GPL(llist_add); - /** * llist_add_batch - add several linked entries in batch * @new_first: first entry in batch to be added @@ -109,21 +87,3 @@ struct llist_node *llist_del_first(struct llist_head *head) return entry; } EXPORT_SYMBOL_GPL(llist_del_first); - -/** - * llist_del_all - delete all entries from lock-less list - * @head: the head of lock-less list to delete all entries - * - * If list is empty, return NULL, otherwise, 
delete all entries and - * return the pointer to the first entry. The order of entries - * deleted is from the newest to the oldest added one. - */ -struct llist_node *llist_del_all(struct llist_head *head) -{ -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - - return xchg(&head->first, NULL); -} -EXPORT_SYMBOL_GPL(llist_del_all); From 8e007b8073e0c413c1acce3a2a10adb559f2b350 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 4 Oct 2011 12:43:11 +0200 Subject: [PATCH 361/678] llist: Remove the platform-dependent NMI checks Remove the nmi() checks spread around the code. in_nmi() is not available on every architecture and it's a pretty obscure and ugly check in any case. Cc: Huang Ying Cc: Mathieu Desnoyers Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1315461646-1379-3-git-send-email-ying.huang@intel.com Signed-off-by: Ingo Molnar --- include/linux/llist.h | 12 ++---------- lib/llist.c | 12 ++---------- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/include/linux/llist.h b/include/linux/llist.h index 3eccdfd6609..65fca1cbf51 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -35,8 +35,8 @@ * * The basic atomic operation of this list is cmpxchg on long. On * architectures that don't have NMI-safe cmpxchg implementation, the - * list can NOT be used in NMI handler. So code uses the list in NMI - * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * list can NOT be used in NMI handlers. So code that uses the list in + * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * Copyright 2010,2011 Intel Corp. * Author: Huang Ying @@ -147,10 +147,6 @@ static inline void llist_add(struct llist_node *new, struct llist_head *head) { struct llist_node *entry, *old_entry; -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - entry = head->first; do { old_entry = entry; @@ -169,10 +165,6 @@ static inline void llist_add(struct llist_node *new, struct llist_head *head) */ static inline struct llist_node *llist_del_all(struct llist_head *head) { -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - return xchg(&head->first, NULL); } #endif /* LLIST_H */ diff --git a/lib/llist.c b/lib/llist.c index 3e3fa9139c4..b445f2c8596 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -3,8 +3,8 @@ * * The basic atomic operation of this list is cmpxchg on long. On * architectures that don't have NMI-safe cmpxchg implementation, the - * list can NOT be used in NMI handler. So code uses the list in NMI - * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * list can NOT be used in NMI handlers. So code that uses the list in + * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * Copyright 2010,2011 Intel Corp. * Author: Huang Ying @@ -40,10 +40,6 @@ void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, { struct llist_node *entry, *old_entry; -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - entry = head->first; do { old_entry = entry; @@ -71,10 +67,6 @@ struct llist_node *llist_del_first(struct llist_head *head) { struct llist_node *entry, *old_entry, *next; -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - entry = head->first; do { if (entry == NULL) From 6fc0cc289227439d0ecf59e04bf4409ffc6abda3 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Sep 2011 14:00:44 +0800 Subject: [PATCH 362/678] llist: Move cpu_relax() to after the cmpxchg() If in llist_add()/etc. 
functions the first cmpxchg() call succeeds, it is not necessary to use cpu_relax() before the cmpxchg(). So cpu_relax() in a busy loop involving cmpxchg() should go after cmpxchg() instead of before that. This patch fixes this for all involved llist functions. Signed-off-by: Huang Ying Acked-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1315461646-1379-4-git-send-email-ying.huang@intel.com Signed-off-by: Ingo Molnar --- include/linux/llist.h | 7 +++++-- lib/llist.c | 14 ++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/linux/llist.h b/include/linux/llist.h index 65fca1cbf51..ca91875286b 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -148,11 +148,14 @@ static inline void llist_add(struct llist_node *new, struct llist_head *head) struct llist_node *entry, *old_entry; entry = head->first; - do { + for (;;) { old_entry = entry; new->next = entry; + entry = cmpxchg(&head->first, old_entry, new); + if (entry == old_entry) + break; cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); + } } /** diff --git a/lib/llist.c b/lib/llist.c index b445f2c8596..6c69f1d14c4 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -41,11 +41,14 @@ void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_node *entry, *old_entry; entry = head->first; - do { + for (;;) { old_entry = entry; new_last->next = entry; + entry = cmpxchg(&head->first, old_entry, new_first); + if (entry == old_entry) + break; cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry); + } } EXPORT_SYMBOL_GPL(llist_add_batch); @@ -68,13 +71,16 @@ struct llist_node *llist_del_first(struct llist_head *head) struct llist_node *entry, *old_entry, *next; entry = head->first; - do { + for (;;) { if (entry == NULL) return NULL; old_entry = entry; next = entry->next; + entry = cmpxchg(&head->first, old_entry, next); + if (entry == old_entry) + break; cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry); + } return entry; } From 2189fd21f98394da653bb18baf58b836a347a215 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Sep 2011 14:00:45 +0800 Subject: [PATCH 363/678] llist: Return whether list is empty before adding in llist_add() Extend the llist_add*() functions to return a success indicator, this allows us in the scheduler code to send an IPI if the queue was empty. ( There's no effect on existing users, because the list_add_xxx() functions are inline, thus this will be optimized out by the compiler if not used by callers. ) Signed-off-by: Huang Ying Cc: Mathieu Desnoyers Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1315461646-1379-5-git-send-email-ying.huang@intel.com Signed-off-by: Ingo Molnar --- include/linux/llist.h | 6 +++++- lib/llist.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/llist.h b/include/linux/llist.h index ca91875286b..27bbdf5ddf8 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -142,8 +142,10 @@ static inline bool llist_empty(const struct llist_head *head) * llist_add - add a new entry * @new: new entry to be added * @head: the head for your lock-less list + * + * Return whether list is empty before adding. 
*/ -static inline void llist_add(struct llist_node *new, struct llist_head *head) +static inline bool llist_add(struct llist_node *new, struct llist_head *head) { struct llist_node *entry, *old_entry; @@ -156,6 +158,8 @@ static inline void llist_add(struct llist_node *new, struct llist_head *head) break; cpu_relax(); } + + return old_entry == NULL; } /** diff --git a/lib/llist.c b/lib/llist.c index 6c69f1d14c4..878985c4d19 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -34,8 +34,10 @@ * @new_first: first entry in batch to be added * @new_last: last entry in batch to be added * @head: the head for your lock-less list + * + * Return whether list is empty before adding. */ -void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, +bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_head *head) { struct llist_node *entry, *old_entry; @@ -49,6 +51,8 @@ void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, break; cpu_relax(); } + + return old_entry == NULL; } EXPORT_SYMBOL_GPL(llist_add_batch); From 2ca3b5273f6c21f66126c15e5f4185af93427654 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Sep 2011 14:00:46 +0800 Subject: [PATCH 364/678] irq_work: Use llist in the struct irq_work logic Use llist in irq_work instead of the lock-less linked list implementation in irq_work to avoid the code duplication. Signed-off-by: Huang Ying Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1315461646-1379-6-git-send-email-ying.huang@intel.com Signed-off-by: Ingo Molnar --- include/linux/irq_work.h | 15 ++++--- kernel/irq_work.c | 91 +++++++++++++++------------------------- 2 files changed, 42 insertions(+), 64 deletions(-) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 4fa09d4d0b7..6a9e8f5399e 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -1,20 +1,23 @@ #ifndef _LINUX_IRQ_WORK_H #define _LINUX_IRQ_WORK_H +#include + struct irq_work { - struct irq_work *next; + unsigned long flags; + struct llist_node llnode; void (*func)(struct irq_work *); }; static inline -void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *)) +void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) { - entry->next = NULL; - entry->func = func; + work->flags = 0; + work->func = func; } -bool irq_work_queue(struct irq_work *entry); +bool irq_work_queue(struct irq_work *work); void irq_work_run(void); -void irq_work_sync(struct irq_work *entry); +void irq_work_sync(struct irq_work *work); #endif /* _LINUX_IRQ_WORK_H */ diff --git a/kernel/irq_work.c b/kernel/irq_work.c index c58fa7da8ae..6f0a4310def 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -17,54 +17,34 @@ * claimed NULL, 3 -> {pending} : claimed to be enqueued * pending next, 3 -> {busy} : queued, pending callback * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed - * - * We use the lower two bits of the next pointer to keep PENDING and BUSY - * flags. 
*/ #define IRQ_WORK_PENDING 1UL #define IRQ_WORK_BUSY 2UL #define IRQ_WORK_FLAGS 3UL -static inline bool irq_work_is_set(struct irq_work *entry, int flags) -{ - return (unsigned long)entry->next & flags; -} - -static inline struct irq_work *irq_work_next(struct irq_work *entry) -{ - unsigned long next = (unsigned long)entry->next; - next &= ~IRQ_WORK_FLAGS; - return (struct irq_work *)next; -} - -static inline struct irq_work *next_flags(struct irq_work *entry, int flags) -{ - unsigned long next = (unsigned long)entry; - next |= flags; - return (struct irq_work *)next; -} - -static DEFINE_PER_CPU(struct irq_work *, irq_work_list); +static DEFINE_PER_CPU(struct llist_head, irq_work_list); /* * Claim the entry so that no one else will poke at it. */ -static bool irq_work_claim(struct irq_work *entry) +static bool irq_work_claim(struct irq_work *work) { - struct irq_work *next, *nflags; + unsigned long flags, nflags; - do { - next = entry->next; - if ((unsigned long)next & IRQ_WORK_PENDING) + for (;;) { + flags = work->flags; + if (flags & IRQ_WORK_PENDING) return false; - nflags = next_flags(next, IRQ_WORK_FLAGS); - } while (cmpxchg(&entry->next, next, nflags) != next); + nflags = flags | IRQ_WORK_FLAGS; + if (cmpxchg(&work->flags, flags, nflags) == flags) + break; + cpu_relax(); + } return true; } - void __weak arch_irq_work_raise(void) { /* @@ -75,20 +55,15 @@ void __weak arch_irq_work_raise(void) /* * Queue the entry and raise the IPI if needed. */ -static void __irq_work_queue(struct irq_work *entry) +static void __irq_work_queue(struct irq_work *work) { - struct irq_work *next; + bool empty; preempt_disable(); - do { - next = __this_cpu_read(irq_work_list); - /* Can assign non-atomic because we keep the flags set. */ - entry->next = next_flags(next, IRQ_WORK_FLAGS); - } while (this_cpu_cmpxchg(irq_work_list, next, entry) != next); - + empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); /* The list was empty, raise self-interrupt to start processing. */ - if (!irq_work_next(entry)) + if (empty) arch_irq_work_raise(); preempt_enable(); @@ -100,16 +75,16 @@ static void __irq_work_queue(struct irq_work *entry) * * Can be re-enqueued while the callback is still in progress. */ -bool irq_work_queue(struct irq_work *entry) +bool irq_work_queue(struct irq_work *work) { - if (!irq_work_claim(entry)) { + if (!irq_work_claim(work)) { /* * Already enqueued, can't do! */ return false; } - __irq_work_queue(entry); + __irq_work_queue(work); return true; } EXPORT_SYMBOL_GPL(irq_work_queue); @@ -120,34 +95,34 @@ EXPORT_SYMBOL_GPL(irq_work_queue); */ void irq_work_run(void) { - struct irq_work *list; + struct irq_work *work; + struct llist_head *this_list; + struct llist_node *llnode; - if (this_cpu_read(irq_work_list) == NULL) + this_list = &__get_cpu_var(irq_work_list); + if (llist_empty(this_list)) return; BUG_ON(!in_irq()); BUG_ON(!irqs_disabled()); - list = this_cpu_xchg(irq_work_list, NULL); - - while (list != NULL) { - struct irq_work *entry = list; + llnode = llist_del_all(this_list); + while (llnode != NULL) { + work = llist_entry(llnode, struct irq_work, llnode); - list = irq_work_next(list); + llnode = llnode->next; /* - * Clear the PENDING bit, after this point the @entry + * Clear the PENDING bit, after this point the @work * can be re-used. */ - entry->next = next_flags(NULL, IRQ_WORK_BUSY); - entry->func(entry); + work->flags = IRQ_WORK_BUSY; + work->func(work); /* * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. 
*/ - (void)cmpxchg(&entry->next, - next_flags(NULL, IRQ_WORK_BUSY), - NULL); + (void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0); } } EXPORT_SYMBOL_GPL(irq_work_run); @@ -156,11 +131,11 @@ EXPORT_SYMBOL_GPL(irq_work_run); * Synchronize against the irq_work @entry, ensures the entry is not * currently in use. */ -void irq_work_sync(struct irq_work *entry) +void irq_work_sync(struct irq_work *work) { WARN_ON_ONCE(irqs_disabled()); - while (irq_work_is_set(entry, IRQ_WORK_BUSY)) + while (work->flags & IRQ_WORK_BUSY) cpu_relax(); } EXPORT_SYMBOL_GPL(irq_work_sync); From 6d0cfc962a77f79fbf6b38ab216616bfc2a2b157 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 Sep 2011 13:12:28 +0200 Subject: [PATCH 365/678] llist: Add llist_next() So we don't have to expose the struct list_node member. Cc: Huang Ying Cc: Andrew Morton Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1315836348.26517.41.camel@twins Signed-off-by: Ingo Molnar --- include/linux/llist.h | 5 +++++ kernel/irq_work.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/llist.h b/include/linux/llist.h index 27bbdf5ddf8..e2e96d04ee4 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -138,6 +138,11 @@ static inline bool llist_empty(const struct llist_head *head) return ACCESS_ONCE(head->first) == NULL; } +static inline struct llist_node *llist_next(struct llist_node *node) +{ + return node->next; +} + /** * llist_add - add a new entry * @new: new entry to be added diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 6f0a4310def..0e2cde4f380 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -110,7 +110,7 @@ void irq_work_run(void) while (llnode != NULL) { work = llist_entry(llnode, struct irq_work, llnode); - llnode = llnode->next; + llnode = llist_next(llnode); /* * Clear the PENDING bit, after this point the @work From 450b1299462607467315fc546fd680e5079ca435 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 Sep 2011 15:50:49 +0200 Subject: [PATCH 366/678] llist: Remove cpu_relax() usage in cmpxchg loops Initial benchmarks show they're a net loss: $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor ; do echo performance > $i; done $ echo 4096 32000 64 128 > /proc/sys/kernel/sem $ ./sembench -t 2048 -w 1900 -o 0 Pre: run time 30 seconds 778936 worker burns per second run time 30 seconds 912190 worker burns per second run time 30 seconds 817506 worker burns per second run time 30 seconds 830870 worker burns per second run time 30 seconds 845056 worker burns per second Post: run time 30 seconds 905920 worker burns per second run time 30 seconds 849046 worker burns per second run time 30 seconds 886286 worker burns per second run time 30 seconds 822320 worker burns per second run time 30 seconds 900283 worker burns per second So about 4% faster. (!) cpu_relax() stalls the pipeline, therefore, when used in a tight loop it has the following benefits: - allows SMT siblings to have a go; - reduces pressure on the CPU interconnect. However, cmpxchg loops are unfair and thus have unbounded completion time, therefore we should avoid getting in such heavily contended situations where the above benefits make any difference. A typical cmpxchg loop should not go round more than a handfull of times at worst, therefore adding extra delays just slows things down. Since the llist primitives are new, there aren't any bad users yet, and we should avoid growing them. 
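For reference, a minimal userspace sketch of the loop shape being measured here, with C11 atomics standing in for the kernel's cmpxchg(); the names are illustrative and the push mirrors llist_add(), retrying immediately after a lost race with no relaxation pause in between:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
	struct node *next;
	int val;
};

static _Atomic(struct node *) head;            /* lock-less list head, starts NULL */

/* returns true if the list was empty before the push (cf. llist_add()) */
static bool push(struct node *new)
{
	struct node *first = atomic_load(&head);

	do {
		new->next = first;
		/* on failure, 'first' is reloaded with the current head and we retry */
	} while (!atomic_compare_exchange_weak(&head, &first, new));

	return first == NULL;
}

int main(void)
{
	struct node a = { .val = 1 }, b = { .val = 2 };

	printf("was empty: %d\n", push(&a));   /* 1 */
	printf("was empty: %d\n", push(&b));   /* 0 */
	return 0;
}
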
Heavily contended sites should generally be better off using the ticket locks for serialization since they provide bounded completion times (fifo-fair over the cpus). Signed-off-by: Peter Zijlstra Cc: Huang Ying Cc: Andrew Morton Link: http://lkml.kernel.org/r/1315836358.26517.43.camel@twins Signed-off-by: Ingo Molnar --- include/linux/llist.h | 1 - lib/llist.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/include/linux/llist.h b/include/linux/llist.h index e2e96d04ee4..837fb4ae66f 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -161,7 +161,6 @@ static inline bool llist_add(struct llist_node *new, struct llist_head *head) entry = cmpxchg(&head->first, old_entry, new); if (entry == old_entry) break; - cpu_relax(); } return old_entry == NULL; diff --git a/lib/llist.c b/lib/llist.c index 878985c4d19..700cff77a38 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -49,7 +49,6 @@ bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, entry = cmpxchg(&head->first, old_entry, new_first); if (entry == old_entry) break; - cpu_relax(); } return old_entry == NULL; @@ -83,7 +82,6 @@ struct llist_node *llist_del_first(struct llist_head *head) entry = cmpxchg(&head->first, old_entry, next); if (entry == old_entry) break; - cpu_relax(); } return entry; From c3a8f0db9189ecbda24b6a423bd9cc55e7ffd0a9 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 5 Oct 2011 17:25:28 +1100 Subject: [PATCH 367/678] llist: Add back llist_add_batch() and llist_del_first() prototypes Commit 1230db8e1543 ("llist: Make some llist functions inline") has deleted the definitions, causing problems for (not upstream yet) code that tries to make use of them. Signed-off-by: Stephen Rothwell Acked-by: Peter Zijlstra Cc: Huang Ying Cc: David Miller Link: http://lkml.kernel.org/r/20111005172528.0d0a8afc65acef7ace22a24e@canb.auug.org.au Signed-off-by: Ingo Molnar --- include/linux/llist.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/llist.h b/include/linux/llist.h index 837fb4ae66f..7287734e08d 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -178,4 +178,10 @@ static inline struct llist_node *llist_del_all(struct llist_head *head) { return xchg(&head->first, NULL); } + +extern bool llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head); +extern struct llist_node *llist_del_first(struct llist_head *head); + #endif /* LLIST_H */ From 017d63788863b4c0f2900ea836c48f0d4e104785 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 31 Oct 2011 17:13:08 -0700 Subject: [PATCH 368/678] llist-return-whether-list-is-empty-before-adding-in-llist_add-fix clarify comment Cc: Huang Ying Cc: Mathieu Desnoyers Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/llist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/llist.h b/include/linux/llist.h index 7287734e08d..801b44b07aa 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -148,7 +148,7 @@ static inline struct llist_node *llist_next(struct llist_node *node) * @new: new entry to be added * @head: the head for your lock-less list * - * Return whether list is empty before adding. + * Returns true if the list was empty prior to adding this entry. 
*/ static inline bool llist_add(struct llist_node *new, struct llist_head *head) { From 2ee5e1ba729569f88035762fc0e40a20e8109cfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Mon, 1 Aug 2011 11:03:28 +0200 Subject: [PATCH 369/678] sched: Remove rq->avg_load_per_task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit a2d47777 ("sched: fix stale value in average load per task") the variable rq->avg_load_per_task is no longer required. Remove it. Signed-off-by: Jan H. Schönherr Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1312189408-17172-1-git-send-email-schnhrr@cs.tu-berlin.de Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 6f9a7ea393c..2eb804691ef 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -527,8 +527,6 @@ struct rq { int cpu; int online; - unsigned long avg_load_per_task; - u64 rt_avg; u64 age_stamp; u64 idle_stamp; @@ -1589,11 +1587,9 @@ static unsigned long cpu_avg_load_per_task(int cpu) unsigned long nr_running = ACCESS_ONCE(rq->nr_running); if (nr_running) - rq->avg_load_per_task = rq->load.weight / nr_running; - else - rq->avg_load_per_task = 0; + return rq->load.weight / nr_running; - return rq->avg_load_per_task; + return 0; } #ifdef CONFIG_PREEMPT From 8c756c47ccf6bf3334a4d39a7ef43778e0aaf68e Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 27 Jul 2011 17:14:55 +0200 Subject: [PATCH 370/678] sched: fix broken SCHED_RESET_ON_FORK handling Setting child->prio = current->normal_prio _after_ SCHED_RESET_ON_FORK has been handled for an RT parent gives birth to a deranged mutant child with non-RT policy, but RT prio and sched_class. Move PI leakage protection up, always set priorities and weight, and if the child is leaving RT class, reset rt_priority to the proper value. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1311779695.8691.2.camel@marge.simson.net Signed-off-by: Ingo Molnar --- kernel/sched.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 2eb804691ef..86a24edff7d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2876,20 +2876,24 @@ void sched_fork(struct task_struct *p) */ p->state = TASK_RUNNING; + /* + * Make sure we do not leak PI boosting priority to the child. + */ + p->prio = current->normal_prio; + /* * Revert to default priority/policy on fork if requested. */ if (unlikely(p->sched_reset_on_fork)) { - if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { + if (task_has_rt_policy(p)) { p->policy = SCHED_NORMAL; - p->normal_prio = p->static_prio; - } - - if (PRIO_TO_NICE(p->static_prio) < 0) { p->static_prio = NICE_TO_PRIO(0); - p->normal_prio = p->static_prio; - set_load_weight(p); - } + p->rt_priority = 0; + } else if (PRIO_TO_NICE(p->static_prio) < 0) + p->static_prio = NICE_TO_PRIO(0); + + p->prio = p->normal_prio = __normal_prio(p); + set_load_weight(p); /* * We don't need the reset flag anymore after the fork. It has @@ -2898,11 +2902,6 @@ void sched_fork(struct task_struct *p) p->sched_reset_on_fork = 0; } - /* - * Make sure we do not leak PI boosting priority to the child. 
- */ - p->prio = current->normal_prio; - if (!rt_prio(p->prio)) p->sched_class = &fair_sched_class; From 53bd6e4a3c0ad52d9203e42fe493689db89615dd Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:27 -0700 Subject: [PATCH 371/678] sched: Implement hierarchical task accounting for SCHED_OTHER Introduce hierarchical task accounting for the group scheduling case in CFS, as well as promoting the responsibility for maintaining rq->nr_running to the scheduling classes. The primary motivation for this is that with scheduling classes supporting bandwidth throttling it is possible for entities participating in throttled sub-trees to not have root visible changes in rq->nr_running across activate and de-activate operations. This in turn leads to incorrect idle and weight-per-task load balance decisions. This also allows us to make a small fixlet to the fastpath in pick_next_task() under group scheduling. Note: this issue also exists with the existing sched_rt throttling mechanism. This patch does not address that. Signed-off-by: Paul Turner Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184756.878333391@google.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 ++---- kernel/sched_fair.c | 6 ++++++ kernel/sched_rt.c | 5 ++++- kernel/sched_stoptask.c | 2 ++ 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 86a24edff7d..a012b640b58 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -313,7 +313,7 @@ struct task_group root_task_group; /* CFS-related fields in a runqueue */ struct cfs_rq { struct load_weight load; - unsigned long nr_running; + unsigned long nr_running, h_nr_running; u64 exec_clock; u64 min_vruntime; @@ -1855,7 +1855,6 @@ static void activate_task(struct rq *rq, struct task_struct *p, int flags) rq->nr_uninterruptible--; enqueue_task(rq, p, flags); - inc_nr_running(rq); } /* @@ -1867,7 +1866,6 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) rq->nr_uninterruptible++; dequeue_task(rq, p, flags); - dec_nr_running(rq); } #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -4503,7 +4501,7 @@ pick_next_task(struct rq *rq) * Optimization: we know that if all tasks are in * the fair class we can call that function directly: */ - if (likely(rq->nr_running == rq->cfs.nr_running)) { + if (likely(rq->nr_running == rq->cfs.h_nr_running)) { p = fair_sched_class.pick_next_task(rq); if (likely(p)) return p; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 82529361c59..aac5926bc3a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1313,16 +1313,19 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) break; cfs_rq = cfs_rq_of(se); enqueue_entity(cfs_rq, se, flags); + cfs_rq->h_nr_running++; flags = ENQUEUE_WAKEUP; } for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); + cfs_rq->h_nr_running++; update_cfs_load(cfs_rq, 0); update_cfs_shares(cfs_rq); } + inc_nr_running(rq); hrtick_update(rq); } @@ -1342,6 +1345,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); + cfs_rq->h_nr_running--; /* Don't dequeue parent if it has other entities besides us */ if (cfs_rq->load.weight) { @@ -1361,11 +1365,13 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); + cfs_rq->h_nr_running--; update_cfs_load(cfs_rq, 0); update_cfs_shares(cfs_rq); } + 
dec_nr_running(rq); hrtick_update(rq); } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 5bc10c1195f..e2671631b96 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -962,6 +962,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); + + inc_nr_running(rq); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) @@ -972,6 +974,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) dequeue_rt_entity(rt_se); dequeue_pushable_task(rq, p); + + dec_nr_running(rq); } /* @@ -1861,4 +1865,3 @@ static void print_rt_stats(struct seq_file *m, int cpu) rcu_read_unlock(); } #endif /* CONFIG_SCHED_DEBUG */ - diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 6f437632afa..8b44e7fa7fb 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c @@ -34,11 +34,13 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) static void enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags) { + inc_nr_running(rq); } static void dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags) { + dec_nr_running(rq); } static void yield_task_stop(struct rq *rq) From cc86b4c44bfa11fd8cecfc9053c7bd79efcb0ef8 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:28 -0700 Subject: [PATCH 372/678] sched: Introduce primitives to account for CFS bandwidth tracking In this patch we introduce the notion of CFS bandwidth, partitioned into globally unassigned bandwidth, and locally claimed bandwidth. - The global bandwidth is per task_group, it represents a pool of unclaimed bandwidth that cfs_rqs can allocate from. - The local bandwidth is tracked per-cfs_rq, this represents allotments from the global pool bandwidth assigned to a specific cpu. Bandwidth is managed via cgroupfs, adding two new interfaces to the cpu subsystem: - cpu.cfs_period_us : the bandwidth period in usecs - cpu.cfs_quota_us : the cpu bandwidth (in usecs) that this tg will be allowed to consume over period above. Signed-off-by: Paul Turner Signed-off-by: Nikhil Rao Signed-off-by: Bharata B Rao Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184756.972636699@google.com Signed-off-by: Ingo Molnar --- init/Kconfig | 12 +++ kernel/sched.c | 196 +++++++++++++++++++++++++++++++++++++++++++- kernel/sched_fair.c | 16 ++++ 3 files changed, 220 insertions(+), 4 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 6aad581f18e..97315e5f14f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -715,6 +715,18 @@ config FAIR_GROUP_SCHED depends on CGROUP_SCHED default CGROUP_SCHED +config CFS_BANDWIDTH + bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" + depends on EXPERIMENTAL + depends on FAIR_GROUP_SCHED + default n + help + This option allows users to define CPU bandwidth rates (limits) for + tasks running within the fair group scheduler. Groups with no limit + set are considered to be unconstrained and will run with no + restriction. + See tip/Documentation/scheduler/sched-bwc.txt for more information. 
+ config RT_GROUP_SCHED bool "Group scheduling for SCHED_RR/FIFO" depends on EXPERIMENTAL diff --git a/kernel/sched.c b/kernel/sched.c index a012b640b58..7a9fe756e9c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -249,6 +249,14 @@ struct cfs_rq; static LIST_HEAD(task_groups); +struct cfs_bandwidth { +#ifdef CONFIG_CFS_BANDWIDTH + raw_spinlock_t lock; + ktime_t period; + u64 quota; +#endif +}; + /* task group related information */ struct task_group { struct cgroup_subsys_state css; @@ -280,6 +288,8 @@ struct task_group { #ifdef CONFIG_SCHED_AUTOGROUP struct autogroup *autogroup; #endif + + struct cfs_bandwidth cfs_bandwidth; }; /* task_group_lock serializes the addition/removal of task groups */ @@ -379,9 +389,48 @@ struct cfs_rq { unsigned long load_contribution; #endif +#ifdef CONFIG_CFS_BANDWIDTH + int runtime_enabled; + s64 runtime_remaining; +#endif #endif }; +#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_CFS_BANDWIDTH +static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) +{ + return &tg->cfs_bandwidth; +} + +static inline u64 default_cfs_period(void); + +static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) +{ + raw_spin_lock_init(&cfs_b->lock); + cfs_b->quota = RUNTIME_INF; + cfs_b->period = ns_to_ktime(default_cfs_period()); +} + +static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) +{ + cfs_rq->runtime_enabled = 0; +} + +static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) +{} +#else +static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} +static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} +static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} + +static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) +{ + return NULL; +} +#endif /* CONFIG_CFS_BANDWIDTH */ +#endif /* CONFIG_FAIR_GROUP_SCHED */ + /* Real-Time classes' related field in a runqueue: */ struct rt_rq { struct rt_prio_array active; @@ -8291,6 +8340,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, /* allow initial update_cfs_load() to truncate */ cfs_rq->load_stamp = 1; #endif + init_cfs_rq_runtime(cfs_rq); tg->cfs_rq[cpu] = cfs_rq; tg->se[cpu] = se; @@ -8430,6 +8480,7 @@ void __init sched_init(void) * We achieve this by letting root_task_group's tasks sit * directly in rq->cfs (i.e root_task_group->se[] = NULL). 
*/ + init_cfs_bandwidth(&root_task_group.cfs_bandwidth); init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -8681,6 +8732,8 @@ static void free_fair_sched_group(struct task_group *tg) { int i; + destroy_cfs_bandwidth(tg_cfs_bandwidth(tg)); + for_each_possible_cpu(i) { if (tg->cfs_rq) kfree(tg->cfs_rq[i]); @@ -8708,6 +8761,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) tg->shares = NICE_0_LOAD; + init_cfs_bandwidth(tg_cfs_bandwidth(tg)); + for_each_possible_cpu(i) { cfs_rq = kzalloc_node(sizeof(struct cfs_rq), GFP_KERNEL, cpu_to_node(i)); @@ -9090,7 +9145,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) return walk_tg_tree(tg_schedulable, tg_nop, &data); } -static int tg_set_bandwidth(struct task_group *tg, +static int tg_set_rt_bandwidth(struct task_group *tg, u64 rt_period, u64 rt_runtime) { int i, err = 0; @@ -9129,7 +9184,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) if (rt_runtime_us < 0) rt_runtime = RUNTIME_INF; - return tg_set_bandwidth(tg, rt_period, rt_runtime); + return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } long sched_group_rt_runtime(struct task_group *tg) @@ -9154,7 +9209,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) if (rt_period == 0) return -EINVAL; - return tg_set_bandwidth(tg, rt_period, rt_runtime); + return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } long sched_group_rt_period(struct task_group *tg) @@ -9358,6 +9413,128 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) return (u64) scale_load_down(tg->shares); } + +#ifdef CONFIG_CFS_BANDWIDTH +const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ +const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ + +static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) +{ + int i; + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); + static DEFINE_MUTEX(mutex); + + if (tg == &root_task_group) + return -EINVAL; + + /* + * Ensure we have at some amount of bandwidth every period. This is + * to prevent reaching a state of large arrears when throttled via + * entity_tick() resulting in prolonged exit starvation. + */ + if (quota < min_cfs_quota_period || period < min_cfs_quota_period) + return -EINVAL; + + /* + * Likewise, bound things on the otherside by preventing insane quota + * periods. This also allows us to normalize in computing quota + * feasibility. 
+ */ + if (period > max_cfs_quota_period) + return -EINVAL; + + mutex_lock(&mutex); + raw_spin_lock_irq(&cfs_b->lock); + cfs_b->period = ns_to_ktime(period); + cfs_b->quota = quota; + raw_spin_unlock_irq(&cfs_b->lock); + + for_each_possible_cpu(i) { + struct cfs_rq *cfs_rq = tg->cfs_rq[i]; + struct rq *rq = rq_of(cfs_rq); + + raw_spin_lock_irq(&rq->lock); + cfs_rq->runtime_enabled = quota != RUNTIME_INF; + cfs_rq->runtime_remaining = 0; + raw_spin_unlock_irq(&rq->lock); + } + mutex_unlock(&mutex); + + return 0; +} + +int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) +{ + u64 quota, period; + + period = ktime_to_ns(tg_cfs_bandwidth(tg)->period); + if (cfs_quota_us < 0) + quota = RUNTIME_INF; + else + quota = (u64)cfs_quota_us * NSEC_PER_USEC; + + return tg_set_cfs_bandwidth(tg, period, quota); +} + +long tg_get_cfs_quota(struct task_group *tg) +{ + u64 quota_us; + + if (tg_cfs_bandwidth(tg)->quota == RUNTIME_INF) + return -1; + + quota_us = tg_cfs_bandwidth(tg)->quota; + do_div(quota_us, NSEC_PER_USEC); + + return quota_us; +} + +int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) +{ + u64 quota, period; + + period = (u64)cfs_period_us * NSEC_PER_USEC; + quota = tg_cfs_bandwidth(tg)->quota; + + if (period <= 0) + return -EINVAL; + + return tg_set_cfs_bandwidth(tg, period, quota); +} + +long tg_get_cfs_period(struct task_group *tg) +{ + u64 cfs_period_us; + + cfs_period_us = ktime_to_ns(tg_cfs_bandwidth(tg)->period); + do_div(cfs_period_us, NSEC_PER_USEC); + + return cfs_period_us; +} + +static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft) +{ + return tg_get_cfs_quota(cgroup_tg(cgrp)); +} + +static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype, + s64 cfs_quota_us) +{ + return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us); +} + +static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft) +{ + return tg_get_cfs_period(cgroup_tg(cgrp)); +} + +static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype, + u64 cfs_period_us) +{ + return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us); +} + +#endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_RT_GROUP_SCHED @@ -9392,6 +9569,18 @@ static struct cftype cpu_files[] = { .write_u64 = cpu_shares_write_u64, }, #endif +#ifdef CONFIG_CFS_BANDWIDTH + { + .name = "cfs_quota_us", + .read_s64 = cpu_cfs_quota_read_s64, + .write_s64 = cpu_cfs_quota_write_s64, + }, + { + .name = "cfs_period_us", + .read_u64 = cpu_cfs_period_read_u64, + .write_u64 = cpu_cfs_period_write_u64, + }, +#endif #ifdef CONFIG_RT_GROUP_SCHED { .name = "rt_runtime_us", @@ -9770,4 +9959,3 @@ struct cgroup_subsys cpuacct_subsys = { .subsys_id = cpuacct_subsys_id, }; #endif /* CONFIG_CGROUP_CPUACCT */ - diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index aac5926bc3a..b380392f14b 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1237,6 +1237,22 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) check_preempt_tick(cfs_rq, curr); } + +/************************************************** + * CFS bandwidth control machinery + */ + +#ifdef CONFIG_CFS_BANDWIDTH +/* + * default period for cfs group bandwidth. 
+ * default: 0.1s, units: nanoseconds + */ +static inline u64 default_cfs_period(void) +{ + return 100000000ULL; +} +#endif + /************************************************** * CFS operations on tasks: */ From 29aa27add21f292c81f49cdb1b1113530309c0d7 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:29 -0700 Subject: [PATCH 373/678] sched: Validate CFS quota hierarchies Add constraints validation for CFS bandwidth hierarchies. Validate that: max(child bandwidth) <= parent_bandwidth In a quota limited hierarchy, an unconstrained entity (e.g. bandwidth==RUNTIME_INF) inherits the bandwidth of its parent. This constraint is chosen over sum(child_bandwidth) as notion of over-commit is valuable within SCHED_OTHER. Some basic code from the RT case is re-factored for reuse. Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.083774572@google.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 112 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 98 insertions(+), 14 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 7a9fe756e9c..2467f90484d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -254,6 +254,7 @@ struct cfs_bandwidth { raw_spinlock_t lock; ktime_t period; u64 quota; + s64 hierarchal_quota; #endif }; @@ -1538,7 +1539,8 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) update_load_sub(&rq->load, load); } -#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED) +#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \ + (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH))) typedef int (*tg_visitor)(struct task_group *, void *); /* @@ -9045,12 +9047,7 @@ unsigned long sched_group_shares(struct task_group *tg) } #endif -#ifdef CONFIG_RT_GROUP_SCHED -/* - * Ensure that the real time constraints are schedulable. - */ -static DEFINE_MUTEX(rt_constraints_mutex); - +#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH) static unsigned long to_ratio(u64 period, u64 runtime) { if (runtime == RUNTIME_INF) @@ -9058,6 +9055,13 @@ static unsigned long to_ratio(u64 period, u64 runtime) return div64_u64(runtime << 20, period); } +#endif + +#ifdef CONFIG_RT_GROUP_SCHED +/* + * Ensure that the real time constraints are schedulable. 
+ */ +static DEFINE_MUTEX(rt_constraints_mutex); /* Must be called with tasklist_lock held */ static inline int tg_has_rt_tasks(struct task_group *tg) @@ -9078,7 +9082,7 @@ struct rt_schedulable_data { u64 rt_runtime; }; -static int tg_schedulable(struct task_group *tg, void *data) +static int tg_rt_schedulable(struct task_group *tg, void *data) { struct rt_schedulable_data *d = data; struct task_group *child; @@ -9142,7 +9146,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) .rt_runtime = runtime, }; - return walk_tg_tree(tg_schedulable, tg_nop, &data); + return walk_tg_tree(tg_rt_schedulable, tg_nop, &data); } static int tg_set_rt_bandwidth(struct task_group *tg, @@ -9415,14 +9419,17 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) } #ifdef CONFIG_CFS_BANDWIDTH +static DEFINE_MUTEX(cfs_constraints_mutex); + const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ +static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); + static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) { - int i; + int i, ret = 0; struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); - static DEFINE_MUTEX(mutex); if (tg == &root_task_group) return -EINVAL; @@ -9443,7 +9450,11 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) if (period > max_cfs_quota_period) return -EINVAL; - mutex_lock(&mutex); + mutex_lock(&cfs_constraints_mutex); + ret = __cfs_schedulable(tg, period, quota); + if (ret) + goto out_unlock; + raw_spin_lock_irq(&cfs_b->lock); cfs_b->period = ns_to_ktime(period); cfs_b->quota = quota; @@ -9458,9 +9469,10 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) cfs_rq->runtime_remaining = 0; raw_spin_unlock_irq(&rq->lock); } - mutex_unlock(&mutex); +out_unlock: + mutex_unlock(&cfs_constraints_mutex); - return 0; + return ret; } int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) @@ -9534,6 +9546,78 @@ static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype, return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us); } +struct cfs_schedulable_data { + struct task_group *tg; + u64 period, quota; +}; + +/* + * normalize group quota/period to be quota/max_period + * note: units are usecs + */ +static u64 normalize_cfs_quota(struct task_group *tg, + struct cfs_schedulable_data *d) +{ + u64 quota, period; + + if (tg == d->tg) { + period = d->period; + quota = d->quota; + } else { + period = tg_get_cfs_period(tg); + quota = tg_get_cfs_quota(tg); + } + + /* note: these should typically be equivalent */ + if (quota == RUNTIME_INF || quota == -1) + return RUNTIME_INF; + + return to_ratio(period, quota); +} + +static int tg_cfs_schedulable_down(struct task_group *tg, void *data) +{ + struct cfs_schedulable_data *d = data; + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); + s64 quota = 0, parent_quota = -1; + + if (!tg->parent) { + quota = RUNTIME_INF; + } else { + struct cfs_bandwidth *parent_b = tg_cfs_bandwidth(tg->parent); + + quota = normalize_cfs_quota(tg, d); + parent_quota = parent_b->hierarchal_quota; + + /* + * ensure max(child_quota) <= parent_quota, inherit when no + * limit is set + */ + if (quota == RUNTIME_INF) + quota = parent_quota; + else if (parent_quota != RUNTIME_INF && quota > parent_quota) + return -EINVAL; + } + cfs_b->hierarchal_quota = quota; + + return 0; +} + +static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) 
+{ + struct cfs_schedulable_data data = { + .tg = tg, + .period = period, + .quota = quota, + }; + + if (quota != RUNTIME_INF) { + do_div(data.period, NSEC_PER_USEC); + do_div(data.quota, NSEC_PER_USEC); + } + + return walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data); +} #endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ From cc26500ac8b058c77185e6bbd0abacbb7852a6f9 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:30 -0700 Subject: [PATCH 374/678] sched: Accumulate per-cfs_rq cpu usage and charge against bandwidth Account bandwidth usage on the cfs_rq level versus the task_groups to which they belong. Whether we are tracking bandwidth on a given cfs_rq is maintained under cfs_rq->runtime_enabled. cfs_rq's which belong to a bandwidth constrained task_group have their runtime accounted via the update_curr() path, which withdraws bandwidth from the global pool as desired. Updates involving the global pool are currently protected under cfs_bandwidth->lock, local runtime is protected by rq->lock. This patch only assigns and tracks quota, no action is taken in the case that cfs_rq->runtime_used exceeds cfs_rq->runtime_assigned. Signed-off-by: Paul Turner Signed-off-by: Nikhil Rao Signed-off-by: Bharata B Rao Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.179386821@google.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 +++ kernel/sched.c | 4 ++- kernel/sched_fair.c | 79 +++++++++++++++++++++++++++++++++++++++++-- kernel/sysctl.c | 10 ++++++ 4 files changed, 94 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 306ec7b4305..3c5e6c5b036 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2056,6 +2056,10 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { } static inline void sched_autogroup_exit(struct signal_struct *sig) { } #endif +#ifdef CONFIG_CFS_BANDWIDTH +extern unsigned int sysctl_sched_cfs_bandwidth_slice; +#endif + #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); diff --git a/kernel/sched.c b/kernel/sched.c index 2467f90484d..c9c12070246 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -253,7 +253,7 @@ struct cfs_bandwidth { #ifdef CONFIG_CFS_BANDWIDTH raw_spinlock_t lock; ktime_t period; - u64 quota; + u64 quota, runtime; s64 hierarchal_quota; #endif }; @@ -409,6 +409,7 @@ static inline u64 default_cfs_period(void); static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) { raw_spin_lock_init(&cfs_b->lock); + cfs_b->runtime = 0; cfs_b->quota = RUNTIME_INF; cfs_b->period = ns_to_ktime(default_cfs_period()); } @@ -9458,6 +9459,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) raw_spin_lock_irq(&cfs_b->lock); cfs_b->period = ns_to_ktime(period); cfs_b->quota = quota; + cfs_b->runtime = quota; raw_spin_unlock_irq(&cfs_b->lock); for_each_possible_cpu(i) { diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index b380392f14b..a091014a09d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -89,6 +89,20 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL; */ unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; +#ifdef CONFIG_CFS_BANDWIDTH +/* + * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool + * each time a cfs_rq requests quota. 
+ * + * Note: in the case that the slice exceeds the runtime remaining (either due + * to consumption or the quota being specified to be smaller than the slice) + * we will always only issue the remaining available time. + * + * default: 5 msec, units: microseconds + */ +unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; +#endif + static const struct sched_class fair_sched_class; /************************************************************** @@ -292,6 +306,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) #endif /* CONFIG_FAIR_GROUP_SCHED */ +static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, + unsigned long delta_exec); /************************************************************** * Scheduling class tree data structure manipulation methods: @@ -583,6 +599,8 @@ static void update_curr(struct cfs_rq *cfs_rq) cpuacct_charge(curtask, delta_exec); account_group_exec_runtime(curtask, delta_exec); } + + account_cfs_rq_runtime(cfs_rq, delta_exec); } static inline void @@ -1251,6 +1269,58 @@ static inline u64 default_cfs_period(void) { return 100000000ULL; } + +static inline u64 sched_cfs_bandwidth_slice(void) +{ + return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC; +} + +static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) +{ + struct task_group *tg = cfs_rq->tg; + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); + u64 amount = 0, min_amount; + + /* note: this is a positive sum as runtime_remaining <= 0 */ + min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; + + raw_spin_lock(&cfs_b->lock); + if (cfs_b->quota == RUNTIME_INF) + amount = min_amount; + else if (cfs_b->runtime > 0) { + amount = min(cfs_b->runtime, min_amount); + cfs_b->runtime -= amount; + } + raw_spin_unlock(&cfs_b->lock); + + cfs_rq->runtime_remaining += amount; +} + +static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, + unsigned long delta_exec) +{ + if (!cfs_rq->runtime_enabled) + return; + + cfs_rq->runtime_remaining -= delta_exec; + if (cfs_rq->runtime_remaining > 0) + return; + + assign_cfs_rq_runtime(cfs_rq); +} + +static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, + unsigned long delta_exec) +{ + if (!cfs_rq->runtime_enabled) + return; + + __account_cfs_rq_runtime(cfs_rq, delta_exec); +} + +#else +static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, + unsigned long delta_exec) {} #endif /************************************************** @@ -4272,8 +4342,13 @@ static void set_curr_task_fair(struct rq *rq) { struct sched_entity *se = &rq->curr->se; - for_each_sched_entity(se) - set_next_entity(cfs_rq_of(se), se); + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + set_next_entity(cfs_rq, se); + /* ensure bandwidth has been allocated on our new cfs_rq */ + account_cfs_rq_runtime(cfs_rq, 0); + } } #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b33ea9870c4..094db256922 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -380,6 +380,16 @@ static struct ctl_table kern_table[] = { .extra2 = &one, }, #endif +#ifdef CONFIG_CFS_BANDWIDTH + { + .procname = "sched_cfs_bandwidth_slice_us", + .data = &sysctl_sched_cfs_bandwidth_slice, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + }, +#endif #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", From c70398e29c318b86b9ccc82c6365a35a5896a860 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:31 -0700 Subject: [PATCH 375/678] sched: 
Add a timer to handle CFS bandwidth refresh This patch adds a per-task_group timer which handles the refresh of the global CFS bandwidth pool. Since the RT pool is using a similar timer there's some small refactoring to share this support. Signed-off-by: Paul Turner Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.277271273@google.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 107 +++++++++++++++++++++++++++++++++++--------- kernel/sched_fair.c | 40 +++++++++++++++-- 2 files changed, 123 insertions(+), 24 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index c9c12070246..51428709040 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -198,10 +198,28 @@ static inline int rt_bandwidth_enabled(void) return sysctl_sched_rt_runtime >= 0; } -static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) { - ktime_t now; + unsigned long delta; + ktime_t soft, hard, now; + for (;;) { + if (hrtimer_active(period_timer)) + break; + + now = hrtimer_cb_get_time(period_timer); + hrtimer_forward(period_timer, now, period); + + soft = hrtimer_get_softexpires(period_timer); + hard = hrtimer_get_expires(period_timer); + delta = ktime_to_ns(ktime_sub(hard, soft)); + __hrtimer_start_range_ns(period_timer, soft, delta, + HRTIMER_MODE_ABS_PINNED, 0); + } +} + +static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +{ if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return; @@ -209,22 +227,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) return; raw_spin_lock(&rt_b->rt_runtime_lock); - for (;;) { - unsigned long delta; - ktime_t soft, hard; - - if (hrtimer_active(&rt_b->rt_period_timer)) - break; - - now = hrtimer_cb_get_time(&rt_b->rt_period_timer); - hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); - - soft = hrtimer_get_softexpires(&rt_b->rt_period_timer); - hard = hrtimer_get_expires(&rt_b->rt_period_timer); - delta = ktime_to_ns(ktime_sub(hard, soft)); - __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, - HRTIMER_MODE_ABS_PINNED, 0); - } + start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period); raw_spin_unlock(&rt_b->rt_runtime_lock); } @@ -255,6 +258,9 @@ struct cfs_bandwidth { ktime_t period; u64 quota, runtime; s64 hierarchal_quota; + + int idle, timer_active; + struct hrtimer period_timer; #endif }; @@ -405,6 +411,28 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) } static inline u64 default_cfs_period(void); +static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun); + +static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) +{ + struct cfs_bandwidth *cfs_b = + container_of(timer, struct cfs_bandwidth, period_timer); + ktime_t now; + int overrun; + int idle = 0; + + for (;;) { + now = hrtimer_cb_get_time(timer); + overrun = hrtimer_forward(timer, now, cfs_b->period); + + if (!overrun) + break; + + idle = do_sched_cfs_period_timer(cfs_b, overrun); + } + + return idle ? 
HRTIMER_NORESTART : HRTIMER_RESTART; +} static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) { @@ -412,6 +440,9 @@ static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->runtime = 0; cfs_b->quota = RUNTIME_INF; cfs_b->period = ns_to_ktime(default_cfs_period()); + + hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cfs_b->period_timer.function = sched_cfs_period_timer; } static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) @@ -419,8 +450,34 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) cfs_rq->runtime_enabled = 0; } +/* requires cfs_b->lock, may release to reprogram timer */ +static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) +{ + /* + * The timer may be active because we're trying to set a new bandwidth + * period or because we're racing with the tear-down path + * (timer_active==0 becomes visible before the hrtimer call-back + * terminates). In either case we ensure that it's re-programmed + */ + while (unlikely(hrtimer_active(&cfs_b->period_timer))) { + raw_spin_unlock(&cfs_b->lock); + /* ensure cfs_b->lock is available while we wait */ + hrtimer_cancel(&cfs_b->period_timer); + + raw_spin_lock(&cfs_b->lock); + /* if someone else restarted the timer then we're done */ + if (cfs_b->timer_active) + return; + } + + cfs_b->timer_active = 1; + start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period); +} + static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) -{} +{ + hrtimer_cancel(&cfs_b->period_timer); +} #else static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} @@ -9429,7 +9486,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) { - int i, ret = 0; + int i, ret = 0, runtime_enabled; struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); if (tg == &root_task_group) @@ -9456,10 +9513,18 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) if (ret) goto out_unlock; + runtime_enabled = quota != RUNTIME_INF; raw_spin_lock_irq(&cfs_b->lock); cfs_b->period = ns_to_ktime(period); cfs_b->quota = quota; cfs_b->runtime = quota; + + /* restart the period timer (if active) to handle new period expiry */ + if (runtime_enabled && cfs_b->timer_active) { + /* force a reprogram */ + cfs_b->timer_active = 0; + __start_cfs_bandwidth(cfs_b); + } raw_spin_unlock_irq(&cfs_b->lock); for_each_possible_cpu(i) { @@ -9467,7 +9532,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) struct rq *rq = rq_of(cfs_rq); raw_spin_lock_irq(&rq->lock); - cfs_rq->runtime_enabled = quota != RUNTIME_INF; + cfs_rq->runtime_enabled = runtime_enabled; cfs_rq->runtime_remaining = 0; raw_spin_unlock_irq(&rq->lock); } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a091014a09d..2215221771d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1287,9 +1287,16 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) raw_spin_lock(&cfs_b->lock); if (cfs_b->quota == RUNTIME_INF) amount = min_amount; - else if (cfs_b->runtime > 0) { - amount = min(cfs_b->runtime, min_amount); - cfs_b->runtime -= amount; + else { + /* ensure bandwidth timer remains active under consumption */ + if (!cfs_b->timer_active) + __start_cfs_bandwidth(cfs_b); + + if (cfs_b->runtime > 0) { + amount = min(cfs_b->runtime, min_amount); + cfs_b->runtime -= amount; + cfs_b->idle = 0; + } } raw_spin_unlock(&cfs_b->lock); @@ -1318,6 +1325,33 @@ 
static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, __account_cfs_rq_runtime(cfs_rq, delta_exec); } +/* + * Responsible for refilling a task_group's bandwidth and unthrottling its + * cfs_rqs as appropriate. If there has been no activity within the last + * period the timer is deactivated until scheduling resumes; cfs_b->idle is + * used to track this state. + */ +static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) +{ + int idle = 1; + + raw_spin_lock(&cfs_b->lock); + /* no need to continue the timer with no bandwidth constraint */ + if (cfs_b->quota == RUNTIME_INF) + goto out_unlock; + + idle = cfs_b->idle; + cfs_b->runtime = cfs_b->quota; + + /* mark as potentially idle for the upcoming period */ + cfs_b->idle = 1; +out_unlock: + if (idle) + cfs_b->timer_active = 0; + raw_spin_unlock(&cfs_b->lock); + + return idle; +} #else static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {} From 1f7409ff9e2d0aab8ffa96bbca817039e3020b8f Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:32 -0700 Subject: [PATCH 376/678] sched: Expire invalid runtime Since quota is managed using a global state but consumed on a per-cpu basis we need to ensure that our per-cpu state is appropriately synchronized. Most importantly, runtime that is state (from a previous period) should not be locally consumable. We take advantage of existing sched_clock synchronization about the jiffy to efficiently detect whether we have (globally) crossed a quota boundary above. One catch is that the direction of spread on sched_clock is undefined, specifically, we don't know whether our local clock is behind or ahead of the one responsible for the current expiration time. Fortunately we can differentiate these by considering whether the global deadline has advanced. If it has not, then we assume our clock to be "fast" and advance our local expiration; otherwise, we know the deadline has truly passed and we expire our local runtime. 
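For illustration, the two cases above reduce to a signed comparison between the local and global expiration stamps. The stand-alone sketch below models that decision with plain 64-bit integers; the names and the tick value are assumptions for the demo, while the in-kernel version (expire_cfs_rq_runtime() in this patch) operates on the cfs_rq/cfs_b fields under rq->lock.

  #include <stdio.h>
  #include <stdint.h>

  #define ASSUMED_TICK_NSEC 1000000LL  /* 1ms tick, an assumption for the demo */

  /* decide what happens when the local deadline appears to have passed */
  static int64_t expiry_check(int64_t now, int64_t *local_expires,
                              int64_t global_expires, int64_t remaining)
  {
      if (now - *local_expires < 0)
          return remaining;             /* local deadline still ahead, keep quota */

      if (*local_expires - global_expires >= 0) {
          /* global deadline has not advanced: our clock ran fast,
           * extend the local deadline by the bounded drift */
          *local_expires += ASSUMED_TICK_NSEC;
          return remaining;
      }

      /* global deadline advanced: the locally held runtime is truly stale */
      return 0;
  }

  int main(void)
  {
      int64_t fast_clock = 100, truly_expired = 100;

      printf("fast clock keeps %lld ns\n",
             (long long)expiry_check(150, &fast_clock, 100, 3000));
      printf("advanced global pool leaves %lld ns\n",
             (long long)expiry_check(150, &truly_expired, 900, 3000));
      return 0;
  }
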
Signed-off-by: Paul Turner Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.379275352@google.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 +- kernel/sched_fair.c | 90 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 84 insertions(+), 10 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 51428709040..95c2c411cc6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -258,6 +258,7 @@ struct cfs_bandwidth { ktime_t period; u64 quota, runtime; s64 hierarchal_quota; + u64 runtime_expires; int idle, timer_active; struct hrtimer period_timer; @@ -398,6 +399,7 @@ struct cfs_rq { #endif #ifdef CONFIG_CFS_BANDWIDTH int runtime_enabled; + u64 runtime_expires; s64 runtime_remaining; #endif #endif @@ -9517,8 +9519,8 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) raw_spin_lock_irq(&cfs_b->lock); cfs_b->period = ns_to_ktime(period); cfs_b->quota = quota; - cfs_b->runtime = quota; + __refill_cfs_bandwidth_runtime(cfs_b); /* restart the period timer (if active) to handle new period expiry */ if (runtime_enabled && cfs_b->timer_active) { /* force a reprogram */ diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 2215221771d..3767880167a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1275,11 +1275,30 @@ static inline u64 sched_cfs_bandwidth_slice(void) return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC; } +/* + * Replenish runtime according to assigned quota and update expiration time. + * We use sched_clock_cpu directly instead of rq->clock to avoid adding + * additional synchronization around rq->lock. + * + * requires cfs_b->lock + */ +static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) +{ + u64 now; + + if (cfs_b->quota == RUNTIME_INF) + return; + + now = sched_clock_cpu(smp_processor_id()); + cfs_b->runtime = cfs_b->quota; + cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); +} + static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) { struct task_group *tg = cfs_rq->tg; struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); - u64 amount = 0, min_amount; + u64 amount = 0, min_amount, expires; /* note: this is a positive sum as runtime_remaining <= 0 */ min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; @@ -1288,9 +1307,16 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) if (cfs_b->quota == RUNTIME_INF) amount = min_amount; else { - /* ensure bandwidth timer remains active under consumption */ - if (!cfs_b->timer_active) + /* + * If the bandwidth pool has become inactive, then at least one + * period must have elapsed since the last consumption. + * Refresh the global state and ensure bandwidth timer becomes + * active. + */ + if (!cfs_b->timer_active) { + __refill_cfs_bandwidth_runtime(cfs_b); __start_cfs_bandwidth(cfs_b); + } if (cfs_b->runtime > 0) { amount = min(cfs_b->runtime, min_amount); @@ -1298,19 +1324,61 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) cfs_b->idle = 0; } } + expires = cfs_b->runtime_expires; raw_spin_unlock(&cfs_b->lock); cfs_rq->runtime_remaining += amount; + /* + * we may have advanced our local expiration to account for allowed + * spread between our sched_clock and the one on which runtime was + * issued. 
+ */ + if ((s64)(expires - cfs_rq->runtime_expires) > 0) + cfs_rq->runtime_expires = expires; } -static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, - unsigned long delta_exec) +/* + * Note: This depends on the synchronization provided by sched_clock and the + * fact that rq->clock snapshots this value. + */ +static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) { - if (!cfs_rq->runtime_enabled) + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + struct rq *rq = rq_of(cfs_rq); + + /* if the deadline is ahead of our clock, nothing to do */ + if (likely((s64)(rq->clock - cfs_rq->runtime_expires) < 0)) + return; + + if (cfs_rq->runtime_remaining < 0) return; + /* + * If the local deadline has passed we have to consider the + * possibility that our sched_clock is 'fast' and the global deadline + * has not truly expired. + * + * Fortunately we can check determine whether this the case by checking + * whether the global deadline has advanced. + */ + + if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) { + /* extend local deadline, drift is bounded above by 2 ticks */ + cfs_rq->runtime_expires += TICK_NSEC; + } else { + /* global deadline is ahead, expiration has passed */ + cfs_rq->runtime_remaining = 0; + } +} + +static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, + unsigned long delta_exec) +{ + /* dock delta_exec before expiring quota (as it could span periods) */ cfs_rq->runtime_remaining -= delta_exec; - if (cfs_rq->runtime_remaining > 0) + expire_cfs_rq_runtime(cfs_rq); + + if (likely(cfs_rq->runtime_remaining > 0)) return; assign_cfs_rq_runtime(cfs_rq); @@ -1341,7 +1409,12 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) goto out_unlock; idle = cfs_b->idle; - cfs_b->runtime = cfs_b->quota; + /* if we're going inactive then everything else can be deferred */ + if (idle) + goto out_unlock; + + __refill_cfs_bandwidth_runtime(cfs_b); + /* mark as potentially idle for the upcoming period */ cfs_b->idle = 1; @@ -1560,7 +1633,6 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) return wl; } - #else static inline unsigned long effective_load(struct task_group *tg, int cpu, From dd7a634bf8b0aa452b9390e2dbda5e7ee0b26876 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:33 -0700 Subject: [PATCH 377/678] sched: Add support for throttling group entities Now that consumption is tracked (via update_curr()) we add support to throttle group entities (and their corresponding cfs_rqs) in the case where this is no run-time remaining. Throttled entities are dequeued to prevent scheduling, additionally we mark them as throttled (using cfs_rq->throttled) to prevent them from becoming re-enqueued until they are unthrottled. A list of a task_group's throttled entities are maintained on the cfs_bandwidth structure. Note: While the machinery for throttling is added in this patch the act of throttling an entity exceeding its bandwidth is deferred until later within the series. 
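As a usage note: once the throttling added here is hooked up later in the series, a group whose consumption exceeds its quota is dequeued until the next refresh. A minimal userspace sketch of configuring such a group through the cpu.cfs_period_us/cpu.cfs_quota_us files introduced earlier in the series follows; the mount point /cgroup/cpu and the group name "limited" are assumptions, not paths created by this patch.

  #include <stdio.h>

  /* write a decimal value into a cgroup control file */
  static int write_val(const char *path, long val)
  {
      FILE *f = fopen(path, "w");

      if (!f)
          return -1;
      fprintf(f, "%ld\n", val);
      return fclose(f);
  }

  int main(void)
  {
      /* 100ms period, 20ms quota: the group may run 20ms per 100ms window */
      if (write_val("/cgroup/cpu/limited/cpu.cfs_period_us", 100000) ||
          write_val("/cgroup/cpu/limited/cpu.cfs_quota_us", 20000))
          perror("cgroup write");
      return 0;
  }
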
Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.480608533@google.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 7 ++++ kernel/sched_fair.c | 89 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 4 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 95c2c411cc6..1ad76a53be2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -262,6 +262,8 @@ struct cfs_bandwidth { int idle, timer_active; struct hrtimer period_timer; + struct list_head throttled_cfs_rq; + #endif }; @@ -401,6 +403,9 @@ struct cfs_rq { int runtime_enabled; u64 runtime_expires; s64 runtime_remaining; + + int throttled; + struct list_head throttled_list; #endif #endif }; @@ -443,6 +448,7 @@ static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->quota = RUNTIME_INF; cfs_b->period = ns_to_ktime(default_cfs_period()); + INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cfs_b->period_timer.function = sched_cfs_period_timer; } @@ -450,6 +456,7 @@ static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) { cfs_rq->runtime_enabled = 0; + INIT_LIST_HEAD(&cfs_rq->throttled_list); } /* requires cfs_b->lock, may release to reprogram timer */ diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 3767880167a..d4f93defdd2 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1294,7 +1294,8 @@ static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); } -static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) +/* returns 0 on failure to allocate runtime */ +static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) { struct task_group *tg = cfs_rq->tg; struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); @@ -1335,6 +1336,8 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) */ if ((s64)(expires - cfs_rq->runtime_expires) > 0) cfs_rq->runtime_expires = expires; + + return cfs_rq->runtime_remaining > 0; } /* @@ -1381,7 +1384,12 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, if (likely(cfs_rq->runtime_remaining > 0)) return; - assign_cfs_rq_runtime(cfs_rq); + /* + * if we're unable to extend our runtime we resched so that the active + * hierarchy can be throttled + */ + if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) + resched_task(rq_of(cfs_rq)->curr); } static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, @@ -1393,6 +1401,47 @@ static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, __account_cfs_rq_runtime(cfs_rq, delta_exec); } +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) +{ + return cfs_rq->throttled; +} + +static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq) +{ + struct rq *rq = rq_of(cfs_rq); + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + struct sched_entity *se; + long task_delta, dequeue = 1; + + se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; + + /* account load preceding throttle */ + update_cfs_load(cfs_rq, 0); + + task_delta = cfs_rq->h_nr_running; + for_each_sched_entity(se) { + struct cfs_rq *qcfs_rq = cfs_rq_of(se); + /* throttled entity or throttle-on-deactivate */ + if (!se->on_rq) + break; + + if (dequeue) + dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP); + qcfs_rq->h_nr_running -= task_delta; + + if (qcfs_rq->load.weight) + dequeue = 0; + } + + if (!se) + rq->nr_running -= task_delta; + + cfs_rq->throttled = 1; + 
raw_spin_lock(&cfs_b->lock); + list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + raw_spin_unlock(&cfs_b->lock); +} + /* * Responsible for refilling a task_group's bandwidth and unthrottling its * cfs_rqs as appropriate. If there has been no activity within the last @@ -1428,6 +1477,11 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) #else static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {} + +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) +{ + return 0; +} #endif /************************************************** @@ -1506,7 +1560,17 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) break; cfs_rq = cfs_rq_of(se); enqueue_entity(cfs_rq, se, flags); + + /* + * end evaluation on encountering a throttled cfs_rq + * + * note: in the case of encountering a throttled cfs_rq we will + * post the final h_nr_running increment below. + */ + if (cfs_rq_throttled(cfs_rq)) + break; cfs_rq->h_nr_running++; + flags = ENQUEUE_WAKEUP; } @@ -1514,11 +1578,15 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq = cfs_rq_of(se); cfs_rq->h_nr_running++; + if (cfs_rq_throttled(cfs_rq)) + break; + update_cfs_load(cfs_rq, 0); update_cfs_shares(cfs_rq); } - inc_nr_running(rq); + if (!se) + inc_nr_running(rq); hrtick_update(rq); } @@ -1538,6 +1606,15 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); + + /* + * end evaluation on encountering a throttled cfs_rq + * + * note: in the case of encountering a throttled cfs_rq we will + * post the final h_nr_running decrement below. + */ + if (cfs_rq_throttled(cfs_rq)) + break; cfs_rq->h_nr_running--; /* Don't dequeue parent if it has other entities besides us */ @@ -1560,11 +1637,15 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq = cfs_rq_of(se); cfs_rq->h_nr_running--; + if (cfs_rq_throttled(cfs_rq)) + break; + update_cfs_load(cfs_rq, 0); update_cfs_shares(cfs_rq); } - dec_nr_running(rq); + if (!se) + dec_nr_running(rq); hrtick_update(rq); } From 29fc7bd9b4714012eff2c0121d8d7b221b6140c4 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:34 -0700 Subject: [PATCH 378/678] sched: Add support for unthrottling group entities At the start of each period we refresh the global bandwidth pool. At this time we must also unthrottle any cfs_rq entities who are now within bandwidth once more (as quota permits). Unthrottled entities have their corresponding cfs_rq->throttled flag cleared and their entities re-enqueued. 
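The distribution step introduced below (distribute_cfs_runtime()) hands each throttled runqueue just enough runtime to bring its deficit one nanosecond above zero, until the refreshed pool runs dry. A stand-alone sketch of that arithmetic, with a plain array standing in for the throttled list (all names hypothetical):

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
      /* runtime_remaining of three throttled runqueues (all in deficit) */
      int64_t remaining[] = { -300, -1500, -50 };
      int64_t pool = 1000;   /* freshly refilled global runtime */
      int i, n = sizeof(remaining) / sizeof(remaining[0]);

      for (i = 0; i < n && pool > 0; i++) {
          int64_t want = -remaining[i] + 1;    /* enough to go positive */
          int64_t give = want < pool ? want : pool;

          remaining[i] += give;
          pool -= give;
          printf("rq%d: remaining=%lld unthrottle=%s pool=%lld\n", i,
                 (long long)remaining[i],
                 remaining[i] > 0 ? "yes" : "no", (long long)pool);
      }
      return 0;
  }
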
Signed-off-by: Paul Turner Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.574628950@google.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 3 ++ kernel/sched_fair.c | 127 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 126 insertions(+), 4 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 1ad76a53be2..32913ba8cab 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9543,6 +9543,9 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) raw_spin_lock_irq(&rq->lock); cfs_rq->runtime_enabled = runtime_enabled; cfs_rq->runtime_remaining = 0; + + if (cfs_rq_throttled(cfs_rq)) + unthrottle_cfs_rq(cfs_rq); raw_spin_unlock_irq(&rq->lock); } out_unlock: diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index d4f93defdd2..01f3fca8560 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1442,6 +1442,84 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq) raw_spin_unlock(&cfs_b->lock); } +static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) +{ + struct rq *rq = rq_of(cfs_rq); + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + struct sched_entity *se; + int enqueue = 1; + long task_delta; + + se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; + + cfs_rq->throttled = 0; + raw_spin_lock(&cfs_b->lock); + list_del_rcu(&cfs_rq->throttled_list); + raw_spin_unlock(&cfs_b->lock); + + if (!cfs_rq->load.weight) + return; + + task_delta = cfs_rq->h_nr_running; + for_each_sched_entity(se) { + if (se->on_rq) + enqueue = 0; + + cfs_rq = cfs_rq_of(se); + if (enqueue) + enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP); + cfs_rq->h_nr_running += task_delta; + + if (cfs_rq_throttled(cfs_rq)) + break; + } + + if (!se) + rq->nr_running += task_delta; + + /* determine whether we need to wake up potentially idle cpu */ + if (rq->curr == rq->idle && rq->cfs.nr_running) + resched_task(rq->curr); +} + +static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, + u64 remaining, u64 expires) +{ + struct cfs_rq *cfs_rq; + u64 runtime = remaining; + + rcu_read_lock(); + list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, + throttled_list) { + struct rq *rq = rq_of(cfs_rq); + + raw_spin_lock(&rq->lock); + if (!cfs_rq_throttled(cfs_rq)) + goto next; + + runtime = -cfs_rq->runtime_remaining + 1; + if (runtime > remaining) + runtime = remaining; + remaining -= runtime; + + cfs_rq->runtime_remaining += runtime; + cfs_rq->runtime_expires = expires; + + /* we check whether we're throttled above */ + if (cfs_rq->runtime_remaining > 0) + unthrottle_cfs_rq(cfs_rq); + +next: + raw_spin_unlock(&rq->lock); + + if (!remaining) + break; + } + rcu_read_unlock(); + + return remaining; +} + /* * Responsible for refilling a task_group's bandwidth and unthrottling its * cfs_rqs as appropriate. 
If there has been no activity within the last @@ -1450,23 +1528,64 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq) */ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) { - int idle = 1; + u64 runtime, runtime_expires; + int idle = 1, throttled; raw_spin_lock(&cfs_b->lock); /* no need to continue the timer with no bandwidth constraint */ if (cfs_b->quota == RUNTIME_INF) goto out_unlock; - idle = cfs_b->idle; + throttled = !list_empty(&cfs_b->throttled_cfs_rq); + /* idle depends on !throttled (for the case of a large deficit) */ + idle = cfs_b->idle && !throttled; + /* if we're going inactive then everything else can be deferred */ if (idle) goto out_unlock; __refill_cfs_bandwidth_runtime(cfs_b); + if (!throttled) { + /* mark as potentially idle for the upcoming period */ + cfs_b->idle = 1; + goto out_unlock; + } + + /* + * There are throttled entities so we must first use the new bandwidth + * to unthrottle them before making it generally available. This + * ensures that all existing debts will be paid before a new cfs_rq is + * allowed to run. + */ + runtime = cfs_b->runtime; + runtime_expires = cfs_b->runtime_expires; + cfs_b->runtime = 0; + + /* + * This check is repeated as we are holding onto the new bandwidth + * while we unthrottle. This can potentially race with an unthrottled + * group trying to acquire new bandwidth from the global pool. + */ + while (throttled && runtime > 0) { + raw_spin_unlock(&cfs_b->lock); + /* we can't nest cfs_b->lock while distributing bandwidth */ + runtime = distribute_cfs_runtime(cfs_b, runtime, + runtime_expires); + raw_spin_lock(&cfs_b->lock); + + throttled = !list_empty(&cfs_b->throttled_cfs_rq); + } - /* mark as potentially idle for the upcoming period */ - cfs_b->idle = 1; + /* return (any) remaining runtime */ + cfs_b->runtime = runtime; + /* + * While we are ensured activity in the period following an + * unthrottle, this also covers the case in which the new bandwidth is + * insufficient to cover the existing bandwidth deficit. (Forcing the + * timer to remain active while there are any throttled entities.) + */ + cfs_b->idle = 0; out_unlock: if (idle) cfs_b->timer_active = 0; From 84113c87d117db9b009fd89aac48791f3533cdab Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:35 -0700 Subject: [PATCH 379/678] sched: Allow for positional tg_tree walks Extend walk_tg_tree to accept a positional argument static int walk_tg_tree_from(struct task_group *from, tg_visitor down, tg_visitor up, void *data) Existing semantics are preserved, caller must hold rcu_lock() or sufficient analogue. Signed-off-by: Paul Turner Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.677889157@google.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 50 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 32913ba8cab..775e70f9e72 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1611,20 +1611,23 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) typedef int (*tg_visitor)(struct task_group *, void *); /* - * Iterate the full tree, calling @down when first entering a node and @up when - * leaving it for the final time. + * Iterate task_group tree rooted at *from, calling @down when first entering a + * node and @up when leaving it for the final time. + * + * Caller must hold rcu_lock or sufficient equivalent. 
*/ -static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) +static int walk_tg_tree_from(struct task_group *from, + tg_visitor down, tg_visitor up, void *data) { struct task_group *parent, *child; int ret; - rcu_read_lock(); - parent = &root_task_group; + parent = from; + down: ret = (*down)(parent, data); if (ret) - goto out_unlock; + goto out; list_for_each_entry_rcu(child, &parent->children, siblings) { parent = child; goto down; @@ -1633,19 +1636,29 @@ static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) continue; } ret = (*up)(parent, data); - if (ret) - goto out_unlock; + if (ret || parent == from) + goto out; child = parent; parent = parent->parent; if (parent) goto up; -out_unlock: - rcu_read_unlock(); - +out: return ret; } +/* + * Iterate the full tree, calling @down when first entering a node and @up when + * leaving it for the final time. + * + * Caller must hold rcu_lock or sufficient equivalent. + */ + +static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) +{ + return walk_tg_tree_from(&root_task_group, down, up, data); +} + static int tg_nop(struct task_group *tg, void *data) { return 0; @@ -9207,13 +9220,19 @@ static int tg_rt_schedulable(struct task_group *tg, void *data) static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) { + int ret; + struct rt_schedulable_data data = { .tg = tg, .rt_period = period, .rt_runtime = runtime, }; - return walk_tg_tree(tg_rt_schedulable, tg_nop, &data); + rcu_read_lock(); + ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data); + rcu_read_unlock(); + + return ret; } static int tg_set_rt_bandwidth(struct task_group *tg, @@ -9684,6 +9703,7 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data) static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) { + int ret; struct cfs_schedulable_data data = { .tg = tg, .period = period, @@ -9695,7 +9715,11 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) do_div(data.quota, NSEC_PER_USEC); } - return walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data); + rcu_read_lock(); + ret = walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data); + rcu_read_unlock(); + + return ret; } #endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ From 6e873375089dea6a209de0dfc00bc9d46fc58e7a Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:36 -0700 Subject: [PATCH 380/678] sched: Prevent interactions with throttled entities From the perspective of load-balance and shares distribution, throttled entities should be invisible. However, both of these operations work on 'active' lists and are not inherently aware of what group hierarchies may be present. In some cases this may be side-stepped (e.g. we could sideload via tg_load_down in load balance) while in others (e.g. update_shares()) it is more difficult to compute without incurring some O(n^2) costs. Instead, track hierarchicaal throttled state at time of transition. This allows us to easily identify whether an entity belongs to a throttled hierarchy and avoid incorrect interactions with it. Also, when an entity leaves a throttled hierarchy we need to advance its time averaging for shares averaging so that the elapsed throttled time is not considered as part of the cfs_rq's operation. We also use this information to prevent buddy interactions in the wakeup and yield_to() paths. 
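One detail worth calling out: tg_unthrottle_up() below shifts the shares-averaging timestamps forward by the throttled interval so that time spent throttled never enters the average. A stand-alone sketch of that bookkeeping, with hypothetical names and plain integers in place of the cfs_rq fields:

  #include <stdio.h>
  #include <stdint.h>

  struct window {
      uint64_t load_stamp;  /* start of the current averaging window */
      uint64_t load_last;   /* last time load was folded in */
  };

  /* on unthrottle: slide the window so the throttled gap is not averaged */
  static void skip_throttled_time(struct window *w, uint64_t now)
  {
      uint64_t delta = now - w->load_stamp;

      w->load_stamp += delta;
      w->load_last += delta;
  }

  int main(void)
  {
      struct window w = { .load_stamp = 1000, .load_last = 900 };

      skip_throttled_time(&w, 5000);  /* throttled from t=1000 to t=5000 */
      printf("stamp=%llu last=%llu\n",
             (unsigned long long)w.load_stamp,
             (unsigned long long)w.load_last);
      return 0;
  }
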
Signed-off-by: Paul Turner Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.777916795@google.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- kernel/sched_fair.c | 99 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 7 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 775e70f9e72..9d0f05aed3a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -404,7 +404,7 @@ struct cfs_rq { u64 runtime_expires; s64 runtime_remaining; - int throttled; + int throttled, throttle_count; struct list_head throttled_list; #endif #endif diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 01f3fca8560..ce9bcf1a436 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -706,6 +706,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) } #ifdef CONFIG_FAIR_GROUP_SCHED +/* we need this in update_cfs_load and load-balance functions below */ +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq); # ifdef CONFIG_SMP static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, int global_update) @@ -728,7 +730,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) u64 now, delta; unsigned long load = cfs_rq->load.weight; - if (cfs_rq->tg == &root_task_group) + if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq)) return; now = rq_of(cfs_rq)->clock_task; @@ -837,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq) tg = cfs_rq->tg; se = tg->se[cpu_of(rq_of(cfs_rq))]; - if (!se) + if (!se || throttled_hierarchy(cfs_rq)) return; #ifndef CONFIG_SMP if (likely(se->load.weight == tg->shares)) @@ -1406,6 +1408,65 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) return cfs_rq->throttled; } +/* check whether cfs_rq, or any parent, is throttled */ +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq) +{ + return cfs_rq->throttle_count; +} + +/* + * Ensure that neither of the group entities corresponding to src_cpu or + * dest_cpu are members of a throttled hierarchy when performing group + * load-balance operations. 
+ */ +static inline int throttled_lb_pair(struct task_group *tg, + int src_cpu, int dest_cpu) +{ + struct cfs_rq *src_cfs_rq, *dest_cfs_rq; + + src_cfs_rq = tg->cfs_rq[src_cpu]; + dest_cfs_rq = tg->cfs_rq[dest_cpu]; + + return throttled_hierarchy(src_cfs_rq) || + throttled_hierarchy(dest_cfs_rq); +} + +/* updated child weight may affect parent so we have to do this bottom up */ +static int tg_unthrottle_up(struct task_group *tg, void *data) +{ + struct rq *rq = data; + struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)]; + + cfs_rq->throttle_count--; +#ifdef CONFIG_SMP + if (!cfs_rq->throttle_count) { + u64 delta = rq->clock_task - cfs_rq->load_stamp; + + /* leaving throttled state, advance shares averaging windows */ + cfs_rq->load_stamp += delta; + cfs_rq->load_last += delta; + + /* update entity weight now that we are on_rq again */ + update_cfs_shares(cfs_rq); + } +#endif + + return 0; +} + +static int tg_throttle_down(struct task_group *tg, void *data) +{ + struct rq *rq = data; + struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)]; + + /* group is entering throttled state, record last load */ + if (!cfs_rq->throttle_count) + update_cfs_load(cfs_rq, 0); + cfs_rq->throttle_count++; + + return 0; +} + static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq) { struct rq *rq = rq_of(cfs_rq); @@ -1416,7 +1477,9 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq) se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; /* account load preceding throttle */ - update_cfs_load(cfs_rq, 0); + rcu_read_lock(); + walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq); + rcu_read_unlock(); task_delta = cfs_rq->h_nr_running; for_each_sched_entity(se) { @@ -1457,6 +1520,10 @@ static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) list_del_rcu(&cfs_rq->throttled_list); raw_spin_unlock(&cfs_b->lock); + update_rq_clock(rq); + /* update hierarchical throttle state */ + walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq); + if (!cfs_rq->load.weight) return; @@ -1601,6 +1668,17 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) { return 0; } + +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq) +{ + return 0; +} + +static inline int throttled_lb_pair(struct task_group *tg, + int src_cpu, int dest_cpu) +{ + return 0; +} #endif /************************************************** @@ -2500,6 +2578,9 @@ move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, for_each_leaf_cfs_rq(busiest, cfs_rq) { list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) { + if (throttled_lb_pair(task_group(p), + busiest->cpu, this_cpu)) + break; if (!can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) @@ -2615,8 +2696,13 @@ static void update_shares(int cpu) * Iterates the task_group tree in a bottom up fashion, see * list_add_leaf_cfs_rq() for details. 
*/ - for_each_leaf_cfs_rq(rq, cfs_rq) + for_each_leaf_cfs_rq(rq, cfs_rq) { + /* throttled entities do not contribute to load */ + if (throttled_hierarchy(cfs_rq)) + continue; + update_shares_cpu(cfs_rq->tg, cpu); + } rcu_read_unlock(); } @@ -2666,9 +2752,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, u64 rem_load, moved_load; /* - * empty group + * empty group or part of a throttled hierarchy */ - if (!busiest_cfs_rq->task_weight) + if (!busiest_cfs_rq->task_weight || + throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu)) continue; rem_load = (u64)rem_load_move * busiest_weight; From 574455860e28cff74074629fd8633e260c0908fb Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:38 -0700 Subject: [PATCH 381/678] sched: Migrate throttled tasks on HOTPLUG Throttled tasks are invisisble to cpu-offline since they are not eligible for selection by pick_next_task(). The regular 'escape' path for a thread that is blocked at offline is via ttwu->select_task_rq, however this will not handle a throttled group since there are no individual thread wakeups on an unthrottle. Resolve this by unthrottling offline cpus so that threads can be migrated. Signed-off-by: Paul Turner Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.989000590@google.com Signed-off-by: Ingo Molnar Conflicts: kernel/sched.c --- kernel/sched.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/kernel/sched.c b/kernel/sched.c index 9d0f05aed3a..1824d19beb2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6609,6 +6609,30 @@ static void calc_global_load_remove(struct rq *rq) rq->calc_load_active = 0; } +#ifdef CONFIG_CFS_BANDWIDTH +static void unthrottle_offline_cfs_rqs(struct rq *rq) +{ + struct cfs_rq *cfs_rq; + + for_each_leaf_cfs_rq(rq, cfs_rq) { + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + + if (!cfs_rq->runtime_enabled) + continue; + + /* + * clock_task is not advancing so we just need to make sure + * there's some valid quota amount + */ + cfs_rq->runtime_remaining = cfs_b->quota; + if (cfs_rq_throttled(cfs_rq)) + unthrottle_cfs_rq(cfs_rq); + } +} +#else +static void unthrottle_offline_cfs_rqs(struct rq *rq) {} +#endif + /* * Migrate all tasks from the rq, sleeping tasks will be migrated by * try_to_wake_up()->select_task_rq(). @@ -6640,6 +6664,9 @@ static void migrate_tasks(unsigned int dead_cpu) */ rq->rt.rt_throttled = 0; + /* Ensure any throttled groups are reachable by pick_next_task */ + unthrottle_offline_cfs_rqs(rq); + for ( ; ; ) { /* * There's this thread running, bail when that's the only From b2b94efd747aa62cf691062d02ce0aee12451280 Mon Sep 17 00:00:00 2001 From: Nikhil Rao Date: Thu, 21 Jul 2011 09:43:40 -0700 Subject: [PATCH 382/678] sched: Add exports tracking cfs bandwidth control statistics This change introduces statistics exports for the cpu sub-system, these are added through the use of a stat file similar to that exported by other subsystems. 
The following exports are included: nr_periods: number of periods in which execution occurred nr_throttled: the number of periods above in which execution was throttle throttled_time: cumulative wall-time that any cpus have been throttled for this group Signed-off-by: Paul Turner Signed-off-by: Nikhil Rao Signed-off-by: Bharata B Rao Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184758.198901931@google.com Signed-off-by: Ingo Molnar --- kernel/sched.c | 21 +++++++++++++++++++++ kernel/sched_fair.c | 7 +++++++ 2 files changed, 28 insertions(+) diff --git a/kernel/sched.c b/kernel/sched.c index 1824d19beb2..e6050fe9638 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -264,6 +264,9 @@ struct cfs_bandwidth { struct hrtimer period_timer; struct list_head throttled_cfs_rq; + /* statistics */ + int nr_periods, nr_throttled; + u64 throttled_time; #endif }; @@ -404,6 +407,7 @@ struct cfs_rq { u64 runtime_expires; s64 runtime_remaining; + u64 throttled_timestamp; int throttled, throttle_count; struct list_head throttled_list; #endif @@ -9748,6 +9752,19 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) return ret; } + +static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft, + struct cgroup_map_cb *cb) +{ + struct task_group *tg = cgroup_tg(cgrp); + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); + + cb->fill(cb, "nr_periods", cfs_b->nr_periods); + cb->fill(cb, "nr_throttled", cfs_b->nr_throttled); + cb->fill(cb, "throttled_time", cfs_b->throttled_time); + + return 0; +} #endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -9794,6 +9811,10 @@ static struct cftype cpu_files[] = { .read_u64 = cpu_cfs_period_read_u64, .write_u64 = cpu_cfs_period_write_u64, }, + { + .name = "stat", + .read_map = cpu_stats_show, + }, #endif #ifdef CONFIG_RT_GROUP_SCHED { diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ce9bcf1a436..270d10e2c97 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1500,6 +1500,7 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq) rq->nr_running -= task_delta; cfs_rq->throttled = 1; + cfs_rq->throttled_timestamp = rq->clock; raw_spin_lock(&cfs_b->lock); list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); raw_spin_unlock(&cfs_b->lock); @@ -1517,8 +1518,10 @@ static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->throttled = 0; raw_spin_lock(&cfs_b->lock); + cfs_b->throttled_time += rq->clock - cfs_rq->throttled_timestamp; list_del_rcu(&cfs_rq->throttled_list); raw_spin_unlock(&cfs_b->lock); + cfs_rq->throttled_timestamp = 0; update_rq_clock(rq); /* update hierarchical throttle state */ @@ -1606,6 +1609,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) throttled = !list_empty(&cfs_b->throttled_cfs_rq); /* idle depends on !throttled (for the case of a large deficit) */ idle = cfs_b->idle && !throttled; + cfs_b->nr_periods += overrun; /* if we're going inactive then everything else can be deferred */ if (idle) @@ -1619,6 +1623,9 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) goto out_unlock; } + /* account preceding periods in which throttling occurred */ + cfs_b->nr_throttled += overrun; + /* * There are throttled entities so we must first use the new bandwidth * to unthrottle them before making it generally available. 
This From 4b1bcdbf23622c8deb138dc22cbbd8a8a5b9b370 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:41 -0700 Subject: [PATCH 383/678] sched: Return unused runtime on group dequeue When a local cfs_rq blocks we return the majority of its remaining quota to the global bandwidth pool for use by other runqueues. We do this only when the quota is current and there is more than min_cfs_rq_quota [1ms by default] of runtime remaining on the rq. In the case where there are throttled runqueues and we have sufficient bandwidth to meter out a slice, a second timer is kicked off to handle this delivery, unthrottling where appropriate. Using a 'worst case' antagonist which executes on each cpu for 1ms before moving onto the next on a fairly large machine: no quota generations: 197.47 ms /cgroup/a/cpuacct.usage 199.46 ms /cgroup/a/cpuacct.usage 205.46 ms /cgroup/a/cpuacct.usage 198.46 ms /cgroup/a/cpuacct.usage 208.39 ms /cgroup/a/cpuacct.usage Since we are allowed to use "stale" quota our usage is effectively bounded by the rate of input into the global pool and performance is relatively stable. with quota generations [1s increments]: 119.58 ms /cgroup/a/cpuacct.usage 119.65 ms /cgroup/a/cpuacct.usage 119.64 ms /cgroup/a/cpuacct.usage 119.63 ms /cgroup/a/cpuacct.usage 119.60 ms /cgroup/a/cpuacct.usage The large deficit here is due to quota generations (/intentionally/) preventing us from now using previously stranded slack quota. The cost is that this quota becomes unavailable. with quota generations and quota return: 200.09 ms /cgroup/a/cpuacct.usage 200.09 ms /cgroup/a/cpuacct.usage 198.09 ms /cgroup/a/cpuacct.usage 200.09 ms /cgroup/a/cpuacct.usage 200.06 ms /cgroup/a/cpuacct.usage By returning unused quota we're able to both stably consume our desired quota and prevent unintentional overages due to the abuse of slack quota from previous quota periods (especially on a large machine). 
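For reference, cpuacct.usage reports cumulative CPU time in nanoseconds; the figures above are that counter converted to milliseconds. A stand-alone sketch of collecting one such sample; the /cgroup/a path is taken from the example above and assumes the cgroup hierarchy is mounted there:

  #include <stdio.h>

  int main(void)
  {
      unsigned long long ns = 0;
      FILE *f = fopen("/cgroup/a/cpuacct.usage", "r");

      if (!f) {
          perror("cpuacct.usage");
          return 1;
      }
      if (fscanf(f, "%llu", &ns) != 1)
          ns = 0;
      fclose(f);
      printf("%.2f ms /cgroup/a/cpuacct.usage\n", ns / 1e6);
      return 0;
  }
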
Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184758.306848658@google.com Signed-off-by: Ingo Molnar Conflicts: kernel/sched_fair.c --- kernel/sched.c | 15 ++++- kernel/sched_fair.c | 147 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+), 1 deletion(-) diff --git a/kernel/sched.c b/kernel/sched.c index e6050fe9638..c26a44b148f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -261,7 +261,7 @@ struct cfs_bandwidth { u64 runtime_expires; int idle, timer_active; - struct hrtimer period_timer; + struct hrtimer period_timer, slack_timer; struct list_head throttled_cfs_rq; /* statistics */ @@ -423,6 +423,16 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) static inline u64 default_cfs_period(void); static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun); +static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b); + +static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) +{ + struct cfs_bandwidth *cfs_b = + container_of(timer, struct cfs_bandwidth, slack_timer); + do_sched_cfs_slack_timer(cfs_b); + + return HRTIMER_NORESTART; +} static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) { @@ -455,6 +465,8 @@ static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cfs_b->period_timer.function = sched_cfs_period_timer; + hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cfs_b->slack_timer.function = sched_cfs_slack_timer; } static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) @@ -490,6 +502,7 @@ static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) { hrtimer_cancel(&cfs_b->period_timer); + hrtimer_cancel(&cfs_b->slack_timer); } #else static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 270d10e2c97..9de02326b3c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1048,6 +1048,8 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) __clear_buddies_skip(se); } +static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); + static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { @@ -1086,6 +1088,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (!(flags & DEQUEUE_SLEEP)) se->vruntime -= cfs_rq->min_vruntime; + /* return excess runtime on last dequeue */ + return_cfs_rq_runtime(cfs_rq); + update_min_vruntime(cfs_rq); update_cfs_shares(cfs_rq); } @@ -1667,9 +1672,151 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) return idle; } + +/* a cfs_rq won't donate quota below this amount */ +static const u64 min_cfs_rq_runtime = 1 * NSEC_PER_MSEC; +/* minimum remaining period time to redistribute slack quota */ +static const u64 min_bandwidth_expiration = 2 * NSEC_PER_MSEC; +/* how long we wait to gather additional slack before distributing */ +static const u64 cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC; + +/* are we near the end of the current quota period? 
*/ +static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire) +{ + struct hrtimer *refresh_timer = &cfs_b->period_timer; + u64 remaining; + + /* if the call-back is running a quota refresh is already occurring */ + if (hrtimer_callback_running(refresh_timer)) + return 1; + + /* is a quota refresh about to occur? */ + remaining = ktime_to_ns(hrtimer_expires_remaining(refresh_timer)); + if (remaining < min_expire) + return 1; + + return 0; +} + +static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b) +{ + u64 min_left = cfs_bandwidth_slack_period + min_bandwidth_expiration; + + /* if there's a quota refresh soon don't bother with slack */ + if (runtime_refresh_within(cfs_b, min_left)) + return; + + start_bandwidth_timer(&cfs_b->slack_timer, + ns_to_ktime(cfs_bandwidth_slack_period)); +} + +/* we know any runtime found here is valid as update_curr() precedes return */ +static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq) +{ + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + s64 slack_runtime = cfs_rq->runtime_remaining - min_cfs_rq_runtime; + + if (slack_runtime <= 0) + return; + + raw_spin_lock(&cfs_b->lock); + if (cfs_b->quota != RUNTIME_INF && + cfs_rq->runtime_expires == cfs_b->runtime_expires) { + cfs_b->runtime += slack_runtime; + + /* we are under rq->lock, defer unthrottling using a timer */ + if (cfs_b->runtime > sched_cfs_bandwidth_slice() && + !list_empty(&cfs_b->throttled_cfs_rq)) + start_cfs_slack_bandwidth(cfs_b); + } + raw_spin_unlock(&cfs_b->lock); + + /* even if it's not valid for return we don't want to try again */ + cfs_rq->runtime_remaining -= slack_runtime; +} + +static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) +{ + if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running) + return; + + __return_cfs_rq_runtime(cfs_rq); +} + +/* + * This is done with a timer (instead of inline with bandwidth return) since + * it's necessary to juggle rq->locks to unthrottle their respective cfs_rqs. + */ +static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) +{ + u64 runtime = 0, slice = sched_cfs_bandwidth_slice(); + u64 expires; + + /* confirm we're still not at a refresh boundary */ + if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) + return; + + raw_spin_lock(&cfs_b->lock); + if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) { + runtime = cfs_b->runtime; + cfs_b->runtime = 0; + } + expires = cfs_b->runtime_expires; + raw_spin_unlock(&cfs_b->lock); + + if (!runtime) + return; + + runtime = distribute_cfs_runtime(cfs_b, runtime, expires); + + raw_spin_lock(&cfs_b->lock); + if (expires == cfs_b->runtime_expires) + cfs_b->runtime = runtime; + raw_spin_unlock(&cfs_b->lock); +} + +/* + * When a group wakes up we want to make sure that its quota is not already + * expired/exceeded, otherwise it may be allowed to steal additional ticks of + * runtime as update_curr() throttling can not not trigger until it's on-rq. 
+ */ +static void check_enqueue_throttle(struct cfs_rq *cfs_rq) +{ + /* an active group must be handled by the update_curr()->put() path */ + if (!cfs_rq->runtime_enabled || cfs_rq->curr) + return; + + /* ensure the group is not already throttled */ + if (cfs_rq_throttled(cfs_rq)) + return; + + /* update runtime allocation */ + account_cfs_rq_runtime(cfs_rq, 0); + if (cfs_rq->runtime_remaining <= 0) + throttle_cfs_rq(cfs_rq); +} + +/* conditionally throttle active cfs_rq's from put_prev_entity() */ +static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) +{ + if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0)) + return; + + /* + * it's possible for a throttled entity to be forced into a running + * state (e.g. set_curr_task), in this case we're finished. + */ + if (cfs_rq_throttled(cfs_rq)) + return; + + throttle_cfs_rq(cfs_rq); +} #else static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {} +static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} +static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} +static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) { From 6f8c29f11ea07477cd9ee273466b60610e9ef949 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Fri, 29 Jul 2011 16:20:33 +0800 Subject: [PATCH 384/678] sched: Kill WAKEUP_PREEMPT Remove the WAKEUP_PREEMPT feature, disabling it doesn't make any sense and its outlived its use by a long long while. Signed-off-by: Yong Zhang Acked-by: Mike Galbraith Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110729082033.GB12106@zhy Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 9 +-------- kernel/sched_features.h | 5 ----- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 9de02326b3c..353b0c50511 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1120,9 +1120,6 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) * narrow margin doesn't have to wait for a full slice. * This also mitigates buddy induced latencies under load. */ - if (!sched_feat(WAKEUP_PREEMPT)) - return; - if (delta_exec < sysctl_sched_min_granularity) return; @@ -1258,7 +1255,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) return; #endif - if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT)) + if (cfs_rq->nr_running > 1) check_preempt_tick(cfs_rq, curr); } @@ -2529,10 +2526,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (unlikely(p->policy != SCHED_NORMAL)) return; - - if (!sched_feat(WAKEUP_PREEMPT)) - return; - find_matching_se(&se, &pse); update_curr(cfs_rq_of(se)); BUG_ON(!pse); diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 48e69155111..f36224da1bb 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -11,11 +11,6 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 0) */ SCHED_FEAT(START_DEBIT, 1) -/* - * Should wakeups try to preempt running tasks. - */ -SCHED_FEAT(WAKEUP_PREEMPT, 1) - /* * Based on load and program behaviour, see if it makes sense to place * a newly woken task on the same cpu as the task that woke it -- From b1ca948e102254c103f79ae40120c2b188f0be1d Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Thu, 16 Jun 2011 21:55:19 -0400 Subject: [PATCH 385/678] sched: Remove noop in lowest_flag_domain() Checking for the validity of sd is removed, since it is already checked by the for_each_domain macro. 
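For reference, the redundancy comes from the shape of the iterator itself. In the kernel/sched.c of this era, for_each_domain() expands to roughly the following (the exact rcu_dereference wrapper name is quoted from memory and may differ):

    #define for_each_domain(cpu, __sd) \
            for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
                 __sd; __sd = __sd->parent)

The loop condition already guarantees sd is non-NULL on every iteration, so the extra sd test removed below can never change the result.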
Signed-off-by: Hillf Danton Signed-off-by: Steven Rostedt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/BANLkTimT+Tut-3TshCDm-NiLLXrOznibNA@mail.gmail.com Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 353b0c50511..c37c63513a8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -4291,7 +4291,7 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) struct sched_domain *sd; for_each_domain(cpu, sd) - if (sd && (sd->flags & flag)) + if (sd->flags & flag) break; return sd; From aae1fd088ca0c03499a4ff790667d3a557f99584 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 24 Jul 2011 16:33:13 +0000 Subject: [PATCH 386/678] sched: Allow SD_NODES_PER_DOMAIN to be overridden We want to override the default value of SD_NODES_PER_DOMAIN on ppc64, so move it into linux/topology.h. Signed-off-by: Anton Blanchard Acked-by: Peter Zijlstra Signed-off-by: Benjamin Herrenschmidt --- include/linux/topology.h | 4 ++++ kernel/sched.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/topology.h b/include/linux/topology.h index fc839bfa793..e26db031303 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -201,6 +201,10 @@ int arch_update_cpu_topology(void); .balance_interval = 64, \ } +#ifndef SD_NODES_PER_DOMAIN +#define SD_NODES_PER_DOMAIN 16 +#endif + #ifdef CONFIG_SCHED_BOOK #ifndef SD_BOOK_INIT #error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! diff --git a/kernel/sched.c b/kernel/sched.c index c26a44b148f..5f7dc6e53f5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7385,8 +7385,6 @@ static int __init isolated_cpu_setup(char *str) __setup("isolcpus=", isolated_cpu_setup); -#define SD_NODES_PER_DOMAIN 16 - #ifdef CONFIG_NUMA /** From 6da292c52320bd562bb0342976b7b3cd5ee88e92 Mon Sep 17 00:00:00 2001 From: Wang Xingchao Date: Fri, 16 Sep 2011 13:35:52 -0400 Subject: [PATCH 387/678] sched: Remove redundant test in check_preempt_tick() The caller already checks for nr_running > 1, therefore we don't have to do so again. 
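The caller in question is entity_tick(), which after the WAKEUP_PREEMPT removal earlier in this series reads:

    if (cfs_rq->nr_running > 1)
            check_preempt_tick(cfs_rq, curr);

so by the time __pick_first_entity() runs in the hunk below there is always another queued entity, and dropping the inner nr_running test loses nothing.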
Signed-off-by: Wang Xingchao Reviewed-by: Paul Turner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1316194552-12019-1-git-send-email-xingchao.wang@intel.com Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c37c63513a8..0e6c7e09838 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1102,6 +1102,8 @@ static void check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) { unsigned long ideal_runtime, delta_exec; + struct sched_entity *se; + s64 delta; ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; @@ -1123,16 +1125,14 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) if (delta_exec < sysctl_sched_min_granularity) return; - if (cfs_rq->nr_running > 1) { - struct sched_entity *se = __pick_first_entity(cfs_rq); - s64 delta = curr->vruntime - se->vruntime; + se = __pick_first_entity(cfs_rq); + delta = curr->vruntime - se->vruntime; - if (delta < 0) - return; + if (delta < 0) + return; - if (delta > ideal_runtime) - resched_task(rq_of(cfs_rq)->curr); - } + if (delta > ideal_runtime) + resched_task(rq_of(cfs_rq)->curr); } static void From cbc28442b8756b68bfb6869eab90f63f5f7d067d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 13 Oct 2011 16:52:28 +0200 Subject: [PATCH 388/678] sched: Add a comment to effective_load() since it's a pain Every time I have to stare at this function I need to completely reverse engineer its workings, about time I write a comment explaining the thing. Collected bits and pieces from previous changelogs, mostly: 4be9daaa1b33701f011f4117f22dc1e45a3e6e34 83378269a5fad98f562ebc0f09c349575e6cbfe1 Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1318518057.27731.2.camel@twins Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 113 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 95 insertions(+), 18 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 0e6c7e09838..9a0b544a3e3 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) list_del_leaf_cfs_rq(cfs_rq); } +static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq) +{ + long tg_weight; + + /* + * Use this CPU's actual weight instead of the last load_contribution + * to gain a more accurate current total weight. See + * update_cfs_rq_load_contribution(). + */ + tg_weight = atomic_read(&tg->load_weight); + tg_weight -= cfs_rq->load_contribution; + tg_weight += cfs_rq->load.weight; + + return tg_weight; +} + static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) { - long load_weight, load, shares; + long tg_weight, load, shares; + tg_weight = calc_tg_weight(tg, cfs_rq); load = cfs_rq->load.weight; - load_weight = atomic_read(&tg->load_weight); - load_weight += load; - load_weight -= cfs_rq->load_contribution; - shares = (tg->shares * load); - if (load_weight) - shares /= load_weight; + if (tg_weight) + shares /= tg_weight; if (shares < MIN_SHARES) shares = MIN_SHARES; @@ -2027,36 +2040,100 @@ static void task_waking_fair(struct task_struct *p) * Adding load to a group doesn't make a group heavier, but can cause movement * of group shares between cpus. Assuming the shares were perfectly aligned one * can calculate the shift in shares. 
+ * + * Calculate the effective load difference if @wl is added (subtracted) to @tg + * on this @cpu and results in a total addition (subtraction) of @wg to the + * total group weight. + * + * Given a runqueue weight distribution (rw_i) we can compute a shares + * distribution (s_i) using: + * + * s_i = rw_i / \Sum rw_j (1) + * + * Suppose we have 4 CPUs and our @tg is a direct child of the root group and + * has 7 equal weight tasks, distributed as below (rw_i), with the resulting + * shares distribution (s_i): + * + * rw_i = { 2, 4, 1, 0 } + * s_i = { 2/7, 4/7, 1/7, 0 } + * + * As per wake_affine() we're interested in the load of two CPUs (the CPU the + * task used to run on and the CPU the waker is running on), we need to + * compute the effect of waking a task on either CPU and, in case of a sync + * wakeup, compute the effect of the current task going to sleep. + * + * So for a change of @wl to the local @cpu with an overall group weight change + * of @wl we can compute the new shares distribution (s'_i) using: + * + * s'_i = (rw_i + @wl) / (@wg + \Sum rw_j) (2) + * + * Suppose we're interested in CPUs 0 and 1, and want to compute the load + * differences in waking a task to CPU 0. The additional task changes the + * weight and shares distributions like: + * + * rw'_i = { 3, 4, 1, 0 } + * s'_i = { 3/8, 4/8, 1/8, 0 } + * + * We can then compute the difference in effective weight by using: + * + * dw_i = S * (s'_i - s_i) (3) + * + * Where 'S' is the group weight as seen by its parent. + * + * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7) + * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 - + * 4/7) times the weight of the group. */ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) { struct sched_entity *se = tg->se[cpu]; - if (!tg->parent) + if (!tg->parent) /* the trivial, non-cgroup case */ return wl; for_each_sched_entity(se) { - long lw, w; + long w, W; tg = se->my_q->tg; - w = se->my_q->load.weight; - /* use this cpu's instantaneous contribution */ - lw = atomic_read(&tg->load_weight); - lw -= se->my_q->load_contribution; - lw += w + wg; + /* + * W = @wg + \Sum rw_j + */ + W = wg + calc_tg_weight(tg, se->my_q); - wl += w; + /* + * w = rw_i + @wl + */ + w = se->my_q->load.weight + wl; - if (lw > 0 && wl < lw) - wl = (wl * tg->shares) / lw; + /* + * wl = S * s'_i; see (2) + */ + if (W > 0 && w < W) + wl = (w * tg->shares) / W; else wl = tg->shares; - /* zero point is MIN_SHARES */ + /* + * Per the above, wl is the new se->load.weight value; since + * those are clipped to [MIN_SHARES, ...) do so now. See + * calc_cfs_shares(). + */ if (wl < MIN_SHARES) wl = MIN_SHARES; + + /* + * wl = dw_i = S * (s'_i - s_i); see (3) + */ wl -= se->load.weight; + + /* + * Recursively apply this logic to all parent groups to compute + * the final effective load change on the root group. Since + * only the @tg group gets extra weight, all parent groups can + * only redistribute existing shares. @wl is the shift in shares + * resulting from this level per the above. 
+ */ wg = 0; } From c85ead521b0052fa6085bd3d3ded3fe670931641 Mon Sep 17 00:00:00 2001 From: Hui Kang Date: Tue, 11 Oct 2011 23:00:59 -0400 Subject: [PATCH 389/678] sched_fair: Fix a typo in the comment describing update_sd_lb_stats Signed-off-by: Hui Kang Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1318388459-4427-1-git-send-email-hkang.sunysb@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 9a0b544a3e3..2bc9fb5dde3 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -3571,7 +3571,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd, } /** - * update_sd_lb_stats - Update sched_group's statistics for load balancing. + * update_sd_lb_stats - Update sched_domain's statistics for load balancing. * @sd: sched_domain whose statistics are to be updated. * @this_cpu: Cpu for which load balance is currently performed. * @idle: Idle status of this_cpu From ed508b1cd6a6b18944a400aaa1b02e20965231e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Nov 2011 13:01:10 +0100 Subject: [PATCH 390/678] sched: Avoid SMT siblings in select_idle_sibling() if possible Avoid select_idle_sibling() from picking a sibling thread if there's an idle core that shares cache. This fixes SMT balancing in the increasingly common case where there's a shared cache core available to balance to. Tested-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Suresh Siddha Link: http://lkml.kernel.org/r/1321350377.1421.55.camel@twins Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 2bc9fb5dde3..c09e7693d08 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -2317,7 +2317,8 @@ static int select_idle_sibling(struct task_struct *p, int target) int cpu = smp_processor_id(); int prev_cpu = task_cpu(p); struct sched_domain *sd; - int i; + struct sched_group *sg; + int i, smt = 0; /* * If the task is going to be woken-up on this cpu and if it is @@ -2337,25 +2338,38 @@ static int select_idle_sibling(struct task_struct *p, int target) * Otherwise, iterate the domains and find an elegible idle cpu. */ rcu_read_lock(); +again: for_each_domain(target, sd) { - if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) - break; + if (!smt && (sd->flags & SD_SHARE_CPUPOWER)) + continue; - for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) { - if (idle_cpu(i)) { - target = i; - break; + if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) { + if (!smt) { + smt = 1; + goto again; } + break; } - /* - * Lets stop looking for an idle sibling when we reached - * the domain that spans the current cpu and prev_cpu. 
- */ - if (cpumask_test_cpu(cpu, sched_domain_span(sd)) && - cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) - break; + sg = sd->groups; + do { + if (!cpumask_intersects(sched_group_cpus(sg), + tsk_cpus_allowed(p))) + goto next; + + for_each_cpu(i, sched_group_cpus(sg)) { + if (!idle_cpu(i)) + goto next; + } + + target = cpumask_first_and(sched_group_cpus(sg), + tsk_cpus_allowed(p)); + goto done; +next: + sg = sg->next; + } while (sg != sd->groups); } +done: rcu_read_unlock(); return target; From 5fbd85c7a263c1d678c7901f4f38b113f1f2da4e Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Mon, 7 Nov 2011 20:26:34 -0800 Subject: [PATCH 391/678] sched: Fix buglet in return_cfs_rq_runtime() In return_cfs_rq_runtime() we want to return bandwidth when there are no remaining tasks, not "return" when this is the case. Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20111108042736.623812423@google.com Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c09e7693d08..e645300acb0 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1747,7 +1747,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq) static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) { - if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running) + if (!cfs_rq->runtime_enabled || cfs_rq->nr_running) return; __return_cfs_rq_runtime(cfs_rq); From a388981be5df7d93e9ba73fd5daefe2d64b178ee Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Dec 2011 11:47:00 -0800 Subject: [PATCH 392/678] sched: Fix select_idle_sibling() regression in selecting an idle SMT sibling Mike Galbraith reported that this recent commit: commit 4dcfe1025b513c2c1da5bf5586adb0e80148f612 Author: Peter Zijlstra Date: Thu Nov 10 13:01:10 2011 +0100 sched: Avoid SMT siblings in select_idle_sibling() if possible stopped selecting an idle SMT sibling when there are no idle cores in a single socket system. Intent of the select_idle_sibling() was to fallback to an idle SMT sibling, if it fails to identify an idle core. But this fallback was not happening on systems where all the scheduler domains had `SD_SHARE_PKG_RESOURCES' flag set. Fix it. Slightly bigger patch of cleaning all these goto's etc is queued up for the next release. 
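In outline, the search order after this fix is (condensed from the resulting select_idle_sibling(); see the hunks below for the full code):

    smt = 0;
    again:
            for_each_domain(target, sd) {
                    if (!smt && (sd->flags & SD_SHARE_CPUPOWER))
                            continue;   /* pass 1: skip SMT levels, look for a fully idle core */
                    if (smt && !(sd->flags & SD_SHARE_CPUPOWER))
                            break;      /* pass 2: only the SMT level is of interest */
                    if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
                            break;      /* stop once the cache is no longer shared */
                    /* ... scan sd->groups for a group whose allowed cpus are all idle ... */
            }
            if (!smt) {
                    smt = 1;
                    goto again;         /* no idle core found: fall back to an idle sibling */
            }

With this ordering the fallback to an idle SMT sibling happens even when every domain has SD_SHARE_PKG_RESOURCES set, which is the single-socket case that regressed.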
Reported-by: Mike Galbraith Reported-by: Alex Shi Signed-off-by: Peter Zijlstra Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1323978421.1984.244.camel@sbsiddha-desk.sc.intel.com Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e645300acb0..1ad148d472f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -2343,13 +2343,11 @@ static int select_idle_sibling(struct task_struct *p, int target) if (!smt && (sd->flags & SD_SHARE_CPUPOWER)) continue; - if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) { - if (!smt) { - smt = 1; - goto again; - } + if (smt && !(sd->flags & SD_SHARE_CPUPOWER)) + break; + + if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) break; - } sg = sd->groups; do { @@ -2369,6 +2367,10 @@ static int select_idle_sibling(struct task_struct *p, int target) sg = sg->next; } while (sg != sd->groups); } + if (!smt) { + smt = 1; + goto again; + } done: rcu_read_unlock(); From 7704d92ba8e11e802eaf13bbb7169f92ca879dae Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 4 Feb 2013 17:14:59 -0500 Subject: [PATCH 393/678] cpufreq: touchdemand: increase up threshold and decrease sampling rate --- drivers/cpufreq/cpufreq_touchdemand.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_touchdemand.c b/drivers/cpufreq/cpufreq_touchdemand.c index 9cc22a71649..4820f31c2d6 100644 --- a/drivers/cpufreq/cpufreq_touchdemand.c +++ b/drivers/cpufreq/cpufreq_touchdemand.c @@ -46,15 +46,15 @@ unsigned int min_cpus_lock; */ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define DEF_FREQUENCY_UP_THRESHOLD (90) +#define DEF_FREQUENCY_UP_THRESHOLD (95) #define DEF_SAMPLING_DOWN_FACTOR (2) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define MICRO_FREQUENCY_UP_THRESHOLD (90) +#define MICRO_FREQUENCY_UP_THRESHOLD (95) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) -#define DEF_SAMPLING_RATE (40000) +#define DEF_SAMPLING_RATE (20000) #define DEF_IO_IS_BUSY (1) /* From ad005c5c1ac4e9a88e208df9ecda9a0f6576e87d Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 4 Feb 2013 17:15:30 -0500 Subject: [PATCH 394/678] defconfig: update --- arch/arm/configs/metallice_grouper_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 40a10f3ce4d..8ddc6726f14 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -88,6 +88,7 @@ CONFIG_RESOURCE_COUNTERS=y # CONFIG_CGROUP_PERF is not set CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y # CONFIG_BLK_CGROUP is not set # CONFIG_NAMESPACES is not set From fc4d31edcdb2368cfe4585a3b0d221d6e4adecd7 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 5 Feb 2013 19:42:05 -0500 Subject: [PATCH 395/678] Revert "Audio: Enable DRC for speaker playback to enlagre output gain." This reverts commit cbefa9c60eb8f2d838e196df8884f0d89e68110f. 
--- sound/soc/codecs/rt5640.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 03b5aca1b9e..cb059248505 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -1506,18 +1506,6 @@ static int rt5640_spk_event(struct snd_soc_dapm_widget *w, RT5640_PWR_CLS_D, RT5640_PWR_CLS_D); rt5640_index_update_bits(codec, RT5640_CLSD_INT_REG1, 0xf000, 0xf000); - /*Enable DRC */ - snd_soc_update_bits(codec, RT5640_DRC_AGC_1, - RT5640_DRC_AGC_P_MASK | RT5640_DRC_AGC_MASK | - RT5640_DRC_AGC_UPD, - RT5640_DRC_AGC_P_DAC | RT5640_DRC_AGC_EN | - RT5640_DRC_AGC_UPD); - snd_soc_update_bits(codec, RT5640_DRC_AGC_2, - RT5640_DRC_AGC_PRB_MASK, - 0x0003); - snd_soc_update_bits(codec, RT5640_DRC_AGC_3, - RT5640_DRC_AGC_TAR_MASK, - 0x0080); snd_soc_update_bits(codec, RT5640_SPK_VOL, RT5640_L_MUTE | RT5640_R_MUTE, 0); rt5640_update_eqmode(codec,NAKASI); @@ -1527,16 +1515,6 @@ static int rt5640_spk_event(struct snd_soc_dapm_widget *w, snd_soc_update_bits(codec, RT5640_SPK_VOL, RT5640_L_MUTE | RT5640_R_MUTE, RT5640_L_MUTE | RT5640_R_MUTE); - /*Disable DRC */ - snd_soc_update_bits(codec, RT5640_DRC_AGC_1, - RT5640_DRC_AGC_P_MASK | RT5640_DRC_AGC_MASK | - RT5640_DRC_AGC_UPD, RT5640_DRC_AGC_UPD); - snd_soc_update_bits(codec, RT5640_DRC_AGC_2, - RT5640_DRC_AGC_PRB_MASK, - 0x0000); - snd_soc_update_bits(codec, RT5640_DRC_AGC_3, - RT5640_DRC_AGC_TAR_MASK, - 0x0000); rt5640_index_update_bits(codec, RT5640_CLSD_INT_REG1, 0xf000, 0x0000); snd_soc_update_bits(codec, RT5640_PWR_DIG1, From e4f9f67c703b9b8b989f71b9c597c8af25c1fc3d Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 8 Feb 2013 15:41:37 -0500 Subject: [PATCH 396/678] cpufreq: touchdemand: increase up threshold to 98 --- drivers/cpufreq/cpufreq_touchdemand.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_touchdemand.c b/drivers/cpufreq/cpufreq_touchdemand.c index 4820f31c2d6..a6019ab3378 100644 --- a/drivers/cpufreq/cpufreq_touchdemand.c +++ b/drivers/cpufreq/cpufreq_touchdemand.c @@ -46,11 +46,11 @@ unsigned int min_cpus_lock; */ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define DEF_FREQUENCY_UP_THRESHOLD (95) +#define DEF_FREQUENCY_UP_THRESHOLD (98) #define DEF_SAMPLING_DOWN_FACTOR (2) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MICRO_FREQUENCY_UP_THRESHOLD (98) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) From f75a7ffbfa1fbf1336753cf65cbff03f748d0866 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 8 Feb 2013 16:34:57 -0500 Subject: [PATCH 397/678] mach-tegra: cpu-tegra3.c: default to dual core --- arch/arm/mach-tegra/cpu-tegra3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index ca252d8d6e2..34321e3aa5a 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -605,7 +605,7 @@ static int __init tegra_auto_hotplug_debug_init(void) pm_qos_add_request(&min_cpu_req, PM_QOS_MIN_ONLINE_CPUS, PM_QOS_DEFAULT_VALUE); pm_qos_add_request(&max_cpu_req, PM_QOS_MAX_ONLINE_CPUS, - PM_QOS_DEFAULT_VALUE); + (s32)2); if (!debugfs_create_file( "min_cpus", S_IRUGO, hp_debugfs_root, NULL, &min_cpus_fops)) From d68c1c8d8503118544484da8666a3886f5222640 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 11 Feb 2013 17:07:27 -0500 Subject: 
[PATCH 398/678] cpufreq: performance: lock to 4 cores --- drivers/cpufreq/cpufreq_performance.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index f13a8a9af6a..b89ed16f588 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -15,12 +15,27 @@ #include #include +#include +static struct pm_qos_request_list perf_core_lock_min; +static struct pm_qos_request_list perf_core_lock_max; + static int cpufreq_governor_performance(struct cpufreq_policy *policy, unsigned int event) { switch (event) { case CPUFREQ_GOV_START: + pm_qos_update_request(&perf_core_lock_min, + (s32)4); + pm_qos_update_request(&perf_core_lock_max, + (s32)4); + break; + case CPUFREQ_GOV_STOP: + pm_qos_update_request(&perf_core_lock_min, + (s32)PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); + pm_qos_update_request(&perf_core_lock_max, + (s32)PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE); + break; case CPUFREQ_GOV_LIMITS: pr_debug("setting to %u kHz because of event %u\n", policy->max, event); @@ -45,12 +60,18 @@ struct cpufreq_governor cpufreq_gov_performance = { static int __init cpufreq_gov_performance_init(void) { + pm_qos_add_request(&perf_core_lock_min, PM_QOS_MIN_ONLINE_CPUS, + PM_QOS_DEFAULT_VALUE); + pm_qos_add_request(&perf_core_lock_max, PM_QOS_MAX_ONLINE_CPUS, + PM_QOS_DEFAULT_VALUE); return cpufreq_register_governor(&cpufreq_gov_performance); } static void __exit cpufreq_gov_performance_exit(void) { + pm_qos_remove_request(&perf_core_lock_min); + pm_qos_remove_request(&perf_core_lock_max); cpufreq_unregister_governor(&cpufreq_gov_performance); } From 98986c61267dc1ee3f3296d71ee90937943e3f7c Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 11 Feb 2013 17:08:00 -0500 Subject: [PATCH 399/678] mach-tegra: cpu-tegra3.c: ignore PMQOS max cpus when hotplugging based on runnable threads. 
adjust thresholds as well --- arch/arm/mach-tegra/cpu-tegra3.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index 34321e3aa5a..384acc03c40 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -194,7 +194,7 @@ enum { #define NR_FSHIFT 2 static unsigned int nr_run_thresholds[] = { /* 1, 2, 3, 4 - on-line cpus target */ - 5, 9, 13, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ + 5, 16, 18, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ }; static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ static unsigned int nr_run_last; @@ -236,7 +236,7 @@ static noinline int tegra_cpu_speed_balance(void) (nr_run < nr_cpus) || #endif tegra_cpu_edp_favor_down(nr_cpus, mp_overhead) || - (highest_speed <= idle_bottom_freq) || (nr_cpus > max_cpus)) && + (highest_speed <= idle_bottom_freq)) && (nr_cpus > min_cpus)) return TEGRA_CPU_SPEED_SKEWED; @@ -245,10 +245,21 @@ static noinline int tegra_cpu_speed_balance(void) (nr_run <= nr_cpus) || #endif (!tegra_cpu_edp_favor_up(nr_cpus, mp_overhead)) || - (highest_speed <= idle_bottom_freq) || (nr_cpus == max_cpus)) && + (highest_speed <= idle_bottom_freq)) && (nr_cpus >= min_cpus)) return TEGRA_CPU_SPEED_BIASED; +#ifdef CONFIG_TEGRA_RUNNABLE_THREAD + if (nr_run > nr_cpus) + return TEGRA_CPU_SPEED_BALANCED; +#endif + + if ((nr_cpus > max_cpus) && (nr_cpus > min_cpus)) + return TEGRA_CPU_SPEED_SKEWED; + + if ((nr_cpus == max_cpus) && (nr_cpus >= min_cpus)) + return TEGRA_CPU_SPEED_BIASED; + return TEGRA_CPU_SPEED_BALANCED; } void disable_auto_hotplug(void) From 1ca41a7a0d278485a1fd161698a626e93ad513a4 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 14 Feb 2013 16:51:40 -0500 Subject: [PATCH 400/678] Revert "mach-tegra: cpu-tegra3.c: ignore PMQOS max cpus when hotplugging based on runnable threads." This reverts commit 98986c61267dc1ee3f3296d71ee90937943e3f7c. 
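For reference, the thresholds this revert restores are fixed-point thread counts: with NR_FSHIFT = 2 each entry is an average number of runnable threads times 4, so the default table reads as

    /* nr_run_thresholds[] = { 5, 9, 13, UINT_MAX }, NR_FSHIFT = 2 */
    5  / 4 = 1.25 avg runnable threads: boundary between one and two online cpus
    9  / 4 = 2.25 avg runnable threads: boundary between two and three
    13 / 4 = 3.25 avg runnable threads: boundary between three and four

and nr_run_hysteresis = 2 is half a thread of hysteresis, per the comments in the file.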
--- arch/arm/mach-tegra/cpu-tegra3.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index 384acc03c40..34321e3aa5a 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -194,7 +194,7 @@ enum { #define NR_FSHIFT 2 static unsigned int nr_run_thresholds[] = { /* 1, 2, 3, 4 - on-line cpus target */ - 5, 16, 18, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ + 5, 9, 13, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */ }; static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */ static unsigned int nr_run_last; @@ -236,7 +236,7 @@ static noinline int tegra_cpu_speed_balance(void) (nr_run < nr_cpus) || #endif tegra_cpu_edp_favor_down(nr_cpus, mp_overhead) || - (highest_speed <= idle_bottom_freq)) && + (highest_speed <= idle_bottom_freq) || (nr_cpus > max_cpus)) && (nr_cpus > min_cpus)) return TEGRA_CPU_SPEED_SKEWED; @@ -245,21 +245,10 @@ static noinline int tegra_cpu_speed_balance(void) (nr_run <= nr_cpus) || #endif (!tegra_cpu_edp_favor_up(nr_cpus, mp_overhead)) || - (highest_speed <= idle_bottom_freq)) && + (highest_speed <= idle_bottom_freq) || (nr_cpus == max_cpus)) && (nr_cpus >= min_cpus)) return TEGRA_CPU_SPEED_BIASED; -#ifdef CONFIG_TEGRA_RUNNABLE_THREAD - if (nr_run > nr_cpus) - return TEGRA_CPU_SPEED_BALANCED; -#endif - - if ((nr_cpus > max_cpus) && (nr_cpus > min_cpus)) - return TEGRA_CPU_SPEED_SKEWED; - - if ((nr_cpus == max_cpus) && (nr_cpus >= min_cpus)) - return TEGRA_CPU_SPEED_BIASED; - return TEGRA_CPU_SPEED_BALANCED; } void disable_auto_hotplug(void) From effc09cc0524d65ec470b158e2faab61f77d99bb Mon Sep 17 00:00:00 2001 From: Jithu Jance Date: Thu, 15 Nov 2012 17:14:20 -0800 Subject: [PATCH 401/678] net: wireless: bcmdhd: Enable P2P probe request handling only during discovery Change-Id: I2db29d5ed7f66f2a45feb890c81d510fcad24dd2 Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/dhd_linux.c | 1 - drivers/net/wireless/bcmdhd/wl_cfg80211.c | 3 +-- drivers/net/wireless/bcmdhd/wl_cfg80211.h | 1 + drivers/net/wireless/bcmdhd/wl_cfgp2p.c | 9 ++++++++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index 6cc2e1ed89b..a871bdba807 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -3391,7 +3391,6 @@ dhd_preinit_ioctls(dhd_pub_t *dhd) setbit(eventmask, WLC_E_ACTION_FRAME_RX); setbit(eventmask, WLC_E_ACTION_FRAME_COMPLETE); setbit(eventmask, WLC_E_ACTION_FRAME_OFF_CHAN_COMPLETE); - setbit(eventmask, WLC_E_P2P_PROBREQ_MSG); setbit(eventmask, WLC_E_P2P_DISC_LISTEN_COMPLETE); } #endif /* WL_CFG80211 */ diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index b4f47d1393c..2f28bf1d3f7 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -331,7 +331,6 @@ static __used bool wl_is_ibssstarter(struct wl_priv *wl); */ static s32 __wl_cfg80211_up(struct wl_priv *wl); static s32 __wl_cfg80211_down(struct wl_priv *wl); -static s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add); static bool wl_is_linkdown(struct wl_priv *wl, const wl_event_msg_t *e); static bool wl_is_linkup(struct wl_priv *wl, const wl_event_msg_t *e, struct net_device *ndev); static bool wl_is_nonetwork(struct wl_priv *wl, const wl_event_msg_t *e); @@ -7089,7 +7088,7 @@ static s32 
wl_config_ifmode(struct wl_priv *wl, struct net_device *ndev, s32 ift return 0; } -static s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add) +s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add) { s8 iovbuf[WL_EVENTING_MASK_LEN + 12]; diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.h b/drivers/net/wireless/bcmdhd/wl_cfg80211.h index dfb0d0de2f7..6d237eee2cc 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.h +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.h @@ -689,4 +689,5 @@ void wl_cfg80211_enable_trace(int level); extern s32 wl_update_wiphybands(struct wl_priv *wl); extern s32 wl_cfg80211_if_is_group_owner(void); extern int wl_cfg80211_update_power_mode(struct net_device *dev); +extern s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add); #endif /* _wl_cfg80211_h_ */ diff --git a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c index 7bcd14486dd..aedf9705b44 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c +++ b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c @@ -641,7 +641,7 @@ wl_cfgp2p_enable_discovery(struct wl_priv *wl, struct net_device *dev, } set_ie: ret = wl_cfgp2p_set_management_ie(wl, dev, - wl_cfgp2p_find_idx(wl, dev), + wl_to_p2p_bss_bssidx(wl, P2PAPI_BSSCFG_DEVICE), VNDR_IE_PRBREQ_FLAG, ie, ie_len); if (unlikely(ret < 0)) { @@ -1230,6 +1230,10 @@ wl_cfgp2p_listen_complete(struct wl_priv *wl, struct net_device *ndev, } cfg80211_remain_on_channel_expired(ndev, wl->last_roc_id, &wl->remain_on_chan, wl->remain_on_chan_type, GFP_KERNEL); + if (wl_add_remove_eventmsg(wl_to_prmry_ndev(wl), + WLC_E_P2P_PROBREQ_MSG, false) != BCME_OK) { + CFGP2P_ERR((" failed to unset WLC_E_P2P_PROPREQ_MSG\n")); + } } else wl_clr_p2p_status(wl, LISTEN_EXPIRED); @@ -1321,6 +1325,9 @@ wl_cfgp2p_discover_listen(struct wl_priv *wl, s32 channel, u32 duration_ms) } else wl_clr_p2p_status(wl, LISTEN_EXPIRED); + if (wl_add_remove_eventmsg(wl_to_prmry_ndev(wl), WLC_E_P2P_PROBREQ_MSG, true) != BCME_OK) { + CFGP2P_ERR((" failed to set WLC_E_P2P_PROPREQ_MSG\n")); + } wl_cfgp2p_set_p2p_mode(wl, WL_P2P_DISC_ST_LISTEN, channel, (u16) duration_ms, wl_to_p2p_bss_bssidx(wl, P2PAPI_BSSCFG_DEVICE)); _timer = &wl->p2p->listen_timer; From 7cf35da07daa8a8581406cf3f92dbc7adf14994d Mon Sep 17 00:00:00 2001 From: Haley Teng Date: Wed, 28 Nov 2012 17:29:28 +0800 Subject: [PATCH 402/678] video: tegra: host: better error handling in alloc_gathers() We should return -ENOMEM in alloc_gathers() when get a NULL pointer from nvmap_alloc() or nvmap_mmap() Bug 1178135 Change-Id: I29321710343983a6e733d95b10a1f7eb586246c0 Signed-off-by: Haley Teng --- drivers/video/tegra/host/nvhost_job.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/tegra/host/nvhost_job.c b/drivers/video/tegra/host/nvhost_job.c index df7a62d689b..576be43ad2e 100644 --- a/drivers/video/tegra/host/nvhost_job.c +++ b/drivers/video/tegra/host/nvhost_job.c @@ -74,6 +74,8 @@ static int alloc_gathers(struct nvhost_job *job, 32, NVMAP_HANDLE_CACHEABLE, 0); if (IS_ERR_OR_NULL(job->gather_mem)) { err = PTR_ERR(job->gather_mem); + if (!job->gather_mem) + err = -ENOMEM; job->gather_mem = NULL; goto error; } @@ -83,6 +85,8 @@ static int alloc_gathers(struct nvhost_job *job, job->gathers = nvmap_mmap(job->gather_mem); if (IS_ERR_OR_NULL(job->gathers)) { err = PTR_ERR(job->gathers); + if (!job->gathers) + err = -ENOMEM; job->gathers = NULL; goto error; } From 2666ef135b8436cfeabe11c1861c9305c60feb72 Mon Sep 17 00:00:00 2001 From: tryout_chen Date: Thu, 15 Nov 2012 
14:32:21 +0800 Subject: [PATCH 403/678] Proximity: Enable sensor by it's previous RIL setting when device resume from suspend. Change-Id: Id5b0bfaa79d62e5c41fa704470cf82bf7a0fefac --- drivers/input/proximity/cap1106.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/input/proximity/cap1106.c b/drivers/input/proximity/cap1106.c index c78299616e8..570a18fac7d 100644 --- a/drivers/input/proximity/cap1106.c +++ b/drivers/input/proximity/cap1106.c @@ -72,6 +72,7 @@ static int prev_c6_status = 0; static int c2_acc_cnt = 0; static int c6_acc_cnt = 0; static int acc_limit = 10; +static int force_enable = 1; /*---------------------------------------------------------------------------- ** FUNCTION DECLARATION @@ -472,6 +473,7 @@ static ssize_t store_sensor_onoff(struct device *dev, struct device_attribute *a return -EINVAL; mutex_lock(&prox_mtx); + force_enable = enable; cap1106_enable_sensor(client, enable); mutex_unlock(&prox_mtx); @@ -892,7 +894,8 @@ static int cap1106_resume(struct i2c_client *client) { PROX_DEBUG("+\n"); mutex_lock(&prox_mtx); - cap1106_enable_sensor(client, 1); + if (force_enable) + cap1106_enable_sensor(client, 1); mutex_unlock(&prox_mtx); PROX_DEBUG("-\n"); return 0; From a81a8debd82ff4a37e50fb6bd000e3b5329f31f4 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Feb 2013 21:56:10 -0500 Subject: [PATCH 404/678] Revert "sched: Fix select_idle_sibling() regression in selecting an idle SMT sibling" This reverts commit a388981be5df7d93e9ba73fd5daefe2d64b178ee. --- kernel/sched_fair.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 1ad148d472f..e645300acb0 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -2343,11 +2343,13 @@ static int select_idle_sibling(struct task_struct *p, int target) if (!smt && (sd->flags & SD_SHARE_CPUPOWER)) continue; - if (smt && !(sd->flags & SD_SHARE_CPUPOWER)) - break; - - if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) + if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) { + if (!smt) { + smt = 1; + goto again; + } break; + } sg = sd->groups; do { @@ -2367,10 +2369,6 @@ static int select_idle_sibling(struct task_struct *p, int target) sg = sg->next; } while (sg != sd->groups); } - if (!smt) { - smt = 1; - goto again; - } done: rcu_read_unlock(); From b52777fb53ceed05991fcc93c9327e33afa118d2 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Feb 2013 21:56:25 -0500 Subject: [PATCH 405/678] Revert "sched: Fix buglet in return_cfs_rq_runtime()" This reverts commit 5fbd85c7a263c1d678c7901f4f38b113f1f2da4e. --- kernel/sched_fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e645300acb0..c09e7693d08 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1747,7 +1747,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq) static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) { - if (!cfs_rq->runtime_enabled || cfs_rq->nr_running) + if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running) return; __return_cfs_rq_runtime(cfs_rq); From 96c5602502d822ee4cc9f2d4b77206f0cd9fd562 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Feb 2013 21:56:34 -0500 Subject: [PATCH 406/678] Revert "sched: Avoid SMT siblings in select_idle_sibling() if possible" This reverts commit ed508b1cd6a6b18944a400aaa1b02e20965231e5. 
--- kernel/sched_fair.c | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c09e7693d08..2bc9fb5dde3 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -2317,8 +2317,7 @@ static int select_idle_sibling(struct task_struct *p, int target) int cpu = smp_processor_id(); int prev_cpu = task_cpu(p); struct sched_domain *sd; - struct sched_group *sg; - int i, smt = 0; + int i; /* * If the task is going to be woken-up on this cpu and if it is @@ -2338,38 +2337,25 @@ static int select_idle_sibling(struct task_struct *p, int target) * Otherwise, iterate the domains and find an elegible idle cpu. */ rcu_read_lock(); -again: for_each_domain(target, sd) { - if (!smt && (sd->flags & SD_SHARE_CPUPOWER)) - continue; - - if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) { - if (!smt) { - smt = 1; - goto again; - } + if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) break; - } - - sg = sd->groups; - do { - if (!cpumask_intersects(sched_group_cpus(sg), - tsk_cpus_allowed(p))) - goto next; - for_each_cpu(i, sched_group_cpus(sg)) { - if (!idle_cpu(i)) - goto next; + for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) { + if (idle_cpu(i)) { + target = i; + break; } + } - target = cpumask_first_and(sched_group_cpus(sg), - tsk_cpus_allowed(p)); - goto done; -next: - sg = sg->next; - } while (sg != sd->groups); + /* + * Lets stop looking for an idle sibling when we reached + * the domain that spans the current cpu and prev_cpu. + */ + if (cpumask_test_cpu(cpu, sched_domain_span(sd)) && + cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) + break; } -done: rcu_read_unlock(); return target; From 6245ea2fe33e033c31c6885a955d31a5061c7bcc Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Feb 2013 22:07:13 -0500 Subject: [PATCH 407/678] Revert "mach-tegra: cpu-tegra3.c: default to dual core" This reverts commit f75a7ffbfa1fbf1336753cf65cbff03f748d0866. 
--- arch/arm/mach-tegra/cpu-tegra3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index 34321e3aa5a..ca252d8d6e2 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -605,7 +605,7 @@ static int __init tegra_auto_hotplug_debug_init(void) pm_qos_add_request(&min_cpu_req, PM_QOS_MIN_ONLINE_CPUS, PM_QOS_DEFAULT_VALUE); pm_qos_add_request(&max_cpu_req, PM_QOS_MAX_ONLINE_CPUS, - (s32)2); + PM_QOS_DEFAULT_VALUE); if (!debugfs_create_file( "min_cpus", S_IRUGO, hp_debugfs_root, NULL, &min_cpus_fops)) From 68cb81f82b620ac9635e77ece35bfd24e8764871 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 22 Feb 2013 13:20:56 -0500 Subject: [PATCH 408/678] mach-tegra: add config to toggle decreased min brightness --- arch/arm/configs/metallice_grouper_defconfig | 1 + arch/arm/mach-tegra/Kconfig | 7 +++++++ arch/arm/mach-tegra/board-grouper-panel.c | 11 +++++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 8ddc6726f14..47793e3225c 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -315,6 +315,7 @@ CONFIG_TEGRA_PWM=y CONFIG_TEGRA_FIQ_DEBUGGER=y CONFIG_TEGRA_EMC_SCALING_ENABLE=y CONFIG_VOLTAGE_CONTROL=y +CONFIG_DECREASE_MIN_BRIGHTNESS=y CONFIG_GPU_OVERCLOCK=y # CONFIG_GPU_OC_332 is not set CONFIG_GPU_OC_446=y diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index d3b5e5fd94e..313fd49f522 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -286,6 +286,13 @@ config VOLTAGE_CONTROL help User custom voltage control interface +config DECREASE_MIN_BRIGHTNESS + bool "Decrease Minumum Brightness" + depends on TEGRA_SILICON_PLATFORM + default n + help + Decrease minimum backlight for lowest brightness level used by android. + config GPU_OVERCLOCK bool "Enable GPU overclock for Tegra3" depends on TEGRA_SILICON_PLATFORM diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index aeb1139f051..4dc96079407 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -100,11 +100,18 @@ static tegra_dc_bl_output grouper_bl_output_measured = { 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 */ - /* 0 - 9 */ /* Unused by standard android brightness settings */ + /* 0 - 9 */ + /* unused by standard android brightness settings */ 0, 1, 2, 3, 4, 5, 6, 6, 7, 7, /* 10 - 15 */ + /* backlight for level one below SD min (16-1=15) must be the same as backlihgt for SD min (13=13) to prevent flickering */ +#ifdef DECREASE_MIN_BRIGHTNESS 8, 9, 10, 11, 12, 13, - /* 16 - 31 */ /* Screen dimmer minimum */ +#else + 13, 13, 13, 13, 13, 13, +#endif + /* 16 - 31 */ + /* screen dimmer minimum - default: 13 -> 13. currently: 16 -> 13. 
*/ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, /* 32 - 47 */ 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, From 21989daa6286b49b95ee8b7b545f4681f865beb4 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 22 Feb 2013 13:26:43 -0500 Subject: [PATCH 409/678] mach-tegra: add config to toggle dual-core by default --- arch/arm/configs/metallice_grouper_defconfig | 1 + arch/arm/mach-tegra/Kconfig | 7 +++++++ arch/arm/mach-tegra/cpu-tegra3.c | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 47793e3225c..6ca823b2f0f 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -316,6 +316,7 @@ CONFIG_TEGRA_FIQ_DEBUGGER=y CONFIG_TEGRA_EMC_SCALING_ENABLE=y CONFIG_VOLTAGE_CONTROL=y CONFIG_DECREASE_MIN_BRIGHTNESS=y +# CONFIG_DEFAULT_DUAL_CORE is not set CONFIG_GPU_OVERCLOCK=y # CONFIG_GPU_OC_332 is not set CONFIG_GPU_OC_446=y diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 313fd49f522..53313050f78 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -293,6 +293,13 @@ config DECREASE_MIN_BRIGHTNESS help Decrease minimum backlight for lowest brightness level used by android. +config DEFAULT_DUAL_CORE + bool "Default to Dual-Core" + depends on TEGRA_SILICON_PLATFORM + default n + help + Set the default maximum number of cores to 2 instead of 4. + config GPU_OVERCLOCK bool "Enable GPU overclock for Tegra3" depends on TEGRA_SILICON_PLATFORM diff --git a/arch/arm/mach-tegra/cpu-tegra3.c b/arch/arm/mach-tegra/cpu-tegra3.c index ca252d8d6e2..2a9119c39b4 100755 --- a/arch/arm/mach-tegra/cpu-tegra3.c +++ b/arch/arm/mach-tegra/cpu-tegra3.c @@ -605,7 +605,11 @@ static int __init tegra_auto_hotplug_debug_init(void) pm_qos_add_request(&min_cpu_req, PM_QOS_MIN_ONLINE_CPUS, PM_QOS_DEFAULT_VALUE); pm_qos_add_request(&max_cpu_req, PM_QOS_MAX_ONLINE_CPUS, +#ifdef DEFAULT_DUAL_CORE + (s32)2); +#else PM_QOS_DEFAULT_VALUE); +#endif if (!debugfs_create_file( "min_cpus", S_IRUGO, hp_debugfs_root, NULL, &min_cpus_fops)) From d18429505cc916ddf82cdb8ec584b15a75eb823d Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 22 Feb 2013 17:27:45 -0500 Subject: [PATCH 410/678] mach-tegra: board-grouper-panel.c: change dithering mode and disable SD --- arch/arm/mach-tegra/board-grouper-panel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 4dc96079407..1307153aac4 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -447,7 +447,7 @@ static struct tegra_dc_mode grouper_panel_modes[] = { }; static struct tegra_dc_sd_settings grouper_sd_settings = { - .enable = 1, /* enabled by default. */ + .enable = 0, /* enabled by default. 
*/ .use_auto_pwm = false, .hw_update_delay = 0, .bin_width = -1, @@ -592,7 +592,7 @@ static struct tegra_dc_out grouper_disp1_out = { .type = TEGRA_DC_OUT_RGB, .depth = 18, - .dither = TEGRA_DC_ORDERED_DITHER, + .dither = TEGRA_DC_ERRDIFF_DITHER, .modes = grouper_panel_modes, .n_modes = ARRAY_SIZE(grouper_panel_modes), From 7e85bd8b51d2e2892a615212c91d6197eee2d584 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 1 Apr 2013 13:01:18 -0400 Subject: [PATCH 411/678] mach-tegra: board-grouper-panel.c: fix brightness levels --- arch/arm/mach-tegra/board-grouper-panel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 1307153aac4..f244b5dd578 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -102,7 +102,11 @@ static tegra_dc_bl_output grouper_bl_output_measured = { */ /* 0 - 9 */ /* unused by standard android brightness settings */ - 0, 1, 2, 3, 4, 5, 6, 6, 7, 7, +#ifdef DECREASE_MIN_BRIGHTNESS + 0, 2, 3, 4, 6, 8, 8, 8, 8, 8, +#else + 0, 2, 4, 6, 8, 13, 13, 13, 13, 13, +#endif /* 10 - 15 */ /* backlight for level one below SD min (16-1=15) must be the same as backlihgt for SD min (13=13) to prevent flickering */ #ifdef DECREASE_MIN_BRIGHTNESS From 473d9ab873c0283cb8a64c973c9feb85e4a58613 Mon Sep 17 00:00:00 2001 From: Tatyana Brokhman Date: Sun, 13 Jan 2013 22:04:59 +0200 Subject: [PATCH 412/678] block: row: Add some debug information on ROW queues 1. Add a counter for number of requests on queue. 2. Add function to print queues status (number requests currently on queue and number of already dispatched requests in current dispatch cycle). Change-Id: I1e98b9ca33853e6e6a8ddc53240f6cd6981e6024 Signed-off-by: Tatyana Brokhman --- block/row-iosched.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/block/row-iosched.c b/block/row-iosched.c index 56d326919c5..46d2ec78f07 100644 --- a/block/row-iosched.c +++ b/block/row-iosched.c @@ -1,7 +1,7 @@ /* * ROW (Read Over Write) I/O scheduler. * - * Copyright (c) 2012, The Linux Foundation. All rights reserved. + * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -104,6 +104,7 @@ struct rowq_idling_data { * @nr_dispatched: number of requests already dispatched in * the current dispatch cycle * @slice: number of requests to dispatch in a cycle + * @nr_req: number of requests in queue * @idle_data: data for idling on queues * */ @@ -115,6 +116,8 @@ struct row_queue { unsigned int nr_dispatched; unsigned int slice; + unsigned int nr_req; + /* used only for READ queues */ struct rowq_idling_data idle_data; }; @@ -191,6 +194,19 @@ static inline int row_rowq_unserved(struct row_data *rd, return rd->cycle_flags & (1 << qnum); } +static inline void __maybe_unused row_dump_queues_stat(struct row_data *rd) +{ + int i; + + row_log(rd->dispatch_queue, " Queues status (curr_queue=%d):", + rd->curr_queue); + for (i = 0; i < ROWQ_MAX_PRIO; i++) + row_log(rd->dispatch_queue, + "queue%d: dispatched= %d, nr_req=%d", i, + rd->row_queues[i].nr_dispatched, + rd->row_queues[i].nr_req); +} + /******************** Static helper functions ***********************/ /* * kick_queue() - Wake up device driver queue thread @@ -264,6 +280,7 @@ static void row_add_request(struct request_queue *q, list_add_tail(&rq->queuelist, &rqueue->fifo); rd->nr_reqs[rq_data_dir(rq)]++; + rqueue->nr_req++; rq_set_fifo_time(rq, jiffies); /* for statistics*/ if (queue_idling_enabled[rqueue->prio]) { @@ -285,10 +302,11 @@ static void row_add_request(struct request_queue *q, if (urgent_queues[rqueue->prio] && row_rowq_unserved(rd, rqueue->prio)) { row_log_rowq(rd, rqueue->prio, - "added urgent req curr_queue = %d", - rd->curr_queue); + "added urgent request (total on queue=%d)", + rqueue->nr_req); } else - row_log_rowq(rd, rqueue->prio, "added request"); + row_log_rowq(rd, rqueue->prio, + "added request (total on queue=%d)", rqueue->nr_req); } /** @@ -317,8 +335,10 @@ static int row_reinsert_req(struct request_queue *q, list_add(&rq->queuelist, &rqueue->fifo); rd->nr_reqs[rq_data_dir(rq)]++; + rqueue->nr_req++; - row_log_rowq(rd, rqueue->prio, "request reinserted"); + row_log_rowq(rd, rqueue->prio, + "request reinserted (total on queue=%d)", rqueue->nr_req); return 0; } @@ -356,8 +376,10 @@ static void row_remove_request(struct request_queue *q, struct request *rq) { struct row_data *rd = (struct row_data *)q->elevator->elevator_data; + struct row_queue *rqueue = RQ_ROWQ(rq); rq_fifo_clear(rq); + rqueue->nr_req--; rd->nr_reqs[rq_data_dir(rq)]--; } @@ -439,7 +461,8 @@ static int row_dispatch_requests(struct request_queue *q, int force) if (row_rowq_unserved(rd, i) && !list_empty(&rd->row_queues[i].rqueue.fifo)) { row_log_rowq(rd, currq, - " Preemting for unserved rowq%d", i); + " Preemting for unserved rowq%d. (nr_req=%u)", + i, rd->row_queues[currq].rqueue.nr_req); rd->curr_queue = i; row_dispatch_insert(rd); ret = 1; @@ -584,6 +607,7 @@ static void row_merged_requests(struct request_queue *q, struct request *rq, struct row_queue *rqueue = RQ_ROWQ(next); list_del_init(&next->queuelist); + rqueue->nr_req--; rqueue->rdata->nr_reqs[rq_data_dir(rq)]--; } From 09b1c11bab4be700625b5b9fed1af6dc6be619c4 Mon Sep 17 00:00:00 2001 From: Tatyana Brokhman Date: Sat, 12 Jan 2013 16:21:12 +0200 Subject: [PATCH 413/678] block: row: Insert dispatch_quantum into struct row_queue There is really no point in keeping the dispatch quantum of a queue outside of it. By inserting it to the row_queue structure we spare extra level in accessing it. 
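Concretely, every per-queue access drops one level of indirection, e.g. (taken from the hunks below):

    rd->row_queues[i].rqueue.nr_dispatched   /* before */
    rd->row_queues[i].nr_dispatched          /* after  */

and the anonymous wrapper struct that only paired the rqueue with its disp_quantum goes away.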
Change-Id: Ic77571818b643e71f9aafbb2ca93d0a92158b199 Signed-off-by: Tatyana Brokhman --- block/row-iosched.c | 52 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/block/row-iosched.c b/block/row-iosched.c index 46d2ec78f07..5a922cbc6e6 100644 --- a/block/row-iosched.c +++ b/block/row-iosched.c @@ -105,6 +105,8 @@ struct rowq_idling_data { * the current dispatch cycle * @slice: number of requests to dispatch in a cycle * @nr_req: number of requests in queue + * @dispatch quantum: number of requests this queue may + * dispatch in a dispatch cycle * @idle_data: data for idling on queues * */ @@ -117,6 +119,7 @@ struct row_queue { unsigned int slice; unsigned int nr_req; + int disp_quantum; /* used only for READ queues */ struct rowq_idling_data idle_data; @@ -141,8 +144,7 @@ struct idling_data { /** * struct row_queue - Per block device rqueue structure * @dispatch_queue: dispatch rqueue - * @row_queues: array of priority request queues with - * dispatch quantum per rqueue + * @row_queues: array of priority request queues * @curr_queue: index in the row_queues array of the * currently serviced rqueue * @read_idle: data for idling after READ request @@ -155,10 +157,7 @@ struct idling_data { struct row_data { struct request_queue *dispatch_queue; - struct { - struct row_queue rqueue; - int disp_quantum; - } row_queues[ROWQ_MAX_PRIO]; + struct row_queue row_queues[ROWQ_MAX_PRIO]; enum row_queue_prio curr_queue; @@ -198,8 +197,7 @@ static inline void __maybe_unused row_dump_queues_stat(struct row_data *rd) { int i; - row_log(rd->dispatch_queue, " Queues status (curr_queue=%d):", - rd->curr_queue); + row_log(rd->dispatch_queue, " Queues status:"); for (i = 0; i < ROWQ_MAX_PRIO; i++) row_log(rd->dispatch_queue, "queue%d: dispatched= %d, nr_req=%d", i, @@ -226,7 +224,7 @@ static void kick_queue(struct work_struct *work) row_log_rowq(rd, rd->curr_queue, "Performing delayed work"); /* Mark idling process as done */ - rd->row_queues[rd->curr_queue].rqueue.idle_data.begin_idling = false; + rd->row_queues[rd->curr_queue].idle_data.begin_idling = false; if (!(rd->nr_reqs[0] + rd->nr_reqs[1])) row_log(rd->dispatch_queue, "No requests in scheduler"); @@ -251,7 +249,7 @@ static inline void row_restart_disp_cycle(struct row_data *rd) int i; for (i = 0; i < ROWQ_MAX_PRIO; i++) - rd->row_queues[i].rqueue.nr_dispatched = 0; + rd->row_queues[i].nr_dispatched = 0; rd->curr_queue = ROWQ_PRIO_HIGH_READ; row_log(rd->dispatch_queue, "Restarting cycle"); @@ -356,7 +354,7 @@ static bool row_urgent_pending(struct request_queue *q) for (i = 0; i < ROWQ_MAX_PRIO; i++) if (urgent_queues[i] && row_rowq_unserved(rd, i) && - !list_empty(&rd->row_queues[i].rqueue.fifo)) { + !list_empty(&rd->row_queues[i].fifo)) { row_log_rowq(rd, i, "Urgent request pending (curr=%i)", rd->curr_queue); @@ -395,13 +393,13 @@ static void row_dispatch_insert(struct row_data *rd) { struct request *rq; - rq = rq_entry_fifo(rd->row_queues[rd->curr_queue].rqueue.fifo.next); + rq = rq_entry_fifo(rd->row_queues[rd->curr_queue].fifo.next); row_remove_request(rd->dispatch_queue, rq); elv_dispatch_add_tail(rd->dispatch_queue, rq); - rd->row_queues[rd->curr_queue].rqueue.nr_dispatched++; + rd->row_queues[rd->curr_queue].nr_dispatched++; row_clear_rowq_unserved(rd, rd->curr_queue); row_log_rowq(rd, rd->curr_queue, " Dispatched request nr_disp = %d", - rd->row_queues[rd->curr_queue].rqueue.nr_dispatched); + rd->row_queues[rd->curr_queue].nr_dispatched); } /* @@ -427,7 +425,7 @@ static int 
row_choose_queue(struct row_data *rd) * Loop over all queues to find the next queue that is not empty. * Stop when you get back to curr_queue */ - while (list_empty(&rd->row_queues[rd->curr_queue].rqueue.fifo) + while (list_empty(&rd->row_queues[rd->curr_queue].fifo) && rd->curr_queue != prev_curr_queue) { /* Mark rqueue as unserved */ row_mark_rowq_unserved(rd, rd->curr_queue); @@ -459,10 +457,10 @@ static int row_dispatch_requests(struct request_queue *q, int force) */ for (i = 0; i < currq; i++) { if (row_rowq_unserved(rd, i) && - !list_empty(&rd->row_queues[i].rqueue.fifo)) { + !list_empty(&rd->row_queues[i].fifo)) { row_log_rowq(rd, currq, " Preemting for unserved rowq%d. (nr_req=%u)", - i, rd->row_queues[currq].rqueue.nr_req); + i, rd->row_queues[currq].nr_req); rd->curr_queue = i; row_dispatch_insert(rd); ret = 1; @@ -470,9 +468,9 @@ static int row_dispatch_requests(struct request_queue *q, int force) } } - if (rd->row_queues[currq].rqueue.nr_dispatched >= + if (rd->row_queues[currq].nr_dispatched >= rd->row_queues[currq].disp_quantum) { - rd->row_queues[currq].rqueue.nr_dispatched = 0; + rd->row_queues[currq].nr_dispatched = 0; row_log_rowq(rd, currq, "Expiring rqueue"); ret = row_choose_queue(rd); if (ret) @@ -481,7 +479,7 @@ static int row_dispatch_requests(struct request_queue *q, int force) } /* Dispatch from curr_queue */ - if (list_empty(&rd->row_queues[currq].rqueue.fifo)) { + if (list_empty(&rd->row_queues[currq].fifo)) { /* check idling */ if (delayed_work_pending(&rd->read_idle.idle_work)) { if (force) { @@ -497,7 +495,7 @@ static int row_dispatch_requests(struct request_queue *q, int force) } if (!force && queue_idling_enabled[currq] && - rd->row_queues[currq].rqueue.idle_data.begin_idling) { + rd->row_queues[currq].idle_data.begin_idling) { if (!queue_delayed_work(rd->read_idle.idle_workqueue, &rd->read_idle.idle_work, rd->read_idle.idle_time)) { @@ -544,12 +542,12 @@ static void *row_init_queue(struct request_queue *q) return NULL; for (i = 0; i < ROWQ_MAX_PRIO; i++) { - INIT_LIST_HEAD(&rdata->row_queues[i].rqueue.fifo); + INIT_LIST_HEAD(&rdata->row_queues[i].fifo); rdata->row_queues[i].disp_quantum = queue_quantum[i]; - rdata->row_queues[i].rqueue.rdata = rdata; - rdata->row_queues[i].rqueue.prio = i; - rdata->row_queues[i].rqueue.idle_data.begin_idling = false; - rdata->row_queues[i].rqueue.idle_data.last_insert_time = + rdata->row_queues[i].rdata = rdata; + rdata->row_queues[i].prio = i; + rdata->row_queues[i].idle_data.begin_idling = false; + rdata->row_queues[i].idle_data.last_insert_time = ktime_set(0, 0); } @@ -588,7 +586,7 @@ static void row_exit_queue(struct elevator_queue *e) int i; for (i = 0; i < ROWQ_MAX_PRIO; i++) - BUG_ON(!list_empty(&rd->row_queues[i].rqueue.fifo)); + BUG_ON(!list_empty(&rd->row_queues[i].fifo)); (void)cancel_delayed_work_sync(&rd->read_idle.idle_work); BUG_ON(delayed_work_pending(&rd->read_idle.idle_work)); destroy_workqueue(rd->read_idle.idle_workqueue); From 1127d617ca6e5cfb8a1bcbe12956c7cb0b0f69f4 Mon Sep 17 00:00:00 2001 From: Tatyana Brokhman Date: Sat, 12 Jan 2013 16:21:47 +0200 Subject: [PATCH 414/678] block: row: fix sysfs functions - idle_time conversion idle_time was updated to be stored in msec instead of jiffies. So there is no need to convert the value when reading from user or displaying the value to him. 
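For illustration only (hypothetical helper names; the driver's real SHOW_FUNCTION/STORE_FUNCTION macros appear in the hunk below, where the trailing 0/1 argument selects the conversion): keeping idle_time in milliseconds lets the sysfs path pass the value straight through instead of converting at the user-space boundary with the usual jiffies helpers:

#include <linux/jiffies.h>
#include <linux/kernel.h>

/* hypothetical helpers sketching what the 0/1 conversion flag stands for */
static ssize_t idle_time_show(char *buf, unsigned int idle_time, bool in_jiffies)
{
	/* stored in jiffies -> convert for the user; stored in msec -> as-is */
	return sprintf(buf, "%u\n",
		       in_jiffies ? jiffies_to_msecs(idle_time) : idle_time);
}

static void idle_time_store(unsigned int *idle_time, unsigned int user_msec,
			    bool store_in_jiffies)
{
	*idle_time = store_in_jiffies ? msecs_to_jiffies(user_msec) : user_msec;
}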
Change-Id: I58e074b204e90a90536d32199ac668112966e9cf Signed-off-by: Tatyana Brokhman --- block/row-iosched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/row-iosched.c b/block/row-iosched.c index 5a922cbc6e6..bb2c950619f 100644 --- a/block/row-iosched.c +++ b/block/row-iosched.c @@ -691,7 +691,7 @@ SHOW_FUNCTION(row_lp_read_quantum_show, rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum, 0); SHOW_FUNCTION(row_lp_swrite_quantum_show, rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum, 0); -SHOW_FUNCTION(row_read_idle_show, rowd->read_idle.idle_time, 1); +SHOW_FUNCTION(row_read_idle_show, rowd->read_idle.idle_time, 0); SHOW_FUNCTION(row_read_idle_freq_show, rowd->read_idle.freq, 0); #undef SHOW_FUNCTION @@ -731,7 +731,7 @@ STORE_FUNCTION(row_lp_read_quantum_store, STORE_FUNCTION(row_lp_swrite_quantum_store, &rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum, 1, INT_MAX, 1); -STORE_FUNCTION(row_read_idle_store, &rowd->read_idle.idle_time, 1, INT_MAX, 1); +STORE_FUNCTION(row_read_idle_store, &rowd->read_idle.idle_time, 1, INT_MAX, 0); STORE_FUNCTION(row_read_idle_freq_store, &rowd->read_idle.freq, 1, INT_MAX, 0); #undef STORE_FUNCTION From 336cea1493d6ecfcc217bbc3bfd4c1901c9fbd89 Mon Sep 17 00:00:00 2001 From: Tatyana Brokhman Date: Sat, 12 Jan 2013 16:23:18 +0200 Subject: [PATCH 415/678] block: row: Aggregate row_queue parameters to one structure Each ROW queues has several parameters which default values are defined in separate arrays. This patch aggregates all default values into one array. The values in question are: - is idling enabled for the queue - queue quantum - can the queue notify on urgent request Change-Id: I3821b0a042542295069b340406a16b1000873ec6 Signed-off-by: Tatyana Brokhman --- block/row-iosched.c | 69 +++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/block/row-iosched.c b/block/row-iosched.c index bb2c950619f..0ebcbdf6c91 100644 --- a/block/row-iosched.c +++ b/block/row-iosched.c @@ -47,37 +47,38 @@ enum row_queue_prio { ROWQ_MAX_PRIO, }; -/* Flags indicating whether idling is enabled on the queue */ -static const bool queue_idling_enabled[] = { - true, /* ROWQ_PRIO_HIGH_READ */ - true, /* ROWQ_PRIO_REG_READ */ - false, /* ROWQ_PRIO_HIGH_SWRITE */ - false, /* ROWQ_PRIO_REG_SWRITE */ - false, /* ROWQ_PRIO_REG_WRITE */ - false, /* ROWQ_PRIO_LOW_READ */ - false, /* ROWQ_PRIO_LOW_SWRITE */ -}; - -/* Flags indicating whether the queue can notify on urgent requests */ -static const bool urgent_queues[] = { - true, /* ROWQ_PRIO_HIGH_READ */ - true, /* ROWQ_PRIO_REG_READ */ - false, /* ROWQ_PRIO_HIGH_SWRITE */ - false, /* ROWQ_PRIO_REG_SWRITE */ - false, /* ROWQ_PRIO_REG_WRITE */ - false, /* ROWQ_PRIO_LOW_READ */ - false, /* ROWQ_PRIO_LOW_SWRITE */ +/** + * struct row_queue_params - ROW queue parameters + * @idling_enabled: Flag indicating whether idling is enable on + * the queue + * @quantum: Number of requests to be dispatched from this queue + * in a dispatch cycle + * @is_urgent: Flags indicating whether the queue can notify on + * urgent requests + * + */ +struct row_queue_params { + bool idling_enabled; + int quantum; + bool is_urgent; }; -/* Default values for row queues quantums in each dispatch cycle */ -static const int queue_quantum[] = { - 100, /* ROWQ_PRIO_HIGH_READ */ - 100, /* ROWQ_PRIO_REG_READ */ - 2, /* ROWQ_PRIO_HIGH_SWRITE */ - 1, /* ROWQ_PRIO_REG_SWRITE */ - 1, /* ROWQ_PRIO_REG_WRITE */ - 1, /* ROWQ_PRIO_LOW_READ */ - 1 /* ROWQ_PRIO_LOW_SWRITE */ +/* + * This array 
holds the default values of the different configurables + * for each ROW queue. Each row of the array holds the following values: + * {idling_enabled, quantum, is_urgent} + * Each row corresponds to a queue with the same index (according to + * enum row_queue_prio) + */ +static const struct row_queue_params row_queues_def[] = { +/* idling_enabled, quantum, is_urgent */ + {true, 100, true}, /* ROWQ_PRIO_HIGH_READ */ + {true, 100, true}, /* ROWQ_PRIO_REG_READ */ + {false, 2, false}, /* ROWQ_PRIO_HIGH_SWRITE */ + {false, 1, false}, /* ROWQ_PRIO_REG_SWRITE */ + {false, 1, false}, /* ROWQ_PRIO_REG_WRITE */ + {false, 1, false}, /* ROWQ_PRIO_LOW_READ */ + {false, 1, false} /* ROWQ_PRIO_LOW_SWRITE */ }; /* Default values for idling on read queues (in msec) */ @@ -281,7 +282,7 @@ static void row_add_request(struct request_queue *q, rqueue->nr_req++; rq_set_fifo_time(rq, jiffies); /* for statistics*/ - if (queue_idling_enabled[rqueue->prio]) { + if (row_queues_def[rqueue->prio].idling_enabled) { if (delayed_work_pending(&rd->read_idle.idle_work)) (void)cancel_delayed_work( &rd->read_idle.idle_work); @@ -297,7 +298,7 @@ static void row_add_request(struct request_queue *q, rqueue->idle_data.last_insert_time = ktime_get(); } - if (urgent_queues[rqueue->prio] && + if (row_queues_def[rqueue->prio].is_urgent && row_rowq_unserved(rd, rqueue->prio)) { row_log_rowq(rd, rqueue->prio, "added urgent request (total on queue=%d)", @@ -353,7 +354,7 @@ static bool row_urgent_pending(struct request_queue *q) int i; for (i = 0; i < ROWQ_MAX_PRIO; i++) - if (urgent_queues[i] && row_rowq_unserved(rd, i) && + if (row_queues_def[i].is_urgent && row_rowq_unserved(rd, i) && !list_empty(&rd->row_queues[i].fifo)) { row_log_rowq(rd, i, "Urgent request pending (curr=%i)", @@ -494,7 +495,7 @@ static int row_dispatch_requests(struct request_queue *q, int force) } } - if (!force && queue_idling_enabled[currq] && + if (!force && row_queues_def[currq].idling_enabled && rd->row_queues[currq].idle_data.begin_idling) { if (!queue_delayed_work(rd->read_idle.idle_workqueue, &rd->read_idle.idle_work, @@ -543,7 +544,7 @@ static void *row_init_queue(struct request_queue *q) for (i = 0; i < ROWQ_MAX_PRIO; i++) { INIT_LIST_HEAD(&rdata->row_queues[i].fifo); - rdata->row_queues[i].disp_quantum = queue_quantum[i]; + rdata->row_queues[i].disp_quantum = row_queues_def[i].quantum; rdata->row_queues[i].rdata = rdata; rdata->row_queues[i].prio = i; rdata->row_queues[i].idle_data.begin_idling = false; From 9c0656765d23cbd250d521ee4626c8793167034a Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 2 Apr 2013 16:21:19 -0400 Subject: [PATCH 416/678] Revert "arm: tegra: Fix modem_reset_flag assignment" This reverts commit 04f46fdae40969cd69ece12ffaf62c801988aa64. 
--- arch/arm/mach-tegra/baseband-xmm-power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/baseband-xmm-power.c b/arch/arm/mach-tegra/baseband-xmm-power.c index 40ef4e1d2da..11d9b3f309c 100755 --- a/arch/arm/mach-tegra/baseband-xmm-power.c +++ b/arch/arm/mach-tegra/baseband-xmm-power.c @@ -807,7 +807,7 @@ static void baseband_xmm_power_init2_work(struct work_struct *work) } else pr_err("%s: hsic_register is missing\n", __func__); register_hsic_device = false; - modem_reset_flag = 0; + modem_reset_flag == 0; } } From ea30b1fef692f7f13224ec3476114cd6777dfc8d Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 3 Apr 2013 17:35:16 -0400 Subject: [PATCH 417/678] defconfig: switch to SLAB --- arch/arm/configs/metallice_grouper_defconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 6ca823b2f0f..f41de23d3ba 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -136,10 +136,9 @@ CONFIG_PERF_EVENTS=y # CONFIG_DEBUG_PERF_USE_VMALLOC is not set CONFIG_VM_EVENT_COUNTERS=y CONFIG_PCI_QUIRKS=y -# CONFIG_SLUB_DEBUG is not set CONFIG_COMPAT_BRK=y -# CONFIG_SLAB is not set -CONFIG_SLUB=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set CONFIG_HAVE_OPROFILE=y @@ -157,6 +156,7 @@ CONFIG_HAVE_HW_BREAKPOINT=y # # CONFIG_GCOV_KERNEL is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 CONFIG_MODULES=y @@ -3155,7 +3155,7 @@ CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set # CONFIG_DEBUG_OBJECTS is not set -# CONFIG_SLUB_STATS is not set +# CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_KMEMLEAK is not set # CONFIG_DEBUG_PREEMPT is not set # CONFIG_DEBUG_RT_MUTEXES is not set From bd2904878fbc232ade8d3e17ec45f99a2a1a1007 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 24 Feb 2010 09:54:54 +0100 Subject: [PATCH 418/678] locking, rwsem: Annotate inner lock as raw There is no reason to allow the lock protecting rwsems (the ownerless variant) to be preemptible on -rt. Convert it to raw. In mainline this change documents the low level nature of the lock - otherwise there's no functional difference. Lockdep and Sparse checking will work as usual. 
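For context, the conversion pattern the diff below applies to rwsem's wait_lock, shown here on a toy structure (a sketch, not part of the patch): raw_spinlock_t stays a true spinning lock on -rt kernels, whereas spinlock_t may be turned into a sleeping lock there.

#include <linux/spinlock.h>

struct foo {
	raw_spinlock_t lock;		/* was: spinlock_t lock; */
	int counter;
};

static void foo_init(struct foo *f)
{
	raw_spin_lock_init(&f->lock);	/* was: spin_lock_init(&f->lock); */
	f->counter = 0;
}

static void foo_bump(struct foo *f)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&f->lock, flags);
	f->counter++;
	raw_spin_unlock_irqrestore(&f->lock, flags);
}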
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/rwsem-spinlock.h | 2 +- include/linux/rwsem.h | 10 +++++---- lib/rwsem-spinlock.c | 38 +++++++++++++++++----------------- lib/rwsem.c | 14 ++++++------- 4 files changed, 33 insertions(+), 31 deletions(-) diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index 34701241b67..d5b13bc07a0 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -22,7 +22,7 @@ */ struct rw_semaphore { __s32 activity; - spinlock_t wait_lock; + raw_spinlock_t wait_lock; struct list_head wait_list; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 6a6741440cb..63d40655439 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -25,7 +25,7 @@ struct rw_semaphore; /* All arch specific implementations share the same struct */ struct rw_semaphore { long count; - spinlock_t wait_lock; + raw_spinlock_t wait_lock; struct list_head wait_list; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -56,9 +56,11 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) # define __RWSEM_DEP_MAP_INIT(lockname) #endif -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED(name.wait_lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, \ + __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ + LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index ffc9fc7f3b0..f2393c21fe8 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -22,9 +22,9 @@ int rwsem_is_locked(struct rw_semaphore *sem) int ret = 1; unsigned long flags; - if (spin_trylock_irqsave(&sem->wait_lock, flags)) { + if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { ret = (sem->activity != 0); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } return ret; } @@ -44,7 +44,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, lockdep_init_map(&sem->dep_map, name, key, 0); #endif sem->activity = 0; - spin_lock_init(&sem->wait_lock); + raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } EXPORT_SYMBOL(__init_rwsem); @@ -145,12 +145,12 @@ void __sched __down_read(struct rw_semaphore *sem) struct task_struct *tsk; unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); if (sem->activity >= 0 && list_empty(&sem->wait_list)) { /* granted */ sem->activity++; - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); goto out; } @@ -165,7 +165,7 @@ void __sched __down_read(struct rw_semaphore *sem) list_add_tail(&waiter.list, &sem->wait_list); /* we don't need to touch the semaphore struct anymore */ - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); /* wait to be given the lock */ for (;;) { @@ -189,7 +189,7 @@ int __down_read_trylock(struct rw_semaphore *sem) int ret = 0; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); if (sem->activity >= 0 && list_empty(&sem->wait_list)) { /* granted */ @@ -197,7 +197,7 @@ int __down_read_trylock(struct rw_semaphore *sem) ret = 1; } - 
spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return ret; } @@ -212,12 +212,12 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) struct task_struct *tsk; unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); if (sem->activity == 0 && list_empty(&sem->wait_list)) { /* granted */ sem->activity = -1; - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); goto out; } @@ -232,7 +232,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) list_add_tail(&waiter.list, &sem->wait_list); /* we don't need to touch the semaphore struct anymore */ - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); /* wait to be given the lock */ for (;;) { @@ -260,7 +260,7 @@ int __down_write_trylock(struct rw_semaphore *sem) unsigned long flags; int ret = 0; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); if (sem->activity == 0 && list_empty(&sem->wait_list)) { /* granted */ @@ -268,7 +268,7 @@ int __down_write_trylock(struct rw_semaphore *sem) ret = 1; } - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return ret; } @@ -280,12 +280,12 @@ void __up_read(struct rw_semaphore *sem) { unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); if (--sem->activity == 0 && !list_empty(&sem->wait_list)) sem = __rwsem_wake_one_writer(sem); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } /* @@ -295,13 +295,13 @@ void __up_write(struct rw_semaphore *sem) { unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); sem->activity = 0; if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, 1); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } /* @@ -312,12 +312,12 @@ void __downgrade_write(struct rw_semaphore *sem) { unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); sem->activity = 1; if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, 0); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } diff --git a/lib/rwsem.c b/lib/rwsem.c index aa7c3052261..410aa1189b1 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -22,7 +22,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, lockdep_init_map(&sem->dep_map, name, key, 0); #endif sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); + raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } @@ -180,7 +180,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem, set_task_state(tsk, TASK_UNINTERRUPTIBLE); /* set up my own style of waitqueue */ - spin_lock_irq(&sem->wait_lock); + raw_spin_lock_irq(&sem->wait_lock); waiter.task = tsk; waiter.flags = flags; get_task_struct(tsk); @@ -204,7 +204,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem, adjustment == -RWSEM_ACTIVE_WRITE_BIAS) sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); - spin_unlock_irq(&sem->wait_lock); + raw_spin_unlock_irq(&sem->wait_lock); /* wait to be given the lock */ for (;;) { @@ -245,13 +245,13 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { 
unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return sem; } @@ -265,13 +265,13 @@ struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; - spin_lock_irqsave(&sem->wait_lock, flags); + raw_spin_lock_irqsave(&sem->wait_lock, flags); /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); - spin_unlock_irqrestore(&sem->wait_lock, flags); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return sem; } From 465999084343e59cd459fe1337793ca4ce9bf170 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Fri, 1 Feb 2013 18:59:16 +0800 Subject: [PATCH 419/678] rwsem-spinlock: Implement writer lock-stealing for better scalability We (Linux Kernel Performance project) found a regression introduced by commit: 5a505085f043 mm/rmap: Convert the struct anon_vma::mutex to an rwsem which converted all anon_vma::mutex locks rwsem write locks. The semantics are the same, but the behavioral difference is quite huge in some cases. After investigating it we found the root cause: mutexes support lock stealing while rwsems don't. Here is the link for the detailed regression report: https://lkml.org/lkml/2013/1/29/84 Ingo suggested adding write lock stealing to rwsems: "I think we should allow lock-steal between rwsem writers - that will not hurt fairness as most rwsem fairness concerns relate to reader vs. writer fairness" And here is the rwsem-spinlock version. With this patch, we got a double performance increase in one test box with following aim7 workfile: FILESIZE: 1M POOLSIZE: 10M 10 fork_test /usr/bin/time output w/o patch /usr/bin/time_output with patch -- Percent of CPU this job got: 369% Percent of CPU this job got: 537% Voluntary context switches: 640595016 Voluntary context switches: 157915561 We got a 45% increase in CPU usage and saved about 3/4 voluntary context switches. Reported-by: LKP project Suggested-by: Ingo Molnar Signed-off-by: Yuanhan Liu Cc: Alex Shi Cc: David Howells Cc: Michel Lespinasse Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Anton Blanchard Cc: Arjan van de Ven Cc: paul.gortmaker@windriver.com Link: http://lkml.kernel.org/r/1359716356-23865-1-git-send-email-yuanhan.liu@linux.intel.com Signed-off-by: Ingo Molnar --- lib/rwsem-spinlock.c | 65 +++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 43 deletions(-) diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index f2393c21fe8..f3d9cfe7719 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -73,20 +73,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) goto dont_wake_writers; } - /* if we are allowed to wake writers try to grant a single write lock - * if there's a writer at the front of the queue - * - we leave the 'waiting count' incremented to signify potential - * contention + /* + * as we support write lock stealing, we can't set sem->activity + * to -1 here to indicate we get the lock. Instead, we wake it up + * to let it go get it again. 
*/ if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { - sem->activity = -1; - list_del(&waiter->list); - tsk = waiter->task; - /* Don't touch waiter after ->task has been NULLed */ - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); + wake_up_process(waiter->task); goto out; } @@ -121,18 +114,10 @@ static inline struct rw_semaphore * __rwsem_wake_one_writer(struct rw_semaphore *sem) { struct rwsem_waiter *waiter; - struct task_struct *tsk; - - sem->activity = -1; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - list_del(&waiter->list); + wake_up_process(waiter->task); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); return sem; } @@ -204,7 +189,6 @@ int __down_read_trylock(struct rw_semaphore *sem) /* * get a write lock on the semaphore - * - we increment the waiting count anyway to indicate an exclusive lock */ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) { @@ -214,37 +198,32 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity == 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->activity = -1; - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - goto out; - } - - tsk = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ + tsk = current; waiter.task = tsk; waiter.flags = RWSEM_WAITING_FOR_WRITE; - get_task_struct(tsk); - list_add_tail(&waiter.list, &sem->wait_list); - /* we don't need to touch the semaphore struct anymore */ - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - /* wait to be given the lock */ + /* wait for someone to release the lock */ for (;;) { - if (!waiter.task) + /* + * That is the key to support write lock stealing: allows the + * task already on CPU to get the lock soon rather than put + * itself into sleep and waiting for system woke it or someone + * else in the head of the wait list up. + */ + if (sem->activity == 0) break; - schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + schedule(); + raw_spin_lock_irqsave(&sem->wait_lock, flags); } + /* got the lock */ + sem->activity = -1; + list_del(&waiter.list); - tsk->state = TASK_RUNNING; - out: - ; + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } void __sched __down_write(struct rw_semaphore *sem) From 3050a587daf5b7b3ff19e42ec18775981cb26dfb Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 5 Feb 2013 21:11:55 +0800 Subject: [PATCH 420/678] rwsem: Implement writer lock-stealing for better scalability Commit 5a505085f043 ("mm/rmap: Convert the struct anon_vma::mutex to an rwsem") changed struct anon_vma::mutex to an rwsem, which caused aim7 fork_test performance to drop by 50%. Yuanhan Liu did the following excellent analysis: https://lkml.org/lkml/2013/1/29/84 and found that the regression is caused by strict, serialized, FIFO sequential write-ownership of rwsems. Ingo suggested implementing opportunistic lock-stealing for the front writer task in the waitqueue. Yuanhan Liu implemented lock-stealing for spinlock-rwsems, which indeed recovered much of the regression - confirming the analysis that the main factor in the regression was the FIFO writer-fairness of rwsems. In this patch we allow lock-stealing to happen when the first waiter is also writer. 
With that change in place the aim7 fork_test performance is fully recovered on my Intel NHM EP, NHM EX, SNB EP 2S and 4S test-machines. Reported-by: lkp@linux.intel.com Reported-by: Yuanhan Liu Signed-off-by: Alex Shi Cc: David Howells Cc: Michel Lespinasse Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Anton Blanchard Cc: Arjan van de Ven Cc: paul.gortmaker@windriver.com Link: https://lkml.org/lkml/2013/1/29/84 Link: http://lkml.kernel.org/r/1360069915-31619-1-git-send-email-alex.shi@intel.com [ Small stylistic fixes, updated changelog. ] Signed-off-by: Ingo Molnar --- lib/rwsem.c | 75 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/lib/rwsem.c b/lib/rwsem.c index 410aa1189b1..8cf5685b8fb 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -2,6 +2,8 @@ * * Written by David Howells (dhowells@redhat.com). * Derived from arch/i386/kernel/semaphore.c + * + * Writer lock-stealing by Alex Shi */ #include #include @@ -60,7 +62,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) struct rwsem_waiter *waiter; struct task_struct *tsk; struct list_head *next; - signed long oldcount, woken, loop, adjustment; + signed long woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) @@ -72,30 +74,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) */ goto out; - /* There's a writer at the front of the queue - try to grant it the - * write lock. However, we only wake this writer if we can transition - * the active part of the count from 0 -> 1 - */ - adjustment = RWSEM_ACTIVE_WRITE_BIAS; - if (waiter->list.next == &sem->wait_list) - adjustment -= RWSEM_WAITING_BIAS; - - try_again_write: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; - if (oldcount & RWSEM_ACTIVE_MASK) - /* Someone grabbed the sem already */ - goto undo_write; - - /* We must be careful not to touch 'waiter' after we set ->task = NULL. - * It is an allocated on the waiter's stack and may become invalid at - * any time after that point (due to a wakeup from another source). 
- */ - list_del(&waiter->list); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); + /* Wake up the writing waiter and let the task grab the sem: */ + wake_up_process(waiter->task); goto out; readers_only: @@ -157,12 +137,40 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) out: return sem; +} + +/* Try to get write sem, caller holds sem->wait_lock: */ +static int try_get_writer_sem(struct rw_semaphore *sem, + struct rwsem_waiter *waiter) +{ + struct rwsem_waiter *fwaiter; + long oldcount, adjustment; - /* undo the change to the active count, but check for a transition - * 1->0 */ - undo_write: + /* only steal when first waiter is writing */ + fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); + if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE)) + return 0; + + adjustment = RWSEM_ACTIVE_WRITE_BIAS; + /* Only one waiter in the queue: */ + if (fwaiter == waiter && waiter->list.next == &sem->wait_list) + adjustment -= RWSEM_WAITING_BIAS; + +try_again_write: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (!(oldcount & RWSEM_ACTIVE_MASK)) { + /* No active lock: */ + struct task_struct *tsk = waiter->task; + + list_del(&waiter->list); + smp_mb(); + put_task_struct(tsk); + tsk->state = TASK_RUNNING; + return 1; + } + /* some one grabbed the sem already */ if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) - goto out; + return 0; goto try_again_write; } @@ -210,6 +218,15 @@ rwsem_down_failed_common(struct rw_semaphore *sem, for (;;) { if (!waiter.task) break; + + raw_spin_lock_irq(&sem->wait_lock); + /* Try to get the writer sem, may steal from the head writer: */ + if (flags == RWSEM_WAITING_FOR_WRITE) + if (try_get_writer_sem(sem, &waiter)) { + raw_spin_unlock_irq(&sem->wait_lock); + return sem; + } + raw_spin_unlock_irq(&sem->wait_lock); schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); } From 81718c25389da227925c75a0afa826ecb8d9d900 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:08:16 -0500 Subject: [PATCH 421/678] rwsem: make the waiter type an enumeration rather than a bitmask We are not planning to add some new waiter flags, so we can convert the waiter type into an enumeration. Background: David Howells suggested I do this back when I tried adding a new waiter type for unfair readers. However, I believe the cleanup applies regardless of that use case. Signed-off-by: Michel Lespinasse --- lib/rwsem-spinlock.c | 19 +++++++++++-------- lib/rwsem.c | 23 +++++++++++++---------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index f3d9cfe7719..f6f139948a7 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -9,12 +9,15 @@ #include #include +enum rwsem_waiter_type { + RWSEM_WAITING_FOR_WRITE, + RWSEM_WAITING_FOR_READ +}; + struct rwsem_waiter { struct list_head list; struct task_struct *task; - unsigned int flags; -#define RWSEM_WAITING_FOR_READ 0x00000001 -#define RWSEM_WAITING_FOR_WRITE 0x00000002 + enum rwsem_waiter_type type; }; int rwsem_is_locked(struct rw_semaphore *sem) @@ -68,7 +71,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (!wakewrite) { - if (waiter->flags & RWSEM_WAITING_FOR_WRITE) + if (waiter->type == RWSEM_WAITING_FOR_WRITE) goto out; goto dont_wake_writers; } @@ -78,7 +81,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) * to -1 here to indicate we get the lock. 
Instead, we wake it up * to let it go get it again. */ - if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { + if (waiter->type == RWSEM_WAITING_FOR_WRITE) { wake_up_process(waiter->task); goto out; } @@ -86,7 +89,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) /* grant an infinite number of read locks to the front of the queue */ dont_wake_writers: woken = 0; - while (waiter->flags & RWSEM_WAITING_FOR_READ) { + while (waiter->type == RWSEM_WAITING_FOR_READ) { struct list_head *next = waiter->list.next; list_del(&waiter->list); @@ -144,7 +147,7 @@ void __sched __down_read(struct rw_semaphore *sem) /* set up my own style of waitqueue */ waiter.task = tsk; - waiter.flags = RWSEM_WAITING_FOR_READ; + waiter.type = RWSEM_WAITING_FOR_READ; get_task_struct(tsk); list_add_tail(&waiter.list, &sem->wait_list); @@ -201,7 +204,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) /* set up my own style of waitqueue */ tsk = current; waiter.task = tsk; - waiter.flags = RWSEM_WAITING_FOR_WRITE; + waiter.type = RWSEM_WAITING_FOR_WRITE; list_add_tail(&waiter.list, &sem->wait_list); /* wait for someone to release the lock */ diff --git a/lib/rwsem.c b/lib/rwsem.c index 8cf5685b8fb..69924ae8345 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -30,12 +30,15 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, EXPORT_SYMBOL(__init_rwsem); +enum rwsem_waiter_type { + RWSEM_WAITING_FOR_WRITE, + RWSEM_WAITING_FOR_READ +}; + struct rwsem_waiter { struct list_head list; struct task_struct *task; - unsigned int flags; -#define RWSEM_WAITING_FOR_READ 0x00000001 -#define RWSEM_WAITING_FOR_WRITE 0x00000002 + enum rwsem_waiter_type type; }; /* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and @@ -65,7 +68,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) signed long woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) + if (waiter->type != RWSEM_WAITING_FOR_WRITE) goto readers_only; if (wake_type == RWSEM_WAKE_READ_OWNED) @@ -112,10 +115,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) waiter = list_entry(waiter->list.next, struct rwsem_waiter, list); - } while (waiter->flags & RWSEM_WAITING_FOR_READ); + } while (waiter->type != RWSEM_WAITING_FOR_WRITE); adjustment = woken * RWSEM_ACTIVE_READ_BIAS; - if (waiter->flags & RWSEM_WAITING_FOR_READ) + if (waiter->type != RWSEM_WAITING_FOR_WRITE) /* hit end of list above */ adjustment -= RWSEM_WAITING_BIAS; @@ -148,7 +151,7 @@ static int try_get_writer_sem(struct rw_semaphore *sem, /* only steal when first waiter is writing */ fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE)) + if (fwaiter->type != RWSEM_WAITING_FOR_WRITE) return 0; adjustment = RWSEM_ACTIVE_WRITE_BIAS; @@ -179,7 +182,7 @@ static int try_get_writer_sem(struct rw_semaphore *sem, */ static struct rw_semaphore __sched * rwsem_down_failed_common(struct rw_semaphore *sem, - unsigned int flags, signed long adjustment) + enum rwsem_waiter_type type, signed long adjustment) { struct rwsem_waiter waiter; struct task_struct *tsk = current; @@ -190,7 +193,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem, /* set up my own style of waitqueue */ raw_spin_lock_irq(&sem->wait_lock); waiter.task = tsk; - waiter.flags = flags; + waiter.type = type; get_task_struct(tsk); if (list_empty(&sem->wait_list)) @@ -221,7 +224,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem, 
raw_spin_lock_irq(&sem->wait_lock); /* Try to get the writer sem, may steal from the head writer: */ - if (flags == RWSEM_WAITING_FOR_WRITE) + if (type == RWSEM_WAITING_FOR_WRITE) if (try_get_writer_sem(sem, &waiter)) { raw_spin_unlock_irq(&sem->wait_lock); return sem; From 5528ce73642af0c2a95d2e91f4dba71010564014 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:08:57 -0500 Subject: [PATCH 422/678] rwsem: shorter spinlocked section in rwsem_down_failed_common() This change reduces the size of the spinlocked and TASK_UNINTERRUPTIBLE sections in rwsem_down_failed_common(): - We only need the sem->wait_lock to insert ourselves on the wait_list; the waiter node can be prepared outside of the wait_lock. - The task state only needs to be set to TASK_UNINTERRUPTIBLE immediately before checking if we actually need to sleep; it doesn't need to protect the entire function. Signed-off-by: Michel Lespinasse --- lib/rwsem.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/rwsem.c b/lib/rwsem.c index 69924ae8345..8c72a2adac1 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -188,14 +188,12 @@ rwsem_down_failed_common(struct rw_semaphore *sem, struct task_struct *tsk = current; signed long count; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ - raw_spin_lock_irq(&sem->wait_lock); waiter.task = tsk; waiter.type = type; get_task_struct(tsk); + raw_spin_lock_irq(&sem->wait_lock); if (list_empty(&sem->wait_list)) adjustment += RWSEM_WAITING_BIAS; list_add_tail(&waiter.list, &sem->wait_list); @@ -218,7 +216,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem, raw_spin_unlock_irq(&sem->wait_lock); /* wait to be given the lock */ - for (;;) { + while (true) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (!waiter.task) break; @@ -231,7 +230,6 @@ rwsem_down_failed_common(struct rw_semaphore *sem, } raw_spin_unlock_irq(&sem->wait_lock); schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); } tsk->state = TASK_RUNNING; From b9f46f7149ab37700b92e1a158f9f72e25849c7a Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:10:22 -0500 Subject: [PATCH 423/678] rwsem: move rwsem_down_failed_common code into rwsem_down_{read,write}_failed Remove the rwsem_down_failed_common function and replace it with two identical copies of its code in rwsem_down_{read,write}_failed. This is because we want to make different optimizations in rwsem_down_{read,write}_failed; we are adding this pure-duplication step as a separate commit in order to make it easier to check the following steps. 
Signed-off-by: Michel Lespinasse --- lib/rwsem.c | 72 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/lib/rwsem.c b/lib/rwsem.c index 8c72a2adac1..9516459f4be 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -178,12 +178,12 @@ static int try_get_writer_sem(struct rw_semaphore *sem, } /* - * wait for a lock to be granted + * wait for the read lock to be granted */ -static struct rw_semaphore __sched * -rwsem_down_failed_common(struct rw_semaphore *sem, - enum rwsem_waiter_type type, signed long adjustment) +struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) { + enum rwsem_waiter_type type = RWSEM_WAITING_FOR_READ; + signed long adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; signed long count; @@ -237,22 +237,64 @@ rwsem_down_failed_common(struct rw_semaphore *sem, return sem; } -/* - * wait for the read lock to be granted - */ -struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) -{ - return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, - -RWSEM_ACTIVE_READ_BIAS); -} - /* * wait for the write lock to be granted */ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) { - return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, - -RWSEM_ACTIVE_WRITE_BIAS); + enum rwsem_waiter_type type = RWSEM_WAITING_FOR_WRITE; + signed long adjustment = -RWSEM_ACTIVE_WRITE_BIAS; + struct rwsem_waiter waiter; + struct task_struct *tsk = current; + signed long count; + + /* set up my own style of waitqueue */ + waiter.task = tsk; + waiter.type = type; + get_task_struct(tsk); + + raw_spin_lock_irq(&sem->wait_lock); + if (list_empty(&sem->wait_list)) + adjustment += RWSEM_WAITING_BIAS; + list_add_tail(&waiter.list, &sem->wait_list); + + /* we're now waiting on the lock, but no longer actively locking */ + count = rwsem_atomic_update(adjustment, sem); + + /* If there are no active locks, wake the front queued process(es) up. + * + * Alternatively, if we're called from a failed down_write(), there + * were already threads queued before us and there are no active + * writers, the lock must be read owned; so we try to wake any read + * locks that were queued ahead of us. */ + if (count == RWSEM_WAITING_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); + else if (count > RWSEM_WAITING_BIAS && + adjustment == -RWSEM_ACTIVE_WRITE_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + + raw_spin_unlock_irq(&sem->wait_lock); + + /* wait to be given the lock */ + while (true) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!waiter.task) + break; + + raw_spin_lock_irq(&sem->wait_lock); + /* Try to get the writer sem, may steal from the head writer: */ + if (type == RWSEM_WAITING_FOR_WRITE) + if (try_get_writer_sem(sem, &waiter)) { + raw_spin_unlock_irq(&sem->wait_lock); + return sem; + } + raw_spin_unlock_irq(&sem->wait_lock); + schedule(); + } + + tsk->state = TASK_RUNNING; + + return sem; } /* From a0e1c5516f5322a820ed7fa2197f03b41b006acc Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:11:18 -0500 Subject: [PATCH 424/678] rwsem: simplify rwsem_down_read_failed When trying to acquire a read lock, the RWSEM_ACTIVE_READ_BIAS adjustment doesn't cause other readers to block, so we never have to worry about waking them back after canceling this adjustment in rwsem_down_read_failed(). 
We also never want to steal the lock in rwsem_down_read_failed(), so we don't have to grab the wait_lock either. Signed-off-by: Michel Lespinasse --- lib/rwsem.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/lib/rwsem.c b/lib/rwsem.c index 9516459f4be..eb68e7264fe 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -182,7 +182,6 @@ static int try_get_writer_sem(struct rw_semaphore *sem, */ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) { - enum rwsem_waiter_type type = RWSEM_WAITING_FOR_READ; signed long adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; @@ -190,7 +189,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* set up my own style of waitqueue */ waiter.task = tsk; - waiter.type = type; + waiter.type = RWSEM_WAITING_FOR_READ; get_task_struct(tsk); raw_spin_lock_irq(&sem->wait_lock); @@ -201,17 +200,9 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* we're now waiting on the lock, but no longer actively locking */ count = rwsem_atomic_update(adjustment, sem); - /* If there are no active locks, wake the front queued process(es) up. - * - * Alternatively, if we're called from a failed down_write(), there - * were already threads queued before us and there are no active - * writers, the lock must be read owned; so we try to wake any read - * locks that were queued ahead of us. */ + /* If there are no active locks, wake the front queued process(es). */ if (count == RWSEM_WAITING_BIAS) sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); - else if (count > RWSEM_WAITING_BIAS && - adjustment == -RWSEM_ACTIVE_WRITE_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); raw_spin_unlock_irq(&sem->wait_lock); @@ -220,15 +211,6 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (!waiter.task) break; - - raw_spin_lock_irq(&sem->wait_lock); - /* Try to get the writer sem, may steal from the head writer: */ - if (type == RWSEM_WAITING_FOR_WRITE) - if (try_get_writer_sem(sem, &waiter)) { - raw_spin_unlock_irq(&sem->wait_lock); - return sem; - } - raw_spin_unlock_irq(&sem->wait_lock); schedule(); } From 89b4e3cf3edf0b58241d7c57dbd4ae7806ddd4ef Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:11:52 -0500 Subject: [PATCH 425/678] rwsem: simplify rwsem_down_write_failed When waking writers, we never grant them the lock - instead, they have to acquire it themselves when they run, and remove themselves from the wait_list when they succeed. As a result, we can do a few simplifications in rwsem_down_write_failed(): - We don't need to check for !waiter.task since __rwsem_do_wake() doesn't remove writers from the wait_list - There is no point releaseing the wait_lock before entering the wait loop, as we will need to reacquire it immediately. We can change the loop so that the lock is always held at the start of each loop iteration. - We don't need to get a reference on the task structure, since the task is responsible for removing itself from the wait_list. There is no risk, like in the rwsem_down_read_failed() case, that a task would wake up and exit (thus destroying its task structure) while __rwsem_do_wake() is still running - wait_lock protects against that. 
Signed-off-by: Michel Lespinasse --- lib/rwsem.c | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/lib/rwsem.c b/lib/rwsem.c index eb68e7264fe..6cec0ab2c0d 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -161,16 +161,8 @@ static int try_get_writer_sem(struct rw_semaphore *sem, try_again_write: oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; - if (!(oldcount & RWSEM_ACTIVE_MASK)) { - /* No active lock: */ - struct task_struct *tsk = waiter->task; - - list_del(&waiter->list); - smp_mb(); - put_task_struct(tsk); - tsk->state = TASK_RUNNING; + if (!(oldcount & RWSEM_ACTIVE_MASK)) return 1; - } /* some one grabbed the sem already */ if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) return 0; @@ -220,11 +212,10 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) } /* - * wait for the write lock to be granted + * wait until we successfully acquire the write lock */ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) { - enum rwsem_waiter_type type = RWSEM_WAITING_FOR_WRITE; signed long adjustment = -RWSEM_ACTIVE_WRITE_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; @@ -232,8 +223,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) /* set up my own style of waitqueue */ waiter.task = tsk; - waiter.type = type; - get_task_struct(tsk); + waiter.type = RWSEM_WAITING_FOR_WRITE; raw_spin_lock_irq(&sem->wait_lock); if (list_empty(&sem->wait_list)) @@ -255,25 +245,20 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) adjustment == -RWSEM_ACTIVE_WRITE_BIAS) sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); - raw_spin_unlock_irq(&sem->wait_lock); - - /* wait to be given the lock */ + /* wait until we successfully acquire the lock */ while (true) { set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (!waiter.task) + + if (try_get_writer_sem(sem, &waiter)) break; - raw_spin_lock_irq(&sem->wait_lock); - /* Try to get the writer sem, may steal from the head writer: */ - if (type == RWSEM_WAITING_FOR_WRITE) - if (try_get_writer_sem(sem, &waiter)) { - raw_spin_unlock_irq(&sem->wait_lock); - return sem; - } raw_spin_unlock_irq(&sem->wait_lock); schedule(); + raw_spin_lock_irq(&sem->wait_lock); } + list_del(&waiter.list); + raw_spin_unlock_irq(&sem->wait_lock); tsk->state = TASK_RUNNING; return sem; From d9b8cf0a1b16b5daf3000f1c2823011d21eba176 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:13:44 -0500 Subject: [PATCH 426/678] rwsem: more agressive lock stealing in rwsem_down_write_failed Some small code simplifications can be achieved by doing more agressive lock stealing: - When rwsem_down_write_failed() notices that there are no active locks (and thus no thread to wake us if we decided to sleep), it used to wake the first queued process. However, stealing the lock is also sufficient to deal with this case, so we don't need this check anymore. - In try_get_writer_sem(), we can steal the lock even when the first waiter is a reader. This is correct because the code path that wakes readers is protected by the wait_lock. As to the performance effects of this change, they are expected to be minimal: readers are still granted the lock (rather than having to acquire it themselves) when they reach the front of the wait queue, so we have essentially the same behavior as in rwsem-spinlock. 
Signed-off-by: Michel Lespinasse --- lib/rwsem.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/lib/rwsem.c b/lib/rwsem.c index 6cec0ab2c0d..decc13f7695 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -143,20 +143,12 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) } /* Try to get write sem, caller holds sem->wait_lock: */ -static int try_get_writer_sem(struct rw_semaphore *sem, - struct rwsem_waiter *waiter) +static int try_get_writer_sem(struct rw_semaphore *sem) { - struct rwsem_waiter *fwaiter; long oldcount, adjustment; - /* only steal when first waiter is writing */ - fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (fwaiter->type != RWSEM_WAITING_FOR_WRITE) - return 0; - adjustment = RWSEM_ACTIVE_WRITE_BIAS; - /* Only one waiter in the queue: */ - if (fwaiter == waiter && waiter->list.next == &sem->wait_list) + if (list_is_singular(&sem->wait_list)) adjustment -= RWSEM_WAITING_BIAS; try_again_write: @@ -233,23 +225,18 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) /* we're now waiting on the lock, but no longer actively locking */ count = rwsem_atomic_update(adjustment, sem); - /* If there are no active locks, wake the front queued process(es) up. - * - * Alternatively, if we're called from a failed down_write(), there - * were already threads queued before us and there are no active - * writers, the lock must be read owned; so we try to wake any read - * locks that were queued ahead of us. */ - if (count == RWSEM_WAITING_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); - else if (count > RWSEM_WAITING_BIAS && - adjustment == -RWSEM_ACTIVE_WRITE_BIAS) + /* If there were already threads queued before us and there are no + * active writers, the lock must be read owned; so we try to wake + * any read locks that were queued ahead of us. */ + if (count > RWSEM_WAITING_BIAS && + adjustment == -RWSEM_ACTIVE_WRITE_BIAS) sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); /* wait until we successfully acquire the lock */ while (true) { set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (try_get_writer_sem(sem, &waiter)) + if (try_get_writer_sem(sem)) break; raw_spin_unlock_irq(&sem->wait_lock); From c4ad506d44546bbd9c87085ef07b50e59bd722dd Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:14:43 -0500 Subject: [PATCH 427/678] rwsem: use cmpxchg for trying to steal write lock Using rwsem_atomic_update to try stealing the write lock forced us to undo the adjustment in the failure path. We can have simpler and faster code by using cmpxchg instead. 
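In sketch form (hypothetical function name; the bias constants come from the arch's <asm/rwsem.h>, and the caller is assumed to hold sem->wait_lock as in the hunk below), the point of the cmpxchg() is that a failed attempt leaves sem->count untouched, so the old undo path needed after a failed rwsem_atomic_update() can simply go away:

#include <linux/rwsem.h>
#include <linux/list.h>

/* sketch of the write-lock steal after this patch */
static bool rwsem_try_steal_write(struct rw_semaphore *sem)
{
	long newcount = RWSEM_ACTIVE_WRITE_BIAS;

	if (!list_is_singular(&sem->wait_list))
		newcount += RWSEM_WAITING_BIAS;	/* other waiters stay queued */

	/* succeeds only if the count still reads "waiters queued, no active
	 * lockers"; on failure nothing was modified and we go back to sleep */
	return cmpxchg(&sem->count, RWSEM_WAITING_BIAS, newcount)
			== RWSEM_WAITING_BIAS;
}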
Signed-off-by: Michel Lespinasse --- lib/rwsem.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/lib/rwsem.c b/lib/rwsem.c index decc13f7695..60f3337163f 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -142,25 +142,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) return sem; } -/* Try to get write sem, caller holds sem->wait_lock: */ -static int try_get_writer_sem(struct rw_semaphore *sem) -{ - long oldcount, adjustment; - - adjustment = RWSEM_ACTIVE_WRITE_BIAS; - if (list_is_singular(&sem->wait_list)) - adjustment -= RWSEM_WAITING_BIAS; - -try_again_write: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; - if (!(oldcount & RWSEM_ACTIVE_MASK)) - return 1; - /* some one grabbed the sem already */ - if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) - return 0; - goto try_again_write; -} - /* * wait for the read lock to be granted */ @@ -236,7 +217,12 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) while (true) { set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (try_get_writer_sem(sem)) + /* Try acquiring the write lock. */ + count = RWSEM_ACTIVE_WRITE_BIAS; + if (!list_is_singular(&sem->wait_list)) + count += RWSEM_WAITING_BIAS; + if (cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == + RWSEM_WAITING_BIAS) break; raw_spin_unlock_irq(&sem->wait_lock); From 6d97c8bcfa7bb8e19019b8a41a87c34afa64fb1a Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:16:12 -0500 Subject: [PATCH 428/678] rwsem: avoid taking wait_lock in rwsem_down_write_failed In rwsem_down_write_failed(), if there are active locks after we wake up (i.e. the lock got stolen from us), skip taking the wait_lock and go back to sleep immediately. Signed-off-by: Michel Lespinasse --- lib/rwsem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/rwsem.c b/lib/rwsem.c index 60f3337163f..89aac4aee98 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -214,8 +214,8 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); /* wait until we successfully acquire the lock */ + set_task_state(tsk, TASK_UNINTERRUPTIBLE); while (true) { - set_task_state(tsk, TASK_UNINTERRUPTIBLE); /* Try acquiring the write lock. */ count = RWSEM_ACTIVE_WRITE_BIAS; @@ -226,7 +226,13 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) break; raw_spin_unlock_irq(&sem->wait_lock); - schedule(); + + /* Block until there are no active lockers. */ + do { + schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + } while (sem->count & RWSEM_ACTIVE_MASK); + raw_spin_lock_irq(&sem->wait_lock); } From 0114396db51983d94771452fa15b72477f7822f5 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:16:56 -0500 Subject: [PATCH 429/678] rwsem: skip initial trylock in rwsem_down_write_failed We can skip the initial trylock in rwsem_down_write_failed() if there are known active lockers already, thus saving one likely-to-fail cmpxchg. 
Signed-off-by: Michel Lespinasse --- lib/rwsem.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/rwsem.c b/lib/rwsem.c index 89aac4aee98..baf06324928 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -216,14 +216,15 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) /* wait until we successfully acquire the lock */ set_task_state(tsk, TASK_UNINTERRUPTIBLE); while (true) { - - /* Try acquiring the write lock. */ - count = RWSEM_ACTIVE_WRITE_BIAS; - if (!list_is_singular(&sem->wait_list)) - count += RWSEM_WAITING_BIAS; - if (cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == + if (!(count & RWSEM_ACTIVE_MASK)) { + /* Try acquiring the write lock. */ + count = RWSEM_ACTIVE_WRITE_BIAS; + if (!list_is_singular(&sem->wait_list)) + count += RWSEM_WAITING_BIAS; + if (cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == RWSEM_WAITING_BIAS) - break; + break; + } raw_spin_unlock_irq(&sem->wait_lock); @@ -231,7 +232,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) do { schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } while (sem->count & RWSEM_ACTIVE_MASK); + } while ((count = sem->count) & RWSEM_ACTIVE_MASK); raw_spin_lock_irq(&sem->wait_lock); } From 51099ed9a73d0c1202f06eb7c03cd706bf12b705 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:18:34 -0500 Subject: [PATCH 430/678] rwsem: simplify __rwsem_do_wake This is mostly for cleanup value: - We don't need several gotos to handle the case where the first waiter is a writer. Two simple tests will do (and generate very similar code). - In the remainder of the function, we know the first waiter is a reader, so we don't have to double check that. We can use do..while loops to iterate over the readers to wake (generates slightly better code). Signed-off-by: Michel Lespinasse --- lib/rwsem-spinlock.c | 25 ++++++++----------------- lib/rwsem.c | 26 ++++++++++++-------------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index f6f139948a7..d437919b138 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -70,26 +70,17 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (!wakewrite) { - if (waiter->type == RWSEM_WAITING_FOR_WRITE) - goto out; - goto dont_wake_writers; - } - - /* - * as we support write lock stealing, we can't set sem->activity - * to -1 here to indicate we get the lock. Instead, we wake it up - * to let it go get it again. - */ if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - wake_up_process(waiter->task); + if (wakewrite) + /* Wake up a writer. Note that we do not grant it the + * lock - it will have to acquire it when it runs. */ + wake_up_process(waiter->task); goto out; } - /* grant an infinite number of read locks to the front of the queue */ - dont_wake_writers: + /* grant read locks to all queued readers. 
*/ woken = 0; - while (waiter->type == RWSEM_WAITING_FOR_READ) { + do { struct list_head *next = waiter->list.next; list_del(&waiter->list); @@ -99,10 +90,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) wake_up_process(tsk); put_task_struct(tsk); woken++; - if (list_empty(&sem->wait_list)) + if (next == &sem->wait_list) break; waiter = list_entry(next, struct rwsem_waiter, list); - } + } while (waiter->type != RWSEM_WAITING_FOR_WRITE); sem->activity += woken; diff --git a/lib/rwsem.c b/lib/rwsem.c index baf06324928..86552f7c378 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -68,20 +68,17 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) signed long woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (waiter->type != RWSEM_WAITING_FOR_WRITE) - goto readers_only; - - if (wake_type == RWSEM_WAKE_READ_OWNED) - /* Another active reader was observed, so wakeup is not - * likely to succeed. Save the atomic op. - */ + if (waiter->type == RWSEM_WAITING_FOR_WRITE) { + if (wake_type != RWSEM_WAKE_READ_OWNED) + /* Wake writer at the front of the queue, but do not + * grant it the lock yet as we want other writers + * to be able to steal it. Readers, on the other hand, + * will block as they will notice the queued writer. + */ + wake_up_process(waiter->task); goto out; + } - /* Wake up the writing waiter and let the task grab the sem: */ - wake_up_process(waiter->task); - goto out; - - readers_only: /* If we come here from up_xxxx(), another thread might have reached * rwsem_down_failed_common() before we acquired the spinlock and * woken up a waiter, making it now active. We prefer to check for @@ -125,7 +122,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) rwsem_atomic_add(adjustment, sem); next = sem->wait_list.next; - for (loop = woken; loop > 0; loop--) { + loop = woken; + do { waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; tsk = waiter->task; @@ -133,7 +131,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) waiter->task = NULL; wake_up_process(tsk); put_task_struct(tsk); - } + } while (--loop); sem->wait_list.next = next; next->prev = &sem->wait_list; From a4018d426447e29c75ebd438382cff073afe076a Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:19:38 -0500 Subject: [PATCH 431/678] rwsem: implement support for write lock stealing on the fastpath When we decide to wake up readers, we must first grant them as many read locks as necessary, and then actually wake up all these readers. But in order to know how many read shares to grant, we must first count the readers at the head of the queue. This might take a while if there are many readers, and we want to be protected against a writer stealing the lock while we're counting. To that end, we grant the first reader lock before counting how many more readers are queued. We also require some adjustments to the wake_type semantics. RWSEM_WAKE_NO_ACTIVE used to mean that we had found the count to be RWSEM_WAITING_BIAS, in which case the rwsem was known to be free as nobody could steal it while we hold the wait_lock. This doesn't make sense once we implement fastpath write lock stealing, so we now use RWSEM_WAKE_ANY in that case. Similarly, when rwsem_down_write_failed found that a read lock was active, it would use RWSEM_WAKE_READ_OWNED which signalled that new readers could be woken without checking first that the rwsem was available. 
We can't do that anymore since the existing readers might release their read locks, and a writer could steal the lock before we wake up additional readers. So, we have to use a new RWSEM_WAKE_READERS value to indicate we only want to wake readers, but we don't currently hold any read lock. Signed-off-by: Michel Lespinasse --- lib/rwsem.c | 63 ++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/lib/rwsem.c b/lib/rwsem.c index 86552f7c378..96b00a1b3ff 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -41,13 +41,11 @@ struct rwsem_waiter { enum rwsem_waiter_type type; }; -/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and - * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held - * since the rwsem value was observed. - */ -#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */ -#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */ -#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */ +enum rwsem_wake_type { + RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */ + RWSEM_WAKE_READERS, /* Wake readers only */ + RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */ +}; /* * handle the lock release when processes blocked on it that can now run @@ -60,16 +58,16 @@ struct rwsem_waiter { * - writers are only woken if downgrading is false */ static struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int wake_type) +__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) { struct rwsem_waiter *waiter; struct task_struct *tsk; struct list_head *next; - signed long woken, loop, adjustment; + signed long oldcount, woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - if (wake_type != RWSEM_WAKE_READ_OWNED) + if (wake_type == RWSEM_WAKE_ANY) /* Wake writer at the front of the queue, but do not * grant it the lock yet as we want other writers * to be able to steal it. Readers, on the other hand, @@ -79,24 +77,24 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) goto out; } - /* If we come here from up_xxxx(), another thread might have reached - * rwsem_down_failed_common() before we acquired the spinlock and - * woken up a waiter, making it now active. We prefer to check for - * this first in order to not spend too much time with the spinlock - * held if we're not going to be able to wake up readers in the end. - * - * Note that we do not need to update the rwsem count: any writer - * trying to acquire rwsem will run rwsem_down_write_failed() due - * to the waiting threads and block trying to acquire the spinlock. - * - * We use a dummy atomic update in order to acquire the cache line - * exclusively since we expect to succeed and run the final rwsem - * count adjustment pretty soon. + /* Writers might steal the lock before we grant it to the next reader. + * We prefer to do the first reader grant before counting readers + * so we can bail out early if a writer stole the lock. */ - if (wake_type == RWSEM_WAKE_ANY && - rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) - /* Someone grabbed the sem for write already */ - goto out; + adjustment = 0; + if (wake_type != RWSEM_WAKE_READ_OWNED) { + adjustment = RWSEM_ACTIVE_READ_BIAS; + try_reader_grant: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { + /* A writer stole the lock. Undo our reader grant. 
*/ + if (rwsem_atomic_update(-adjustment, sem) & + RWSEM_ACTIVE_MASK) + goto out; + /* Last active locker left. Retry waking readers. */ + goto try_reader_grant; + } + } /* Grant an infinite number of read locks to the readers at the front * of the queue. Note we increment the 'active part' of the count by @@ -114,12 +112,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - adjustment = woken * RWSEM_ACTIVE_READ_BIAS; + adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; if (waiter->type != RWSEM_WAITING_FOR_WRITE) /* hit end of list above */ adjustment -= RWSEM_WAITING_BIAS; - rwsem_atomic_add(adjustment, sem); + if (adjustment) + rwsem_atomic_add(adjustment, sem); next = sem->wait_list.next; loop = woken; @@ -164,8 +163,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) count = rwsem_atomic_update(adjustment, sem); /* If there are no active locks, wake the front queued process(es). */ - if (count == RWSEM_WAITING_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); + if (!(count & RWSEM_ACTIVE_MASK)) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); raw_spin_unlock_irq(&sem->wait_lock); @@ -209,7 +208,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) * any read locks that were queued ahead of us. */ if (count > RWSEM_WAITING_BIAS && adjustment == -RWSEM_ACTIVE_WRITE_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); /* wait until we successfully acquire the lock */ set_task_state(tsk, TASK_UNINTERRUPTIBLE); From 627cec5abcf36bdfbe324fce795e02579dd8650a Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 15 Mar 2013 15:20:40 -0500 Subject: [PATCH 432/678] rwsem: do not block readers at head of queue if other readers are active This change fixes a race condition where a reader might determine it needs to block, but by the time it acquires the wait_lock the rwsem has active readers and no queued waiters. In this situation the reader can just run in parallel with the existing active readers; it does not need to block until the active readers complete. Thanks to Peter Hurley for noticing this possible race. Signed-off-by: Michel Lespinasse --- lib/rwsem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/rwsem.c b/lib/rwsem.c index 96b00a1b3ff..ddb234a36fc 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -162,8 +162,14 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* we're now waiting on the lock, but no longer actively locking */ count = rwsem_atomic_update(adjustment, sem); - /* If there are no active locks, wake the front queued process(es). */ - if (!(count & RWSEM_ACTIVE_MASK)) + /* If there are no active locks, wake the front queued process(es). + * + * If there are no writers and we are first in the queue, + * wake our own waiter to join the existing active readers ! 
+ */ + if (count == RWSEM_WAITING_BIAS || + (count > RWSEM_WAITING_BIAS && + adjustment != -RWSEM_ACTIVE_READ_BIAS)) sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); raw_spin_unlock_irq(&sem->wait_lock); From 491b5aefcfc1c43dfda8909dab8b2515e3c57f03 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 3 Apr 2013 23:17:56 -0400 Subject: [PATCH 433/678] mach-tegra: board-grouper-panel.c: reset to stock mr1.1 --- arch/arm/mach-tegra/board-grouper-panel.c | 61 +++-------------------- 1 file changed, 7 insertions(+), 54 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index f244b5dd578..e05433934d7 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -66,9 +66,8 @@ static struct regulator *grouper_lvds_reg; static struct regulator *grouper_lvds_vdd_panel; static tegra_dc_bl_output grouper_bl_output_measured = { - /* Stock Backlight values */ -/* 0, 5, 5, 5, 5, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, + 0, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, @@ -99,53 +98,6 @@ static tegra_dc_bl_output grouper_bl_output_measured = { 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 -*/ - /* 0 - 9 */ - /* unused by standard android brightness settings */ -#ifdef DECREASE_MIN_BRIGHTNESS - 0, 2, 3, 4, 6, 8, 8, 8, 8, 8, -#else - 0, 2, 4, 6, 8, 13, 13, 13, 13, 13, -#endif - /* 10 - 15 */ - /* backlight for level one below SD min (16-1=15) must be the same as backlihgt for SD min (13=13) to prevent flickering */ -#ifdef DECREASE_MIN_BRIGHTNESS - 8, 9, 10, 11, 12, 13, -#else - 13, 13, 13, 13, 13, 13, -#endif - /* 16 - 31 */ - /* screen dimmer minimum - default: 13 -> 13. currently: 16 -> 13. 
*/ - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - /* 32 - 47 */ - 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - /* 48 - 63 */ - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, - /* 64 - 79 */ - 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, - /* 80 - 95 */ - 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, - /* 96 - 111 */ - 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, - /* 112 - 127 */ - 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, - /* 128 - 143 */ - 125, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, - /* 144 - 159 */ - 142, 143, 144, 145, 146, 147, 148, 148, 149, 150, 151, 152, 153, 154, 155, 156, - /* 160 - 175 */ - 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, - /* 176 - 191 */ - 173, 174, 175, 176, 177, 179, 180, 181, 182, 184, 185, 186, 187, 188, 189, 190, - /* 192 - 207 */ - 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, - /* 208 - 223 */ - 207, 208, 209, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, - /* 224 - 239 */ - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - /* 240 - 255 */ - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 - }; static p_tegra_dc_bl_output bl_output; @@ -435,7 +387,8 @@ static struct resource grouper_disp2_resources[] = { #endif static struct tegra_dc_mode grouper_panel_modes[] = { -{ + { + /* 1280x800@60Hz */ .pclk = 68000000, .h_ref_to_sync = 1, .v_ref_to_sync = 1, @@ -451,13 +404,13 @@ static struct tegra_dc_mode grouper_panel_modes[] = { }; static struct tegra_dc_sd_settings grouper_sd_settings = { - .enable = 0, /* enabled by default. */ + .enable = 1, /* enabled by default. 
*/ .use_auto_pwm = false, .hw_update_delay = 0, .bin_width = -1, .aggressiveness = 1, .phase_in_adjustments = true, - .panel_min_brightness = 16, + .panel_min_brightness = 13, .use_vid_luma = false, /* Default video coefficients */ .coeff = {5, 9, 2}, @@ -596,7 +549,7 @@ static struct tegra_dc_out grouper_disp1_out = { .type = TEGRA_DC_OUT_RGB, .depth = 18, - .dither = TEGRA_DC_ERRDIFF_DITHER, + .dither = TEGRA_DC_ORDERED_DITHER, .modes = grouper_panel_modes, .n_modes = ARRAY_SIZE(grouper_panel_modes), From 3d8ab316975fce38a7af19793b5aa039a4fafc96 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 3 Apr 2013 23:22:53 -0400 Subject: [PATCH 434/678] mach-tegra: board-grouper-panel.c: add user min brightness control --- arch/arm/mach-tegra/board-grouper-panel.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index e05433934d7..415373997b8 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -39,6 +39,11 @@ #include "gpio-names.h" #include + +#include +static unsigned int min_brightness = 13; +module_param(min_brightness, uint, 0644); + /* grouper default display board pins */ #define grouper_lvds_avdd_en TEGRA_GPIO_PH6 #define grouper_lvds_rst TEGRA_GPIO_PG7 @@ -66,8 +71,8 @@ static struct regulator *grouper_lvds_reg; static struct regulator *grouper_lvds_vdd_panel; static tegra_dc_bl_output grouper_bl_output_measured = { - 0, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, @@ -154,8 +159,11 @@ static int grouper_backlight_notify(struct device *unused, int brightness) if (brightness > 255) pr_info("Error: Brightness > 255!\n"); else - brightness = bl_output[brightness]; - + if ((brightness > 0) && (brightness < min_brightness)) { + brightness = min_brightness; + } else { + brightness = bl_output[brightness]; + } return brightness; } From 48a358a345c4051665e9a343807d0aa96f7053c0 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 3 Apr 2013 23:24:14 -0400 Subject: [PATCH 435/678] mach-tegra: board-grouper-panel.c: change dithering mode --- arch/arm/mach-tegra/board-grouper-panel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 415373997b8..20e9f4e7d8f 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -557,7 +557,7 @@ static struct tegra_dc_out grouper_disp1_out = { .type = TEGRA_DC_OUT_RGB, .depth = 18, - .dither = TEGRA_DC_ORDERED_DITHER, + .dither = TEGRA_DC_ERRDIFF_DITHER, .modes = grouper_panel_modes, .n_modes = ARRAY_SIZE(grouper_panel_modes), From c89efb76dceb966fed93e460ba5ab11cc03dd7a0 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 4 Apr 2013 00:05:23 -0400 Subject: [PATCH 436/678] mach-tegra: board-grouper-panel.c: ensure auto min and slider min are the same --- arch/arm/mach-tegra/board-grouper-panel.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 20e9f4e7d8f..8faa6bb485a 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -41,8 +41,8 @@ #include -static unsigned int min_brightness = 13; 
-module_param(min_brightness, uint, 0644); +static unsigned int min_backlight = 13; +module_param(min_backlight, uint, 0644); /* grouper default display board pins */ #define grouper_lvds_avdd_en TEGRA_GPIO_PH6 @@ -148,6 +148,7 @@ static void grouper_backlight_exit(struct device *dev) static int grouper_backlight_notify(struct device *unused, int brightness) { int cur_sd_brightness = atomic_read(&sd_brightness); + int min_brightness = min_backlight; /* Set the backlight GPIO pin mode to 'backlight_enable' */ //gpio_set_value(grouper_bl_enb, !!brightness); @@ -155,12 +156,16 @@ static int grouper_backlight_notify(struct device *unused, int brightness) /* SD brightness is a percentage, 8-bit value. */ brightness = (brightness * cur_sd_brightness) / 255; + /* Ensure that min backlight goes up to at least 10 to prevent auto-min != slider-min */ + if (min_backlight < 10) + min_brightness = 10; + /* Apply any backlight response curve */ if (brightness > 255) pr_info("Error: Brightness > 255!\n"); else if ((brightness > 0) && (brightness < min_brightness)) { - brightness = min_brightness; + brightness = min_backlight; } else { brightness = bl_output[brightness]; } From 373d74c0df2a3da125883491a876c2ec786b19a6 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 5 Apr 2013 15:13:12 -0400 Subject: [PATCH 437/678] Revert "net: wireless: bcmdhd: Enable P2P probe request handling only during discovery" This reverts commit effc09cc0524d65ec470b158e2faab61f77d99bb. --- drivers/net/wireless/bcmdhd/dhd_linux.c | 1 + drivers/net/wireless/bcmdhd/wl_cfg80211.c | 3 ++- drivers/net/wireless/bcmdhd/wl_cfg80211.h | 1 - drivers/net/wireless/bcmdhd/wl_cfgp2p.c | 9 +-------- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index a871bdba807..6cc2e1ed89b 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -3391,6 +3391,7 @@ dhd_preinit_ioctls(dhd_pub_t *dhd) setbit(eventmask, WLC_E_ACTION_FRAME_RX); setbit(eventmask, WLC_E_ACTION_FRAME_COMPLETE); setbit(eventmask, WLC_E_ACTION_FRAME_OFF_CHAN_COMPLETE); + setbit(eventmask, WLC_E_P2P_PROBREQ_MSG); setbit(eventmask, WLC_E_P2P_DISC_LISTEN_COMPLETE); } #endif /* WL_CFG80211 */ diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index 2f28bf1d3f7..b4f47d1393c 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -331,6 +331,7 @@ static __used bool wl_is_ibssstarter(struct wl_priv *wl); */ static s32 __wl_cfg80211_up(struct wl_priv *wl); static s32 __wl_cfg80211_down(struct wl_priv *wl); +static s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add); static bool wl_is_linkdown(struct wl_priv *wl, const wl_event_msg_t *e); static bool wl_is_linkup(struct wl_priv *wl, const wl_event_msg_t *e, struct net_device *ndev); static bool wl_is_nonetwork(struct wl_priv *wl, const wl_event_msg_t *e); @@ -7088,7 +7089,7 @@ static s32 wl_config_ifmode(struct wl_priv *wl, struct net_device *ndev, s32 ift return 0; } -s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add) +static s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add) { s8 iovbuf[WL_EVENTING_MASK_LEN + 12]; diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.h b/drivers/net/wireless/bcmdhd/wl_cfg80211.h index 6d237eee2cc..dfb0d0de2f7 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.h +++ 
b/drivers/net/wireless/bcmdhd/wl_cfg80211.h @@ -689,5 +689,4 @@ void wl_cfg80211_enable_trace(int level); extern s32 wl_update_wiphybands(struct wl_priv *wl); extern s32 wl_cfg80211_if_is_group_owner(void); extern int wl_cfg80211_update_power_mode(struct net_device *dev); -extern s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add); #endif /* _wl_cfg80211_h_ */ diff --git a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c index aedf9705b44..7bcd14486dd 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c +++ b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c @@ -641,7 +641,7 @@ wl_cfgp2p_enable_discovery(struct wl_priv *wl, struct net_device *dev, } set_ie: ret = wl_cfgp2p_set_management_ie(wl, dev, - wl_to_p2p_bss_bssidx(wl, P2PAPI_BSSCFG_DEVICE), + wl_cfgp2p_find_idx(wl, dev), VNDR_IE_PRBREQ_FLAG, ie, ie_len); if (unlikely(ret < 0)) { @@ -1230,10 +1230,6 @@ wl_cfgp2p_listen_complete(struct wl_priv *wl, struct net_device *ndev, } cfg80211_remain_on_channel_expired(ndev, wl->last_roc_id, &wl->remain_on_chan, wl->remain_on_chan_type, GFP_KERNEL); - if (wl_add_remove_eventmsg(wl_to_prmry_ndev(wl), - WLC_E_P2P_PROBREQ_MSG, false) != BCME_OK) { - CFGP2P_ERR((" failed to unset WLC_E_P2P_PROPREQ_MSG\n")); - } } else wl_clr_p2p_status(wl, LISTEN_EXPIRED); @@ -1325,9 +1321,6 @@ wl_cfgp2p_discover_listen(struct wl_priv *wl, s32 channel, u32 duration_ms) } else wl_clr_p2p_status(wl, LISTEN_EXPIRED); - if (wl_add_remove_eventmsg(wl_to_prmry_ndev(wl), WLC_E_P2P_PROBREQ_MSG, true) != BCME_OK) { - CFGP2P_ERR((" failed to set WLC_E_P2P_PROPREQ_MSG\n")); - } wl_cfgp2p_set_p2p_mode(wl, WL_P2P_DISC_ST_LISTEN, channel, (u16) duration_ms, wl_to_p2p_bss_bssidx(wl, P2PAPI_BSSCFG_DEVICE)); _timer = &wl->p2p->listen_timer; From 0e5e5352bfc554f1c6da7a9cfba5cb8b4147c5a6 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 5 Apr 2013 15:15:30 -0400 Subject: [PATCH 438/678] Revert "Proportional Rate Reduction for TCP." This reverts commit a5439ddd77ac24480993567539bb67c111c2568f. --- include/linux/tcp.h | 4 --- net/ipv4/tcp_input.c | 58 +++++-------------------------------------- net/ipv4/tcp_output.c | 7 +----- 3 files changed, 7 insertions(+), 62 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6b63b310af3..531ede8006d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -379,10 +379,6 @@ struct tcp_sock { u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; - u32 prior_cwnd; /* Congestion window at start of Recovery. */ - u32 prr_delivered; /* Number of newly delivered packets to - * receiver in Recovery. */ - u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e12dfb3f62e..d73aab3fbfc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2828,13 +2828,9 @@ static int tcp_try_undo_loss(struct sock *sk) static inline void tcp_complete_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - - /* Do not moderate cwnd if it's already undone in cwr or recovery. 
*/ - if (tp->undo_marker) { - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - else /* PRR */ - tp->snd_cwnd = tp->snd_ssthresh; + /* Do not moderate cwnd if it's already undone in cwr or recovery */ + if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { + tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); @@ -2952,38 +2948,6 @@ void tcp_simple_retransmit(struct sock *sk) } EXPORT_SYMBOL(tcp_simple_retransmit); -/* This function implements the PRR algorithm, specifcally the PRR-SSRB - * (proportional rate reduction with slow start reduction bound) as described in - * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. - * It computes the number of packets to send (sndcnt) based on packets newly - * delivered: - * 1) If the packets in flight is larger than ssthresh, PRR spreads the - * cwnd reductions across a full RTT. - * 2) If packets in flight is lower than ssthresh (such as due to excess - * losses and/or application stalls), do not perform any further cwnd - * reductions, but instead slow start up to ssthresh. - */ -static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, - int fast_rexmit, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - int sndcnt = 0; - int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); - - if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { - u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + - tp->prior_cwnd - 1; - sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; - } else { - sndcnt = min_t(int, delta, - max_t(int, tp->prr_delivered - tp->prr_out, - newly_acked_sacked) + 1); - } - - sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); - tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; -} - /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2995,8 +2959,7 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int newly_acked_sacked, int flag) +static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -3146,17 +3109,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; - tp->prior_cwnd = tp->snd_cwnd; - tp->prr_delivered = 0; - tp->prr_out = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); fast_rexmit = 1; } if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) tcp_update_scoreboard(sk, fast_rexmit); - tp->prr_delivered += newly_acked_sacked; - tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag); + tcp_cwnd_down(sk, flag); tcp_xmit_retransmit_queue(sk); } @@ -3671,8 +3630,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 prior_in_flight; u32 prior_fackets; int prior_packets; - int prior_sacked = tp->sacked_out; - int newly_acked_sacked = 0; int frto_cwnd = 0; /* If the ack is older than previous acks @@ -3744,9 +3701,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. 
*/ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); - newly_acked_sacked = (prior_packets - prior_sacked) - - (tp->packets_out - tp->sacked_out); - if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); /* Guarantee sacktag reordering detection against wrap-arounds */ @@ -3759,7 +3713,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, - newly_acked_sacked, flag); + flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b1f32b459e5..faf257b9415 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1794,13 +1794,11 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, tcp_event_new_data_sent(sk, skb); tcp_minshall_update(tp, mss_now, skb); - sent_pkts += tcp_skb_pcount(skb); + sent_pkts++; if (push_one) break; } - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) - tp->prr_out += sent_pkts; if (likely(sent_pkts)) { tcp_cwnd_validate(sk); @@ -2294,9 +2292,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk) return; NET_INC_STATS_BH(sock_net(sk), mib_idx); - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) - tp->prr_out += tcp_skb_pcount(skb); - if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, From a722efde1868caa622ce0472363875e0399e993f Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 5 Apr 2013 15:36:01 -0400 Subject: [PATCH 439/678] Revert "Revert "Proportional Rate Reduction for TCP."" This reverts commit 0e5e5352bfc554f1c6da7a9cfba5cb8b4147c5a6. --- include/linux/tcp.h | 4 +++ net/ipv4/tcp_input.c | 58 ++++++++++++++++++++++++++++++++++++++----- net/ipv4/tcp_output.c | 7 +++++- 3 files changed, 62 insertions(+), 7 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 531ede8006d..6b63b310af3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -379,6 +379,10 @@ struct tcp_sock { u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; + u32 prior_cwnd; /* Congestion window at start of Recovery. */ + u32 prr_delivered; /* Number of newly delivered packets to + * receiver in Recovery. */ + u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d73aab3fbfc..e12dfb3f62e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2828,9 +2828,13 @@ static int tcp_try_undo_loss(struct sock *sk) static inline void tcp_complete_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - /* Do not moderate cwnd if it's already undone in cwr or recovery */ - if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { - tp->snd_cwnd = tp->snd_ssthresh; + + /* Do not moderate cwnd if it's already undone in cwr or recovery. 
*/ + if (tp->undo_marker) { + if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); + else /* PRR */ + tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); @@ -2948,6 +2952,38 @@ void tcp_simple_retransmit(struct sock *sk) } EXPORT_SYMBOL(tcp_simple_retransmit); +/* This function implements the PRR algorithm, specifcally the PRR-SSRB + * (proportional rate reduction with slow start reduction bound) as described in + * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. + * It computes the number of packets to send (sndcnt) based on packets newly + * delivered: + * 1) If the packets in flight is larger than ssthresh, PRR spreads the + * cwnd reductions across a full RTT. + * 2) If packets in flight is lower than ssthresh (such as due to excess + * losses and/or application stalls), do not perform any further cwnd + * reductions, but instead slow start up to ssthresh. + */ +static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, + int fast_rexmit, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + int sndcnt = 0; + int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + + if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { + u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + + tp->prior_cwnd - 1; + sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; + } else { + sndcnt = min_t(int, delta, + max_t(int, tp->prr_delivered - tp->prr_out, + newly_acked_sacked) + 1); + } + + sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); + tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2959,7 +2995,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit); * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) +static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, + int newly_acked_sacked, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -3109,13 +3146,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; + tp->prior_cwnd = tp->snd_cwnd; + tp->prr_delivered = 0; + tp->prr_out = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); fast_rexmit = 1; } if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_down(sk, flag); + tp->prr_delivered += newly_acked_sacked; + tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag); tcp_xmit_retransmit_queue(sk); } @@ -3630,6 +3671,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 prior_in_flight; u32 prior_fackets; int prior_packets; + int prior_sacked = tp->sacked_out; + int newly_acked_sacked = 0; int frto_cwnd = 0; /* If the ack is older than previous acks @@ -3701,6 +3744,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. 
*/ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); + newly_acked_sacked = (prior_packets - prior_sacked) - + (tp->packets_out - tp->sacked_out); + if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); /* Guarantee sacktag reordering detection against wrap-arounds */ @@ -3713,7 +3759,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, - flag); + newly_acked_sacked, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index faf257b9415..b1f32b459e5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1794,11 +1794,13 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, tcp_event_new_data_sent(sk, skb); tcp_minshall_update(tp, mss_now, skb); - sent_pkts++; + sent_pkts += tcp_skb_pcount(skb); if (push_one) break; } + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + tp->prr_out += sent_pkts; if (likely(sent_pkts)) { tcp_cwnd_validate(sk); @@ -2292,6 +2294,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) return; NET_INC_STATS_BH(sock_net(sk), mib_idx); + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + tp->prr_out += tcp_skb_pcount(skb); + if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, From e5d05cf53d369283880d49399a6318cec1874f92 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 5 Apr 2013 18:15:15 -0400 Subject: [PATCH 440/678] defconfig: partially sync with tegra3 defconfig --- arch/arm/mach-tegra/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 53313050f78..d1aec45cb99 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -286,12 +286,12 @@ config VOLTAGE_CONTROL help User custom voltage control interface -config DECREASE_MIN_BRIGHTNESS - bool "Decrease Minumum Brightness" +config CUSTOM_BRIGHTNESS + bool "Custom Brightness Levels" depends on TEGRA_SILICON_PLATFORM default n help - Decrease minimum backlight for lowest brightness level used by android. 
+ Allow custom min and max brightness config DEFAULT_DUAL_CORE bool "Default to Dual-Core" From b9fe961f725a2aebd9828451ceb737b02c92cbe4 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 5 Apr 2013 18:15:55 -0400 Subject: [PATCH 441/678] mach-tegra: board-grouper-panel.c: user brightness control v2 now includes both min and max backlight adds toggle for on-the-fly redistribution of backlight values across full range with new min/max --- arch/arm/configs/metallice_grouper_defconfig | 40 ++++++++++++---- arch/arm/mach-tegra/board-grouper-panel.c | 50 ++++++++++++++------ 2 files changed, 66 insertions(+), 24 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index f41de23d3ba..9d6ca5f72dd 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-446-666" +CONFIG_LOCALVERSION="-MKernel-a49" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -141,6 +141,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +CONFIG_TRACEPOINTS=y CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_KPROBES=y @@ -315,7 +316,7 @@ CONFIG_TEGRA_PWM=y CONFIG_TEGRA_FIQ_DEBUGGER=y CONFIG_TEGRA_EMC_SCALING_ENABLE=y CONFIG_VOLTAGE_CONTROL=y -CONFIG_DECREASE_MIN_BRIGHTNESS=y +CONFIG_CUSTOM_BRIGHTNESS=y # CONFIG_DEFAULT_DUAL_CORE is not set CONFIG_GPU_OVERCLOCK=y # CONFIG_GPU_OC_332 is not set @@ -347,7 +348,7 @@ CONFIG_TEGRA_CLUSTER_CONTROL=y CONFIG_TEGRA_AUTO_HOTPLUG=y CONFIG_TEGRA_MC_EARLY_ACK=y CONFIG_TEGRA_MC_PROFILE=y -CONFIG_TEGRA_EDP_LIMITS=y +# CONFIG_TEGRA_EDP_LIMITS is not set CONFIG_TEGRA_EMC_TO_DDR_CLOCK=1 # CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND is not set CONFIG_TEGRA_LP1_950=y @@ -355,7 +356,6 @@ CONFIG_TEGRA_RUNNABLE_THREAD=y CONFIG_TEGRA_VARIANT_INFO=y CONFIG_USB_HOTPLUG=y CONFIG_TEGRA_DYNAMIC_PWRDET=y -CONFIG_TEGRA_EDP_EXACT_FREQ=y # CONFIG_TEGRA_USB_MODEM_POWER is not set CONFIG_TEGRA_BB_XMM_POWER=y # CONFIG_TEGRA_BB_XMM_POWER2 is not set @@ -634,7 +634,7 @@ CONFIG_INET_ESP=y CONFIG_INET_TUNNEL=y CONFIG_INET_XFRM_MODE_TRANSPORT=y CONFIG_INET_XFRM_MODE_TUNNEL=y -CONFIG_INET_XFRM_MODE_BEET=y +# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set CONFIG_TCP_CONG_ADVANCED=y @@ -946,6 +946,7 @@ CONFIG_XPS=y # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_NET_DROP_MONITOR is not set # CONFIG_HAMRADIO is not set # CONFIG_CAN is not set # CONFIG_IRDA is not set @@ -3044,7 +3045,7 @@ CONFIG_NFS_V3=y # CONFIG_NFS_V3_ACL is not set CONFIG_NFS_V4=y # CONFIG_NFS_V4_1 is not set -# CONFIG_ROOT_NFS is not set +CONFIG_ROOT_NFS=y # CONFIG_NFS_USE_LEGACY_DNS is not set CONFIG_NFS_USE_KERNEL_DNS=y # CONFIG_NFS_USE_NEW_IDMAPPER is not set @@ -3168,12 +3169,13 @@ CONFIG_SCHED_DEBUG=y # CONFIG_LOCK_STAT is not set # CONFIG_DEBUG_ATOMIC_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -# CONFIG_STACKTRACE is not set +CONFIG_STACKTRACE=y # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_HIGHMEM is not set # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_DEBUG_INFO is not set +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_INFO_REDUCED is not set # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_MEMORY_INIT is not set @@ -3196,13 +3198,31 @@ 
CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_FAULT_INJECTION is not set # CONFIG_SYSCTL_SYSCALL_CHECK is not set # CONFIG_DEBUG_PAGEALLOC is not set +CONFIG_NOP_TRACER=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_C_RECORDMCOUNT=y +CONFIG_RING_BUFFER=y +CONFIG_EVENT_TRACING=y +CONFIG_EVENT_POWER_TRACING_DEPRECATED=y +CONFIG_CONTEXT_SWITCH_TRACER=y +CONFIG_TRACING=y CONFIG_TRACING_SUPPORT=y -# CONFIG_FTRACE is not set +CONFIG_FTRACE=y +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_BRANCH_PROFILE_NONE=y +# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set +# CONFIG_PROFILE_ALL_BRANCHES is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_RING_BUFFER_BENCHMARK is not set +# CONFIG_TRACELEVEL is not set CONFIG_DYNAMIC_DEBUG=y # CONFIG_DMA_API_DEBUG is not set # CONFIG_ATOMIC64_SELFTEST is not set @@ -3335,7 +3355,7 @@ CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set # CONFIG_CRYPTO_DEV_TEGRA_AES is not set CONFIG_CRYPTO_DEV_TEGRA_SE=y -# CONFIG_BINARY_PRINTF is not set +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 8faa6bb485a..8f5dcd837b4 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -39,10 +39,13 @@ #include "gpio-names.h" #include - #include -static unsigned int min_backlight = 13; +static bool otf_scaling = 0; +module_param(otf_scaling, bool, 0644); +static unsigned int min_backlight = 10; module_param(min_backlight, uint, 0644); +static unsigned int max_backlight = 160; +module_param(max_backlight, uint, 0644); /* grouper default display board pins */ #define grouper_lvds_avdd_en TEGRA_GPIO_PH6 @@ -71,8 +74,8 @@ static struct regulator *grouper_lvds_reg; static struct regulator *grouper_lvds_vdd_panel; static tegra_dc_bl_output grouper_bl_output_measured = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, + 0, 2, 4, 6, 9, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, @@ -148,7 +151,6 @@ static void grouper_backlight_exit(struct device *dev) static int grouper_backlight_notify(struct device *unused, int brightness) { int cur_sd_brightness = atomic_read(&sd_brightness); - int min_brightness = min_backlight; /* Set the backlight GPIO pin mode to 'backlight_enable' */ //gpio_set_value(grouper_bl_enb, !!brightness); @@ -156,19 +158,39 @@ static int grouper_backlight_notify(struct device *unused, int brightness) /* SD brightness is a percentage, 8-bit value. 
*/ brightness = (brightness * cur_sd_brightness) / 255; - /* Ensure that min backlight goes up to at least 10 to prevent auto-min != slider-min */ - if (min_backlight < 10) - min_brightness = 10; - /* Apply any backlight response curve */ - if (brightness > 255) + if (brightness > 255) { pr_info("Error: Brightness > 255!\n"); - else - if ((brightness > 0) && (brightness < min_brightness)) { - brightness = min_backlight; - } else { + } else { +#ifdef CONFIG_CUSTOM_BRIGHTNESS + if ((min_backlight == 0) || (max_backlight == 0)) { +#endif brightness = bl_output[brightness]; +#ifdef CONFIG_CUSTOM_BRIGHTNESS + } else { + if (otf_scaling == 0) { + int min_bl_adj = min_backlight; + /* Ensure that min backlight goes up to at least 10 to prevent auto-min != slider-min */ + if (min_backlight < 10) + min_bl_adj = 10; + if ((brightness > 0) && (brightness < min_bl_adj)) { + brightness = min_backlight; + } else if (brightness > max_backlight) { + brightness = max_backlight; + } else { + brightness = bl_output[brightness]; + } + } else { + if (brightness == 0) { + brightness = 0; + } else { + brightness = min_backlight + + DIV_ROUND_CLOSEST(((max_backlight - min_backlight) * max((brightness - 10),0)),245); + } + } } +#endif + } return brightness; } From 7b2be131fab57074b040030ac73e35c7941dc46e Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 6 Apr 2013 15:52:42 -0400 Subject: [PATCH 442/678] mach-tegra: board-grouper-panel.c: fix for min brightness --- arch/arm/mach-tegra/board-grouper-panel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 8f5dcd837b4..1b8d797271d 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -40,7 +40,7 @@ #include #include -static bool otf_scaling = 0; +static bool otf_scaling = 1; module_param(otf_scaling, bool, 0644); static unsigned int min_backlight = 10; module_param(min_backlight, uint, 0644); @@ -171,8 +171,8 @@ static int grouper_backlight_notify(struct device *unused, int brightness) if (otf_scaling == 0) { int min_bl_adj = min_backlight; /* Ensure that min backlight goes up to at least 10 to prevent auto-min != slider-min */ - if (min_backlight < 10) - min_bl_adj = 10; + if (min_backlight < 11) + min_bl_adj = 11; if ((brightness > 0) && (brightness < min_bl_adj)) { brightness = min_backlight; } else if (brightness > max_backlight) { From 9a77698f53008f3922f40843540a448774599d4c Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 6 Apr 2013 15:55:22 -0400 Subject: [PATCH 443/678] Revert "Proportional Rate Reduction for TCP." This reverts commit a5439ddd77ac24480993567539bb67c111c2568f. --- include/linux/tcp.h | 4 --- net/ipv4/tcp_input.c | 58 +++++-------------------------------------- net/ipv4/tcp_output.c | 7 +----- 3 files changed, 7 insertions(+), 62 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6b63b310af3..531ede8006d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -379,10 +379,6 @@ struct tcp_sock { u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; - u32 prior_cwnd; /* Congestion window at start of Recovery. */ - u32 prr_delivered; /* Number of newly delivered packets to - * receiver in Recovery. */ - u32 prr_out; /* Total number of pkts sent during Recovery. 
*/ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e12dfb3f62e..d73aab3fbfc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2828,13 +2828,9 @@ static int tcp_try_undo_loss(struct sock *sk) static inline void tcp_complete_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - - /* Do not moderate cwnd if it's already undone in cwr or recovery. */ - if (tp->undo_marker) { - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - else /* PRR */ - tp->snd_cwnd = tp->snd_ssthresh; + /* Do not moderate cwnd if it's already undone in cwr or recovery */ + if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { + tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); @@ -2952,38 +2948,6 @@ void tcp_simple_retransmit(struct sock *sk) } EXPORT_SYMBOL(tcp_simple_retransmit); -/* This function implements the PRR algorithm, specifcally the PRR-SSRB - * (proportional rate reduction with slow start reduction bound) as described in - * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. - * It computes the number of packets to send (sndcnt) based on packets newly - * delivered: - * 1) If the packets in flight is larger than ssthresh, PRR spreads the - * cwnd reductions across a full RTT. - * 2) If packets in flight is lower than ssthresh (such as due to excess - * losses and/or application stalls), do not perform any further cwnd - * reductions, but instead slow start up to ssthresh. - */ -static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, - int fast_rexmit, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - int sndcnt = 0; - int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); - - if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { - u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + - tp->prior_cwnd - 1; - sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; - } else { - sndcnt = min_t(int, delta, - max_t(int, tp->prr_delivered - tp->prr_out, - newly_acked_sacked) + 1); - } - - sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); - tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; -} - /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2995,8 +2959,7 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). 
*/ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int newly_acked_sacked, int flag) +static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -3146,17 +3109,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; - tp->prior_cwnd = tp->snd_cwnd; - tp->prr_delivered = 0; - tp->prr_out = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); fast_rexmit = 1; } if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) tcp_update_scoreboard(sk, fast_rexmit); - tp->prr_delivered += newly_acked_sacked; - tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag); + tcp_cwnd_down(sk, flag); tcp_xmit_retransmit_queue(sk); } @@ -3671,8 +3630,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 prior_in_flight; u32 prior_fackets; int prior_packets; - int prior_sacked = tp->sacked_out; - int newly_acked_sacked = 0; int frto_cwnd = 0; /* If the ack is older than previous acks @@ -3744,9 +3701,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); - newly_acked_sacked = (prior_packets - prior_sacked) - - (tp->packets_out - tp->sacked_out); - if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); /* Guarantee sacktag reordering detection against wrap-arounds */ @@ -3759,7 +3713,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, - newly_acked_sacked, flag); + flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b1f32b459e5..faf257b9415 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1794,13 +1794,11 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, tcp_event_new_data_sent(sk, skb); tcp_minshall_update(tp, mss_now, skb); - sent_pkts += tcp_skb_pcount(skb); + sent_pkts++; if (push_one) break; } - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) - tp->prr_out += sent_pkts; if (likely(sent_pkts)) { tcp_cwnd_validate(sk); @@ -2294,9 +2292,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk) return; NET_INC_STATS_BH(sock_net(sk), mib_idx); - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) - tp->prr_out += tcp_skb_pcount(skb); - if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, From 35135e6018935bfc8f3c374006d7aa520853228f Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 14 Mar 2013 15:45:55 -0400 Subject: [PATCH 444/678] include: linux: blkdev.h: increase max nr_requests to 512 --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 94d1b95c69f..4a2662facdd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -33,7 +33,7 @@ struct sg_io_hdr; struct bsg_job; #define BLKDEV_MIN_RQ 4 -#define BLKDEV_MAX_RQ 128 /* Default maximum */ +#define BLKDEV_MAX_RQ 512 /* Non-Default maximum (128) */ struct request; typedef void (rq_end_io_fn)(struct request *, int); From 6d9cca959726d555166d9c6cdba8b7bdd4ce9135 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 6 Apr 2013 17:52:41 
-0400 Subject: [PATCH 445/678] defconfig: a49 --- arch/arm/configs/metallice_grouper_defconfig | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 9d6ca5f72dd..403702c6773 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -184,11 +184,11 @@ CONFIG_IOSCHED_ROW=y # CONFIG_IOSCHED_VR is not set CONFIG_IOSCHED_BFQ=y CONFIG_CGROUP_BFQIO=y -CONFIG_DEFAULT_DEADLINE=y -# CONFIG_DEFAULT_ROW is not set +# CONFIG_DEFAULT_DEADLINE is not set +CONFIG_DEFAULT_ROW=y # CONFIG_DEFAULT_BFQ is not set # CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="deadline" +CONFIG_DEFAULT_IOSCHED="row" # CONFIG_INLINE_SPIN_TRYLOCK is not set # CONFIG_INLINE_SPIN_TRYLOCK_BH is not set # CONFIG_INLINE_SPIN_LOCK is not set @@ -2922,9 +2922,8 @@ CONFIG_AMI306=y # CONFIG_IIO_GPIO_TRIGGER is not set # CONFIG_IIO_SYSFS_TRIGGER is not set # CONFIG_IIO_SIMPLE_DUMMY is not set -CONFIG_XVMALLOC=y -CONFIG_ZRAM=y -# CONFIG_ZRAM_DEBUG is not set +# CONFIG_XVMALLOC is not set +# CONFIG_ZRAM is not set # CONFIG_FB_SM7XX is not set # CONFIG_VIDEO_DT3155 is not set # CONFIG_CRYSTALHD is not set From a0294d6b5c32c74b7141b1a82754f0d09616c5b7 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 8 Apr 2013 15:47:24 -0400 Subject: [PATCH 446/678] defconfig: update --- arch/arm/configs/metallice_grouper_defconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 403702c6773..2fd11c40963 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a49" +CONFIG_LOCALVERSION="-MKernel-a0" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -348,7 +348,7 @@ CONFIG_TEGRA_CLUSTER_CONTROL=y CONFIG_TEGRA_AUTO_HOTPLUG=y CONFIG_TEGRA_MC_EARLY_ACK=y CONFIG_TEGRA_MC_PROFILE=y -# CONFIG_TEGRA_EDP_LIMITS is not set +CONFIG_TEGRA_EDP_LIMITS=y CONFIG_TEGRA_EMC_TO_DDR_CLOCK=1 # CONFIG_TEGRA_CONVSERVATIVE_GOV_ON_EARLYSUPSEND is not set CONFIG_TEGRA_LP1_950=y @@ -356,7 +356,8 @@ CONFIG_TEGRA_RUNNABLE_THREAD=y CONFIG_TEGRA_VARIANT_INFO=y CONFIG_USB_HOTPLUG=y CONFIG_TEGRA_DYNAMIC_PWRDET=y -# CONFIG_TEGRA_USB_MODEM_POWER is not set +CONFIG_TEGRA_EDP_EXACT_FREQ=y +CONFIG_TEGRA_USB_MODEM_POWER=y CONFIG_TEGRA_BB_XMM_POWER=y # CONFIG_TEGRA_BB_XMM_POWER2 is not set # CONFIG_TEGRA_THERMAL_SYSFS is not set From 9d1bc3e51981f52c68d09e7d26579f951c59ecb6 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 8 Apr 2013 17:32:21 -0400 Subject: [PATCH 447/678] mach-tegra: edp.c: workaround to keep edp levels appropriate --- arch/arm/mach-tegra/edp.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/edp.c b/arch/arm/mach-tegra/edp.c index 1ba0e5f1530..e7e96a22b64 100644 --- a/arch/arm/mach-tegra/edp.c +++ b/arch/arm/mach-tegra/edp.c @@ -362,10 +362,17 @@ void __init tegra_init_cpu_edp_limits(unsigned int regulator_mA) for (j = 0; j < edp_limits_size; j++) { e[j].temperature = (int)t[i+j].temperature; - e[j].freq_limits[0] = (unsigned int)(t[i+j].freq_limits[0]-40) * 10000; - e[j].freq_limits[1] = (unsigned int)(t[i+j].freq_limits[1]-30) * 10000; - e[j].freq_limits[2] = (unsigned 
int)(t[i+j].freq_limits[2]-30) * 10000; - e[j].freq_limits[3] = (unsigned int)(t[i+j].freq_limits[3]-30) * 10000; + if (j == 0) { + e[j].freq_limits[0] = (unsigned int)(t[i+j].freq_limits[0]-15) * 10000; + e[j].freq_limits[1] = (unsigned int)(t[i+j].freq_limits[1]-5) * 10000; + e[j].freq_limits[2] = (unsigned int)(t[i+j].freq_limits[2]-5) * 10000; + e[j].freq_limits[3] = (unsigned int)(t[i+j].freq_limits[3]-5) * 10000; + } else { + e[j].freq_limits[0] = (unsigned int)(t[i+j].freq_limits[0]-30) * 10000; + e[j].freq_limits[1] = (unsigned int)(t[i+j].freq_limits[1]-20) * 10000; + e[j].freq_limits[2] = (unsigned int)(t[i+j].freq_limits[2]-20) * 10000; + e[j].freq_limits[3] = (unsigned int)(t[i+j].freq_limits[3]-20) * 10000; + } } if (edp_limits != edp_default_limits) From b34e8be169ace149d3a53fe06c960b8ee8f565f6 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 9 Apr 2013 10:03:42 -0400 Subject: [PATCH 448/678] Revert "Revert "net: wireless: bcmdhd: Enable P2P probe request handling only during discovery"" This reverts commit 373d74c0df2a3da125883491a876c2ec786b19a6. --- drivers/net/wireless/bcmdhd/dhd_linux.c | 1 - drivers/net/wireless/bcmdhd/wl_cfg80211.c | 3 +-- drivers/net/wireless/bcmdhd/wl_cfg80211.h | 1 + drivers/net/wireless/bcmdhd/wl_cfgp2p.c | 9 ++++++++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index 6cc2e1ed89b..a871bdba807 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -3391,7 +3391,6 @@ dhd_preinit_ioctls(dhd_pub_t *dhd) setbit(eventmask, WLC_E_ACTION_FRAME_RX); setbit(eventmask, WLC_E_ACTION_FRAME_COMPLETE); setbit(eventmask, WLC_E_ACTION_FRAME_OFF_CHAN_COMPLETE); - setbit(eventmask, WLC_E_P2P_PROBREQ_MSG); setbit(eventmask, WLC_E_P2P_DISC_LISTEN_COMPLETE); } #endif /* WL_CFG80211 */ diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index b4f47d1393c..2f28bf1d3f7 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -331,7 +331,6 @@ static __used bool wl_is_ibssstarter(struct wl_priv *wl); */ static s32 __wl_cfg80211_up(struct wl_priv *wl); static s32 __wl_cfg80211_down(struct wl_priv *wl); -static s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add); static bool wl_is_linkdown(struct wl_priv *wl, const wl_event_msg_t *e); static bool wl_is_linkup(struct wl_priv *wl, const wl_event_msg_t *e, struct net_device *ndev); static bool wl_is_nonetwork(struct wl_priv *wl, const wl_event_msg_t *e); @@ -7089,7 +7088,7 @@ static s32 wl_config_ifmode(struct wl_priv *wl, struct net_device *ndev, s32 ift return 0; } -static s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add) +s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add) { s8 iovbuf[WL_EVENTING_MASK_LEN + 12]; diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.h b/drivers/net/wireless/bcmdhd/wl_cfg80211.h index dfb0d0de2f7..6d237eee2cc 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.h +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.h @@ -689,4 +689,5 @@ void wl_cfg80211_enable_trace(int level); extern s32 wl_update_wiphybands(struct wl_priv *wl); extern s32 wl_cfg80211_if_is_group_owner(void); extern int wl_cfg80211_update_power_mode(struct net_device *dev); +extern s32 wl_add_remove_eventmsg(struct net_device *ndev, u16 event, bool add); #endif /* _wl_cfg80211_h_ */ diff --git 
a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c index 7bcd14486dd..aedf9705b44 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c +++ b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c @@ -641,7 +641,7 @@ wl_cfgp2p_enable_discovery(struct wl_priv *wl, struct net_device *dev, } set_ie: ret = wl_cfgp2p_set_management_ie(wl, dev, - wl_cfgp2p_find_idx(wl, dev), + wl_to_p2p_bss_bssidx(wl, P2PAPI_BSSCFG_DEVICE), VNDR_IE_PRBREQ_FLAG, ie, ie_len); if (unlikely(ret < 0)) { @@ -1230,6 +1230,10 @@ wl_cfgp2p_listen_complete(struct wl_priv *wl, struct net_device *ndev, } cfg80211_remain_on_channel_expired(ndev, wl->last_roc_id, &wl->remain_on_chan, wl->remain_on_chan_type, GFP_KERNEL); + if (wl_add_remove_eventmsg(wl_to_prmry_ndev(wl), + WLC_E_P2P_PROBREQ_MSG, false) != BCME_OK) { + CFGP2P_ERR((" failed to unset WLC_E_P2P_PROPREQ_MSG\n")); + } } else wl_clr_p2p_status(wl, LISTEN_EXPIRED); @@ -1321,6 +1325,9 @@ wl_cfgp2p_discover_listen(struct wl_priv *wl, s32 channel, u32 duration_ms) } else wl_clr_p2p_status(wl, LISTEN_EXPIRED); + if (wl_add_remove_eventmsg(wl_to_prmry_ndev(wl), WLC_E_P2P_PROBREQ_MSG, true) != BCME_OK) { + CFGP2P_ERR((" failed to set WLC_E_P2P_PROPREQ_MSG\n")); + } wl_cfgp2p_set_p2p_mode(wl, WL_P2P_DISC_ST_LISTEN, channel, (u16) duration_ms, wl_to_p2p_bss_bssidx(wl, P2PAPI_BSSCFG_DEVICE)); _timer = &wl->p2p->listen_timer; From 3849af9cb7b9447d3de8462dd9de02006d8f0155 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 9 Apr 2013 10:04:38 -0400 Subject: [PATCH 449/678] Revert "Revert "arm: tegra: Fix modem_reset_flag assignment"" This reverts commit 9c0656765d23cbd250d521ee4626c8793167034a. --- arch/arm/mach-tegra/baseband-xmm-power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/baseband-xmm-power.c b/arch/arm/mach-tegra/baseband-xmm-power.c index 11d9b3f309c..40ef4e1d2da 100755 --- a/arch/arm/mach-tegra/baseband-xmm-power.c +++ b/arch/arm/mach-tegra/baseband-xmm-power.c @@ -807,7 +807,7 @@ static void baseband_xmm_power_init2_work(struct work_struct *work) } else pr_err("%s: hsic_register is missing\n", __func__); register_hsic_device = false; - modem_reset_flag == 0; + modem_reset_flag = 0; } } From 38507b5847744e5d4859e2c13af9abd706e67b6c Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 9 Apr 2013 10:10:05 -0400 Subject: [PATCH 450/678] mach-tegra: tegra3_dvfs.c: increase some voltages --- arch/arm/mach-tegra/tegra3_dvfs.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 1d37c4c4a20..19f9920660d 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -30,7 +30,9 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; +// a49 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; +// a50 775, 800, 875, 900, 950, 975, 1000, 1050, 1100, 1125, 1150, 1100, 1125, 1150, 1175, 1200, 1212, 1300}; + 800, 825, 900, 925, 950, 1000, 1025, 1050, 1100, 1125, 1150, 1100, 1125, 1150, 1175, 1200, 1212, 1300}; #endif static bool tegra_dvfs_cpu_disabled; @@ -38,8 +40,9 @@ static bool tegra_dvfs_core_disabled; static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { - 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; - +// a49 800, 825, 850, 875, 
900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; +// a50 775, 800, 875, 900, 950, 975, 1000, 1050, 1100, 1125, 1150, 1100, 1125, 1150, 1175, 1200, 1212, 1300}; + 800, 825, 900, 925, 950, 1000, 1025, 1050, 1100, 1125, 1150, 1100, 1125, 1150, 1175, 1200, 1212, 1300}; static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25, 25, 25, 25}; @@ -175,8 +178,12 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 4, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1250, 1300, 1330, 1360, 1400, 1500), /* Nexus 7 - faking speedo id = 4, process id =2*/ - /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ - CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600, 1700), + /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212,*/ +// CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600, 1700), + + /* Cpu voltages (mV): 775, 800, 875, 900, 950, 975, 1000, 1050, 1100, 1125, 1150, 1100, 1125, 1150, 1175, 1200, 1212,*/ + CPU_DVFS("cpu_g", 4, 2, MHZ, 340, 475, 666, 860, 1000, 1100, 1200, 1300, 1400, 1500, 1600), + CPU_DVFS("cpu_g", 4, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500), CPU_DVFS("cpu_g", 5, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), From fcc169e6497033598bfa92bcce94a290dd48ed1c Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 9 Apr 2013 19:37:22 -0400 Subject: [PATCH 451/678] mach-tegra: board-grouper-panel.c: set min SD brightness to 10 --- arch/arm/mach-tegra/board-grouper-panel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 1b8d797271d..c92a8a30d04 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -445,7 +445,7 @@ static struct tegra_dc_sd_settings grouper_sd_settings = { .bin_width = -1, .aggressiveness = 1, .phase_in_adjustments = true, - .panel_min_brightness = 13, + .panel_min_brightness = 10, .use_vid_luma = false, /* Default video coefficients */ .coeff = {5, 9, 2}, From aacd9d3d31f04134ec458f879026b872c0ed9128 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 9 Apr 2013 20:58:09 -0400 Subject: [PATCH 452/678] include: linux: pm_qos_params.h: hard code max cpus to 4 --- include/linux/pm_qos_params.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h index be63f01630a..f09793d0eba 100644 --- a/include/linux/pm_qos_params.h +++ b/include/linux/pm_qos_params.h @@ -29,7 +29,7 @@ enum { #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 #define PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE 0 -#define PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE LONG_MAX +#define PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE 4 #define PM_QOS_CPU_FREQ_MIN_DEFAULT_VALUE 0 #define PM_QOS_CPU_FREQ_MAX_DEFAULT_VALUE LONG_MAX From 82962b80c5d33e038329712a9f9134f2d8a5464a Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 11 Apr 2013 09:48:27 -0400 Subject: [PATCH 453/678] Revert "mach-tegra: tegra3_dvfs.c: increase some voltages" This reverts commit 
38507b5847744e5d4859e2c13af9abd706e67b6c. --- arch/arm/mach-tegra/tegra3_dvfs.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/arch/arm/mach-tegra/tegra3_dvfs.c b/arch/arm/mach-tegra/tegra3_dvfs.c index 19f9920660d..1d37c4c4a20 100644 --- a/arch/arm/mach-tegra/tegra3_dvfs.c +++ b/arch/arm/mach-tegra/tegra3_dvfs.c @@ -30,9 +30,7 @@ #ifdef CONFIG_VOLTAGE_CONTROL int user_mv_table[MAX_DVFS_FREQS] = { -// a49 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; -// a50 775, 800, 875, 900, 950, 975, 1000, 1050, 1100, 1125, 1150, 1100, 1125, 1150, 1175, 1200, 1212, 1300}; - 800, 825, 900, 925, 950, 1000, 1025, 1050, 1100, 1125, 1150, 1100, 1125, 1150, 1175, 1200, 1212, 1300}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; #endif static bool tegra_dvfs_cpu_disabled; @@ -40,9 +38,8 @@ static bool tegra_dvfs_core_disabled; static struct dvfs *cpu_dvfs; static const int cpu_millivolts[MAX_DVFS_FREQS] = { -// a49 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; -// a50 775, 800, 875, 900, 950, 975, 1000, 1050, 1100, 1125, 1150, 1100, 1125, 1150, 1175, 1200, 1212, 1300}; - 800, 825, 900, 925, 950, 1000, 1025, 1050, 1100, 1125, 1150, 1100, 1125, 1150, 1175, 1200, 1212, 1300}; + 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1350, 1400}; + static const unsigned int cpu_cold_offs_mhz[MAX_DVFS_FREQS] = { 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 25, 25, 25, 25}; @@ -178,12 +175,8 @@ static struct dvfs cpu_dvfs_table[] = { CPU_DVFS("cpu_g", 4, 1, MHZ, 480, 480, 650, 650, 780, 780, 990, 1040, 1100, 1200, 1250, 1300, 1330, 1360, 1400, 1500), /* Nexus 7 - faking speedo id = 4, process id =2*/ - /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212,*/ -// CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600, 1700), - - /* Cpu voltages (mV): 775, 800, 875, 900, 950, 975, 1000, 1050, 1100, 1125, 1150, 1100, 1125, 1150, 1175, 1200, 1212,*/ - CPU_DVFS("cpu_g", 4, 2, MHZ, 340, 475, 666, 860, 1000, 1100, 1200, 1300, 1400, 1500, 1600), - + /* Cpu voltages (mV): 800, 825, 850, 875, 900, 912, 975, 1000, 1025, 1050, 1075, 1100, 1125, 1150, 1175, 1200, 1212, 1237 */ + CPU_DVFS("cpu_g", 4, 2, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500, 1600, 1700), CPU_DVFS("cpu_g", 4, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1500), CPU_DVFS("cpu_g", 5, 3, MHZ, 550, 550, 770, 770, 910, 910, 1150, 1230, 1280, 1330, 1370, 1400, 1470, 1500, 1500, 1540, 1540, 1700), From c5bc33447628f052380f78dee0b3f01e9682bbc9 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 11 Apr 2013 11:50:08 -0400 Subject: [PATCH 454/678] defconfig: a52 --- arch/arm/configs/metallice_grouper_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 2fd11c40963..5163c720b57 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a0" +CONFIG_LOCALVERSION="-MKernel-a52" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y 
CONFIG_HAVE_KERNEL_LZMA=y From 8c1496106eb00546fe94e31a1ee5c48657828965 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 13 Apr 2013 18:24:08 -0400 Subject: [PATCH 455/678] drivers: net: wireless: bcmdhd: wl_cfg80211.c: grouper adhoc IBSS mode support --- drivers/net/wireless/bcmdhd/wl_cfg80211.c | 65 ++++++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index 2f28bf1d3f7..27963527d41 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -1614,7 +1614,7 @@ __wl_cfg80211_scan(struct wiphy *wiphy, struct net_device *ndev, WL_DBG(("P2P: GO_NEG_PHASE status cleared \n")); p2p_scan(wl) = true; } - } else { + } else if (wl_get_mode_by_netdev(wl, ndev) != WL_MODE_IBSS) { /* legacy scan trigger * So, we have to disable p2p discovery if p2p discovery is on */ @@ -3064,7 +3064,68 @@ wl_cfg80211_get_station(struct wiphy *wiphy, struct net_device *dev, wl_link_down(wl); } } + else if (wl_get_mode_by_netdev(wl, dev) == WL_MODE_IBSS) { + u8 *curmacp = wl_read_prof(wl, dev, WL_PROF_BSSID); + + memset(&scb_val, 0, sizeof(scb_val)); + bcopy(mac, &scb_val.ea, 6); + + err = wldev_ioctl(dev, WLC_GET_RSSI, &scb_val, + sizeof(scb_val_t), false); + if (err) { + WL_ERR(("Could not get rssi (%d)\n", err)); + return err; + } + rssi = dtoh32(scb_val.val); + + /* the RSSI value from the firmware is an average but user-space + expects it as signal, so we fill in both */ + sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->signal = rssi; + sinfo->filled |= STATION_INFO_SIGNAL_AVG; + sinfo->signal_avg = rssi; + + if (!memcmp(mac, curmacp, ETHER_ADDR_LEN)) { + // BSSID is not a real station. Can't get sta_info; Done + return 0; + } + err = wldev_iovar_getbuf(dev, "sta_info", (struct ether_addr *)mac, + ETHER_ADDR_LEN, wl->ioctl_buf, WLC_IOCTL_MAXLEN, &wl->ioctl_buf_sync); + if (err < 0) { + WL_ERR(("GET STA INFO failed, %d\n", err)); + return err; + } + + sta = (sta_info_t *)wl->ioctl_buf; + sta->len = dtoh16(sta->len); + sta->cap = dtoh16(sta->cap); + sta->flags = dtoh32(sta->flags); + sta->idle = dtoh32(sta->idle); + sta->in = dtoh32(sta->in); + sta->listen_interval_inms = dtoh32(sta->listen_interval_inms); + sta->tx_pkts = dtoh32(sta->tx_pkts); + sta->tx_failures = dtoh32(sta->tx_failures); + sta->rx_ucast_pkts = dtoh32(sta->rx_ucast_pkts); + sta->rx_mcast_pkts = dtoh32(sta->rx_mcast_pkts); + sta->tx_rate = dtoh32(sta->tx_rate); + sta->rx_rate = dtoh32(sta->rx_rate); + sta->rx_decrypt_succeeds = dtoh32(sta->rx_decrypt_succeeds); + sta->rx_decrypt_failures = dtoh32(sta->rx_decrypt_failures); + + sinfo->filled |= STATION_INFO_INACTIVE_TIME | STATION_INFO_TX_PACKETS | + STATION_INFO_TX_FAILED | STATION_INFO_RX_PACKETS | + STATION_INFO_TX_BITRATE | STATION_INFO_RX_BITRATE | + STATION_INFO_RX_DROP_MISC; + + sinfo->inactive_time = sta->idle * 1000; + sinfo->tx_packets = sta->tx_pkts; + sinfo->tx_failed = sta->tx_failures; + sinfo->rx_packets = sta->rx_ucast_pkts + sta->rx_mcast_pkts; + sinfo->txrate.legacy = sta->tx_rate / 100; + sinfo->rxrate.legacy = sta->rx_rate / 100; + sinfo->rx_dropped_misc = sta->rx_decrypt_failures; + } return err; } @@ -4649,7 +4710,7 @@ static s32 wl_setup_wiphy(struct wireless_dev *wdev, struct device *sdiofunc_dev wdev->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; #endif /* WL_SCHED_SCAN */ wdev->wiphy->interface_modes = - BIT(NL80211_IFTYPE_STATION) + BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC) | 
BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_MONITOR); wdev->wiphy->bands[IEEE80211_BAND_2GHZ] = &__wl_band_2ghz; From 17ab78668efafd56376874a93f17ed3cbd1a415f Mon Sep 17 00:00:00 2001 From: Timur Mehrvarz Date: Mon, 28 Jan 2013 12:34:27 +0100 Subject: [PATCH 456/678] usb-hostmode-charging Change-Id: Ia4df475c69068981bd204663c8328d42eb50646d Conflicts: drivers/power/smb347-charger.c --- drivers/power/smb347-charger.c | 478 +++++++++++++++++++++++++-------- drivers/usb/host/ehci-tegra.c | 39 ++- drivers/usb/otg/tegra-otg.c | 28 +- 3 files changed, 427 insertions(+), 118 deletions(-) diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index e466bd548c5..e752633e2ef 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -103,7 +103,7 @@ #define APSD_DCP 0x02 #define APSD_OTHER 0x03 #define APSD_SDP 0x04 -#define APSD_SDP2 0x06 // tmtmtm: USB host mode charging +#define APSD_HOST_MODE_CHARGING 0x06 #define USB_30 0x20 #define DCIN_OV_UV_STS 0x50 #define DELAY_FOR_CURR_LIMIT_RECONF (60) @@ -130,6 +130,15 @@ static unsigned int project_id; static unsigned int pcba_ver; static int gpio_dock_in = 0; +// tmtmtm: also modify 'export KBUILD_BUILD_USER=timur-usbhost-fi-2013-01-01 +static int fixed_install_mode = 0; +volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c +static volatile int host_mode_charging_state = 0; +static volatile int lastExternalPowerState = 0; +static volatile int lastOtgState = 0; +static volatile int lastChargeSlaveDevicesState = 0; +static volatile int hostmode_waiting_for_power = 0; + /* Sysfs interface */ static DEVICE_ATTR(reg_status, S_IWUSR | S_IRUGO, smb347_reg_show, NULL); @@ -243,10 +252,12 @@ static void smb347_clear_interrupts(struct i2c_client *client) __func__); } -static int smb347_configure_otg(struct i2c_client *client, int enable) +static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int chargeSlaves, int stopChargeSlaves) { int ret = 0; + printk("smb347_configure_otg %d %d %d %d\n",enableOTG, chargeSlaves, stopChargeSlaves, lastOtgState); + /*Enable volatile writes to registers*/ ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); if (ret < 0) { @@ -255,68 +266,98 @@ static int smb347_configure_otg(struct i2c_client *client, int enable) goto error; } - if (enable) { + // tmtmtm: we will never charge slave devices in fixed_install_mode + if(!fixed_install_mode) { + if(chargeSlaves) { + if(!lastChargeSlaveDevicesState) { + /* Configure INOK to be active high */ + //printk("smb347_configure_otg INOK to be active high\n"); + ret = smb347_update_reg(client, smb347_SYSOK_USB3, 0x01); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } - /* Configure INOK to be active high */ - ret = smb347_update_reg(client, smb347_SYSOK_USB3, 0x01); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; + /* Change "OTG output current limit" to 250mA */ + //printk("smb347_configure_otg charge slaves 250mA\n"); + ret = smb347_read(client, smb347_OTG_TLIM_REG); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } + ret = smb347_write(client, smb347_OTG_TLIM_REG, (ret & (~(1<<3)))); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } + } } + } - /* Change "OTG output current limit" to 250mA */ - ret = smb347_read(client, smb347_OTG_TLIM_REG); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } - ret = 
smb347_write(client, smb347_OTG_TLIM_REG, (ret & (~(1<<3)))); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } - - /* Enable OTG */ - ret = smb347_update_reg(client, smb347_CMD_REG, 0x10); - if (ret < 0) { - dev_err(&client->dev, "%s: Failed in writing register" - "0x%02x\n", __func__, smb347_CMD_REG); - goto error; - } - - /* Change "OTG output current limit" from 250mA to 750mA */ - ret = smb347_update_reg(client, smb347_OTG_TLIM_REG, 0x08); - if (ret < 0) { - dev_err(&client->dev, "%s: Failed in writing register" - "0x%02x\n", __func__, smb347_OTG_TLIM_REG); - goto error; - } - - } else { - /* Disable OTG */ - ret = smb347_read(client, smb347_CMD_REG); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } + if(enableOTG>0) { + if(!lastOtgState) { + printk("smb347_configure_otg enable host mode\n"); + ret = smb347_update_reg(client, smb347_CMD_REG, 0x10); + if (ret < 0) { + dev_err(&client->dev, "%s: Failed in writing register" + "0x%02x\n", __func__, smb347_CMD_REG); + goto error; + } + lastOtgState = 1; + } + } else if(enableOTG==0) { + if(lastOtgState) { + printk("smb347_configure_otg disable host mode\n"); + ret = smb347_read(client, smb347_CMD_REG); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } - ret = smb347_write(client, smb347_CMD_REG, (ret & (~(1<<4)))); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } + ret = smb347_write(client, smb347_CMD_REG, (ret & (~(1<<4)))); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } + lastOtgState=0; + } + } - /* Configure INOK to be active low */ - ret = smb347_read(client, smb347_SYSOK_USB3); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; + // tmtmtm: we will never charge slave devices in fixed_install_mode + if(!fixed_install_mode) { + if(chargeSlaves) { + if(!lastChargeSlaveDevicesState) { + /* Change "OTG output current limit" from 250mA to 750mA */ + //printk("smb347_configure_otg charge slaves 750mA\n"); + ret = smb347_update_reg(client, smb347_OTG_TLIM_REG, 0x08); + if (ret < 0) { + dev_err(&client->dev, "%s: Failed in writing register" + "0x%02x\n", __func__, smb347_OTG_TLIM_REG); + goto error; + } + lastChargeSlaveDevicesState = 1; + printk("smb347_configure_otg lastChargeSlaveDevicesState=%d\n",lastChargeSlaveDevicesState); + } } + else + if(stopChargeSlaves) { + if(lastChargeSlaveDevicesState) { + //printk("smb347_configure_otg stop charging slaves\n"); + /* Configure INOK to be active low */ + ret = smb347_read(client, smb347_SYSOK_USB3); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } - ret = smb347_write(client, smb347_SYSOK_USB3, (ret & (~(1)))); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; + ret = smb347_write(client, smb347_SYSOK_USB3, (ret & (~(1)))); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } + lastChargeSlaveDevicesState = 0; + printk("smb347_configure_otg lastChargeSlaveDevicesState=%d\n",lastChargeSlaveDevicesState); + } } } @@ -344,14 +385,15 @@ static int smb347_configure_charger(struct i2c_client *client, int value) } if (value) { - /* Enable charging */ - ret = smb347_update_reg(client, smb347_CMD_REG, ENABLE_CHARGE); - if (ret < 0) { - dev_err(&client->dev, "%s(): Failed in writing register" - "0x%02x\n", __func__, smb347_CMD_REG); - goto error; 
+ if(!host_mode_charging_state) { + printk("smb347_configure_charger accept external power\n"); + ret = smb347_update_reg(client, smb347_CMD_REG, ENABLE_CHARGE); + if (ret < 0) { + dev_err(&client->dev, "%s(): Failed in writing register" + "0x%02x\n", __func__, smb347_CMD_REG); + goto error; + } } - /* Configure THERM ctrl */ /* ret = smb347_update_reg(client, smb347_THERM_CTRL, THERM_CTRL); @@ -361,7 +403,8 @@ static int smb347_configure_charger(struct i2c_client *client, int value) } */ } else { - /* Disable charging */ + // tmtmtm: make sure to NEVER call this in fixed_install_mode + printk("smb347_configure_charger do not charge; fixed_install_mode=%d\n",fixed_install_mode); ret = smb347_read(client, smb347_CMD_REG); if (ret < 0) { dev_err(&client->dev, "%s: err %d\n", __func__, ret); @@ -382,6 +425,8 @@ static int smb347_configure_charger(struct i2c_client *client, int value) goto error; } error: + if(ret!=0) + printk(KERN_INFO "smb347_configure_charger ERROR %d\n",ret); return ret; } @@ -390,6 +435,7 @@ static int smb347_charger_enable(bool enable) struct i2c_client *client = charger->client; u8 ret = 0; + printk("smb347_charger_enable %d\n",enable); if (enable) { /*Pin Controls -active low */ ret = smb347_update_reg(client, smb347_PIN_CTRL, PIN_ACT_LOW); @@ -511,6 +557,8 @@ smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) } error: + if(ret!=0) + printk(KERN_INFO "smb347_set_InputCurrentlimit ERROR %d\n",ret); wake_unlock(&charger_wakelock); return ret; } @@ -576,6 +624,8 @@ static int smb347_inok_irq(struct smb347_charger *smb) err2: gpio_free(gpio); err1: + //if(err!=0) + // printk(KERN_INFO "smb347_inok_irq ERROR %d\n",err); return err; } @@ -641,7 +691,7 @@ int smb347_hc_mode_callback(bool enable, int cur) if (charger->suspend_ongoing) return 0; - printk("smb347_hc_mode_callback+\n"); + //printk("smb347_hc_mode_callback+\n"); /* Enable volatile writes to registers */ ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); @@ -706,6 +756,7 @@ int smb347_hc_mode_callback(bool enable, int cur) } /* Disable auto power source detection (APSD) */ + //printk("smb347_hc_mode_callback Disable auto power source detection\n"); ret = smb347_clear_reg(client, smb347_CHRG_CTRL, ENABLE_APSD); if (ret < 0) { dev_err(&client->dev, "%s(): Failed in writing" @@ -730,10 +781,12 @@ int smb347_hc_mode_callback(bool enable, int cur) goto error; } - printk("smb347_hc_mode_callback-\n"); + //printk("smb347_hc_mode_callback-\n"); return ret; error: + //if(ret!=0) + // printk(KERN_INFO "smb347_hc_mode_callback ERROR %d\n",ret); return ret; } EXPORT_SYMBOL_GPL(smb347_hc_mode_callback); @@ -811,46 +864,138 @@ static int smb347_configure_interrupts(struct i2c_client *client) return ret; } +static int cable_type_detect(void); + static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, void *data) { struct i2c_client *client = charger->client; int ret; + int newExternalPowerState=0; + + printk("smb347_otg_status from=%d to=%d lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d fixed_install_mode=%d\n", + from,to,lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState,fixed_install_mode); + + if(to==10) { + // only when going suspend (OTG PULL) + // small sleep, so that ehci-tegra #### tegra_usb_resume can run first + // and use host_mode_charging_state's current value (probably charging), + // before we call cable_type_detect() (when it will likely switch to not charging) + // FIXME: but is tegra_usb_resume not only called on OTG 
PLUG? + // FIXME: do I mean "so that tegra_ehci_irq() can run first" ? + schedule_timeout_interruptible(msecs_to_jiffies(100)); + // when doing this pause, smb347_resume() will call cable_type_detect() before we do below + } + + cable_type_detect(); - if ((from == OTG_STATE_A_SUSPEND) && (to == OTG_STATE_A_HOST)) { - - /* configure charger */ - ret = smb347_configure_charger(client, 0); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - - /* ENABLE OTG */ - ret = smb347_configure_otg(client, 1); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - - } else if ((from == OTG_STATE_A_HOST) && (to == OTG_STATE_A_SUSPEND)) { + if (to == OTG_STATE_A_HOST) { + if(charger->cur_cable_type==1 || charger->cur_cable_type==3) + newExternalPowerState = 1; + + if(!newExternalPowerState) { + // no external power + if(fixed_install_mode) { + // allow battery to be charged + printk("smb347_otg_status allow battery to be charged\n"); + ret = smb347_configure_charger(client, 1); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + // disableOTG, dont chargeSlaves, don't stopChargeSlaves + printk("smb347_otg_status disableOTG, dont chargeSlaves, don't stopChargeSlaves\n"); + ret = smb347_configure_otg(client, 0, 0, 0); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + } else { + // tmtmtm: mobile-mode: we need to be careful NOT to disable charger detection too early + // once we start charging slaves ourselfs, we will not be able to detect ext power coming in + + // also: why are we waiting here, if inok_isr_work_function is called on power + // we actually depend on it to arrive in parallel + + // make external power detectable in case it is coming back + printk("smb347_otg_status make external power detectable\n"); + ret = smb347_configure_interrupts(client); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + + printk("smb347_otg_status waiting for external power...\n"); + // if power is detected, inok_isr_work_function will strike after aprox 1500 ms + schedule_timeout_interruptible(msecs_to_jiffies(500)); + schedule_timeout_interruptible(msecs_to_jiffies(500)); + schedule_timeout_interruptible(msecs_to_jiffies(400)); + schedule_timeout_interruptible(msecs_to_jiffies(400)); + if(charger->cur_cable_type==1 || charger->cur_cable_type==3) + newExternalPowerState = 1; + if(!newExternalPowerState) { + cable_type_detect(); + if(charger->cur_cable_type==1 || charger->cur_cable_type==3) + newExternalPowerState = 1; + } + printk("smb347_otg_status waiting for external power done %d\n",newExternalPowerState); + + if(!newExternalPowerState) { + // battery will NOT be charged + ret = smb347_configure_charger(client, 0); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + // enableOTG, chargeSlaves, don't stopChargeSlaves + ret = smb347_configure_otg(client, 1, 1, 0); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + } + } + } - /* Disable OTG */ - ret = smb347_configure_otg(client, 0); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); + if(newExternalPowerState) { + // allow battery to be charged + printk("smb347_otg_status allow battery to be charged\n"); + ret = smb347_configure_charger(client, 1); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + 
// enableOTG, don't chargeSlaves, don't stopChargeSlaves + printk("smb347_otg_status enableOTG, dont chargeSlaves, don't stopChargeSlaves\n"); + ret = smb347_configure_otg(client, 1, 0, 0); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + } - /* configure charger */ - ret = smb347_configure_charger(client, 1); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); + } else if (to == OTG_STATE_A_SUSPEND) { + + if(from == OTG_STATE_A_HOST) { + // disable host-mode and stop slave-charging + printk("smb347_otg_status disable host-mode and stop slave-charging\n"); + ret = smb347_configure_otg(client, 0, 0, lastChargeSlaveDevicesState); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + // allow battery to be charged + printk("smb347_otg_status allow battery to be charged\n"); + ret = smb347_configure_charger(client, 1); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + } + } - /* + //if(!newExternalPowerState /*&& !lastChargeSlaveDevicesState*/) { + // make external power detectable in case it is coming back + printk("smb347_otg_status make external power detectable\n"); ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); - */ - } + //} + + lastExternalPowerState = newExternalPowerState; + printk("smb347_otg_status DONE lastOtgState=%d externalPowerState=%d chargeSlaveDevicesState=%d\n", + lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); } /* workqueue function */ @@ -862,6 +1007,7 @@ static int cable_type_detect(void) int ac_ok = GPIO_AC_OK; int dock_in = gpio_dock_in; + printk(KERN_INFO "cable_type_detect()\n"); /* printk("cable_type_detect %d %lu %d %x jiffies=%lu %lu+\n", charger->old_cable_type, @@ -872,15 +1018,19 @@ static int cable_type_detect(void) charger->time_of_1800mA_limit+(ADAPTER_PROTECT_DELAY*HZ)); */ - if((pcba_ver <= GROUPER_PCBA_ER2) && (project_id == GROUPER_PROJECT_NAKASI)) + if((pcba_ver <= GROUPER_PCBA_ER2) && (project_id == GROUPER_PROJECT_NAKASI)) { + printk(KERN_INFO "cable_type_detect() wrong\n"); return 0; + } + host_mode_charging_state = 0; mutex_lock(&charger->cable_lock); if ((charger->old_cable_type == ac_cable) && charger->time_of_1800mA_limit && gpio_get_value(ac_ok) && time_after(charger->time_of_1800mA_limit+ ADAPTER_PROTECT_DELAY, jiffies)) { + printk(KERN_INFO "cable_type_detect() charger->test_1800mA_fail\n"); smb347_set_InputCurrentlimit(client, 900); charger->test_1800mA_fail = 1; queue_delayed_work(smb347_wq, @@ -888,7 +1038,7 @@ static int cable_type_detect(void) } if (gpio_get_value(ac_ok)) { - printk(KERN_INFO "INOK=H\n"); + printk(KERN_INFO "INOK=H no power\n"); charger->cur_cable_type = non_cable; smb347_set_InputCurrentlimit(client, 900); success = battery_callback(non_cable); @@ -903,8 +1053,15 @@ static int cable_type_detect(void) if (!(retval & DCIN_OV_UV_STS) && !gpio_get_value(dock_in)) { SMB_NOTICE("DC_IN\n"); success = battery_callback(ac_cable); - } else { + // tmtmtm + charger->cur_cable_type = ac_cable; + if(fixed_install_mode) { + host_mode_charging_state = 1; + printk(KERN_INFO "cable_type_detect() enabled host_mode_charging_state on DC_IN ######\n"); + } + + } else { /* cable type dection */ retval = smb347_read(client, smb347_STS_REG_E); SMB_NOTICE("Reg3F : 0x%02x\n", retval); @@ -942,17 +1099,17 @@ static int cable_type_detect(void) #ifdef TOUCH_CALLBACK_ENABLED 
touch_callback(usb_cable); #endif - // tmtmtm start - } else if(retval == APSD_SDP2) { - printk("Cable: SDP2 host mode charging\n"); + } else if(retval == APSD_HOST_MODE_CHARGING) { // tmtmtm + printk(KERN_INFO "Cable: host mode charging\n"); + charger->cur_cable_type = usb_cable; success = battery_callback(usb_cable); + host_mode_charging_state = 1; // tmtmtm #ifdef TOUCH_CALLBACK_ENABLED touch_callback(usb_cable); -#endif - // tmtmtm end +#endif } else { charger->cur_cable_type = unknow_cable; - printk(KERN_INFO "Unkown Plug In Cable type !! retval=%d\n",retval); + printk(KERN_INFO "Unkown Plug In Cable type !\n"); if (gpio_get_value(dock_in)) { charger->cur_cable_type = usb_cable; success = battery_callback(usb_cable); @@ -965,6 +1122,13 @@ static int cable_type_detect(void) } else { charger->cur_cable_type = unknow_cable; printk(KERN_INFO "USBIN=0\n"); + + // tmtmtm: battery tab keeps stating "Charging (AC)" + if(fixed_install_mode) { + host_mode_charging_state = 0; + printk(KERN_INFO "cable_type_detect() disabled host_mode_charging_state ############\n"); + } + success = battery_callback(non_cable); } } } @@ -986,17 +1150,106 @@ static void inok_isr_work_function(struct work_struct *dat) { struct i2c_client *client = charger->client; - cancel_delayed_work(&charger->curr_limit_work); - cancel_delayed_work(&charger->inok_isr_work); + // called on power loss/gain, but also if just a bare (non-powered) OTG adapter is pulled + printk("inok_isr_work_function lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", + lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); + + if(lastOtgState>0 && lastExternalPowerState>0) { + // we used to be in externally powered host mode + // this means external power was just lost + cancel_delayed_work(&charger->curr_limit_work); + cancel_delayed_work(&charger->inok_isr_work); + + // tmtmtm: no external power: in fixed_install_mode we prepare for power to come back + if(fixed_install_mode) { + smb347_clear_interrupts(client); + + // stop host-mode, don't chargeSlaves, don't stopChargeSlaves + printk("inok_isr_work_function fixed_install stop host-mode, don't chargeSlaves, don't stopChargeSlaves\n"); + if(smb347_configure_otg(client, 0, 0, 0)<0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + + // enable external power detection + printk("inok_isr_work_function fixed_install make external power detectable\n"); + if(smb347_configure_interrupts(client)<0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + + lastExternalPowerState = 0; + printk("inok_isr_work_function fixed_install make host aware it is now discharging\n"); + // make device aware it is now discharging + // tmtmtm: notwending ??? 
+ cable_type_detect(); + + } else { + printk("inok_isr_work_function lost external power in host mode; charge slave devices\n"); + + // normally, smb347_otg_status() is called whenever the OTG adapter is pulled or plugged + // here, external power was lost while the OTG adapter remained plugged + // we call smb347_otg_status() now, to activate self-charging of slave devices + // so we can continue host mode in OTG mode + // if we would NOT call smb347_otg_status() here, slave devices would stay without power now + +// tmtmtm: we don't want to call this, if OTG-adapter is pulled (not just power) + smb347_otg_status(OTG_STATE_A_HOST,OTG_STATE_A_HOST,NULL); + } + if(!lastExternalPowerState) { + // make external power detectable in case it is coming back + printk("inok_isr_work_function make external power detectable\n"); + int ret = smb347_configure_interrupts(client); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + } + + printk("inok_isr_work_function done lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", + lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); + return; + } + + // we were NOT in externally powered host mode cable_type_detect(); + if(charger->cur_cable_type!=1 && charger->cur_cable_type!=3) { + // still no power incoming + printk("inok_isr_work_function no power lastExternalPowerState=%d\n",lastExternalPowerState); + if(lastExternalPowerState) { + cancel_delayed_work(&charger->curr_limit_work); + cancel_delayed_work(&charger->inok_isr_work); + smb347_clear_interrupts(client); + + // make device aware it is now discharging + lastExternalPowerState = 0; + } - smb347_clear_interrupts(client); + // make external power detectable + printk("inok_isr_work_function make external power detectable\n"); + int ret = smb347_configure_interrupts(client); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + return; + } + + // power is incoming + lastExternalPowerState = 1; + + // host_mode_charging_state may have been set by cable_type_detect() + if(host_mode_charging_state>0 && lastOtgState==0) { + printk("inok_isr_work_function external power available, start host mode\n"); + if(smb347_configure_otg(client, 1, 0, 0)<0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + } + + //smb347_clear_interrupts(client); // FIXME??? 
+ printk("inok_isr_work_function external power available lastOtgState=%d\n",lastOtgState); } static void dockin_isr_work_function(struct work_struct *dat) { - struct i2c_client *client = charger->client; + //struct i2c_client *client = charger->client; int dock_in = gpio_dock_in; int ac_ok = GPIO_AC_OK; @@ -1092,8 +1345,9 @@ static int __devinit smb347_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); - int ret, irq_num; - uint8_t buf[15]; + int ret; + //int irq_num; + //uint8_t buf[15]; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) return -EIO; @@ -1151,7 +1405,6 @@ static int __devinit smb347_probe(struct i2c_client *client, } queue_delayed_work(smb347_wq, &charger->cable_det_work, 0.5*HZ); - ret = register_otg_callback(smb347_otg_status, charger); if (ret < 0) goto error; @@ -1173,6 +1426,7 @@ static int __devexit smb347_remove(struct i2c_client *client) static int smb347_suspend(struct i2c_client *client) { charger->suspend_ongoing = 1; + smb347_deep_sleep = 1; // tmtmtm printk("smb347_suspend+\n"); flush_workqueue(smb347_wq); @@ -1197,7 +1451,7 @@ static int smb347_shutdown(struct i2c_client *client) printk("smb347_shutdown+\n"); /* Disable OTG */ - ret = smb347_configure_otg(client, 0); + ret = smb347_configure_otg(client, 0, 0, lastChargeSlaveDevicesState); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index d5212ebc968..bf728f259ef 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -58,6 +58,9 @@ #define USB3_PREFETCH_ID 17 extern void baseband_xmm_L3_resume_check(void); +extern volatile int smb347_deep_sleep; // tmtmtm: from smb347-charger.c +//extern volatile int host_mode_charging_state; // tmtmtm: from smb347-charger.c +//extern int fixed_install_mode; // tmtmtm: from smb347-charger.c static struct usb_hcd *modem_ehci_handle; struct tegra_ehci_hcd { @@ -219,11 +222,27 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) } else if (tegra->bus_suspended && tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) { - printk("%s: no device connected before suspend\n", __func__); + // tmtmtm: OTG UNPLUG + // original intent: when waking up from deep sleep, skip the default return, + // if host_mode_charging AND fixed_install_mode are set + //if(host_mode_charging_state && fixed_install_mode) { + // printk("ehci-tegra %s waking up with host_mode_charging: special\n", __func__); + if(smb347_deep_sleep) { + printk("ehci-tegra %s wake-up/OTG-UNPLUG with smb347_deep_sleep: special\n", __func__); + // fix: skip default return + + // FIXME + // DAS EINSCHRÄNKEN AUF DEN fixed_install_mode löst das problem nur im MOBILE kernel + // ECHTE LÖSUNG: ONLY skip default return when really waking up from deep sleep + // das kann sowohl bei unplug als auch bei plug passieren + } else { + printk("ehci-tegra %s wake-up/OTG-PLUG without smb347_deep_sleep: normal return\n", __func__); spin_unlock(&ehci->lock); return 0; + } } spin_unlock(&ehci->lock); + //printk("ehci-tegra %s post spin_unlock\n", __func__); } irq_status = ehci_irq(hcd); @@ -233,12 +252,15 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) } if (ehci->controller_remote_wakeup) { + //printk("ehci-tegra %s ehci->controller_remote_wakeup\n", __func__); ehci->controller_remote_wakeup = false; /* disable interrupts */ ehci_writel(ehci, 0, &ehci->regs->intr_enable); 
tegra_usb_phy_preresume(tegra->phy, true); tegra->port_resuming = 1; + //printk("ehci-tegra %s ehci->controller_remote_wakeup done\n", __func__); } + //printk("ehci-tegra %s return irq_status=%d\n", __func__,irq_status); return irq_status; } @@ -608,9 +630,24 @@ static int tegra_usb_resume(struct usb_hcd *hcd, bool is_dpd) tegra_ehci_power_up(hcd, is_dpd); set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); + // tmtmtm: OTG PLUG + // original intent: skip the default restart, if host_mode_charging is set + // FIXME: hier keine einschränkung auf fixed_install_mode? + //if(host_mode_charging_state) { + // printk("ehci-tegra ######### tegra_usb_resume host_mode_charging: special\n"); + if(smb347_deep_sleep) { + printk("ehci-tegra %s wake-up/OTG-PLUG with smb347_deep_sleep: special\n", __func__); + // kommt u.a. + // wenn der gepowerte OTG adapter gesteckt wird (mobile-use kernel) + // fix: skip default restart + } else if ((tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) || (hsic) || (null_ulpi)) + { + //printk("ehci-tegra #### tegra_usb_resume !host_mode_charging: restart\n"); + printk("ehci-tegra %s wake-up/OTG-PLUG without smb347_deep_sleep: normal restart\n", __func__); goto restart; + } /* Force the phy to keep data lines in suspend state */ tegra_ehci_phy_restore_start(tegra->phy, tegra->port_speed); diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index c1fe7f899f1..def2b91bebe 100644 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -43,6 +43,8 @@ #define USB_VBUS_STATUS (1 << 10) #define USB_INTS (USB_VBUS_INT_STATUS | USB_ID_INT_STATUS) +extern volatile int smb347_deep_sleep; // tmtmtm: from smb347-charger.c + typedef void (*callback_t)(enum usb_otg_state to, enum usb_otg_state from, void *args); @@ -166,6 +168,7 @@ void tegra_start_host(struct tegra_otg_data *tegra) void tegra_stop_host(struct tegra_otg_data *tegra) { + //dev_info(tegra->otg.dev, "tegra_stop_host\n"); if (tegra->pdev) { tegra_usb_otg_host_unregister(tegra->pdev); tegra->pdev = NULL; @@ -230,21 +233,36 @@ static void irq_work(struct work_struct *work) dev_info(tegra->otg.dev, "%s --> %s\n", tegra_state_name(from), tegra_state_name(to)); - if (tegra->charger_cb) - tegra->charger_cb(to, from, tegra->charger_cb_data); + smb347_deep_sleep = 0; + dev_info(tegra->otg.dev, "smb347_deep_sleep cleared\n"); + // tmtmtm if (to == OTG_STATE_A_SUSPEND) { - if (from == OTG_STATE_A_HOST) + if (from == OTG_STATE_A_HOST) { + //dev_info(tegra->otg.dev, "tegra->charger_cb() h->s before\n"); + if (tegra->charger_cb) { + //dev_info(tegra->otg.dev, "tegra->charger_cb() h->s\n"); + tegra->charger_cb(to, from, tegra->charger_cb_data); // smb347_otg_status() + } tegra_stop_host(tegra); + } else if (from == OTG_STATE_B_PERIPHERAL && otg->gadget) usb_gadget_vbus_disconnect(otg->gadget); } else if (to == OTG_STATE_B_PERIPHERAL && otg->gadget) { if (from == OTG_STATE_A_SUSPEND) usb_gadget_vbus_connect(otg->gadget); } else if (to == OTG_STATE_A_HOST) { - if (from == OTG_STATE_A_SUSPEND) - tegra_start_host(tegra); + //if (from != OTG_STATE_A_HOST) + if (from == OTG_STATE_A_SUSPEND) { + if (tegra->charger_cb) { + //dev_info(tegra->otg.dev, "tegra->charger_cb() ?->h\n"); + tegra->charger_cb(to, from, tegra->charger_cb_data); // smb347_otg_status() + } + //dev_info(tegra->otg.dev, "tegra->charger_cb() ?->h after\n"); + tegra_start_host(tegra); + } } + dev_info(tegra->otg.dev, "done\n"); } From e86e42c29947a30e286007ce902778fb29d0bf8d Mon Sep 17 00:00:00 2001 From: Timur Mehrvarz Date: Mon, 28 Jan 2013 12:36:05 
+0100 Subject: [PATCH 457/678] USB AUDIO as secondary sound device Change-Id: I883f21a1bedc416fc8d192050cdfb5c2232dc084 --- drivers/base/dd.c | 15 ++++++++++++--- sound/usb/card.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 6658da743c3..7c8155d234f 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -110,7 +110,7 @@ static int really_probe(struct device *dev, struct device_driver *drv) int ret = 0; atomic_inc(&probe_count); - pr_debug("bus: '%s': %s: probing driver %s with device %s\n", + pr_debug("dd.c bus: '%s': %s: probing driver %s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); WARN_ON(!list_empty(&dev->devres_head)); @@ -201,10 +201,11 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) { int ret = 0; + //pr_info("dd.c driver_probe_device\n"); if (!device_is_registered(dev)) return -ENODEV; - pr_debug("bus: '%s': %s: matched device %s with driver %s\n", + pr_debug("dd.c bus: '%s': %s: matched device %s with driver %s\n", drv->bus->name, __func__, dev_name(dev), drv->name); pm_runtime_get_noresume(dev); @@ -305,9 +306,17 @@ static int __driver_attach(struct device *dev, void *data) * returns 0 and the @dev->driver is set, we've found a * compatible pair. */ + +// tmtmtm +struct device_driver *current_drv = NULL; + int driver_attach(struct device_driver *drv) { - return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); + // called by driver.c store_new_id() + usb_store_new_id() + // tmtmtm + current_drv = drv; + return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); + // --> probe, real_probe -> snd_usb_audio_probe } EXPORT_SYMBOL_GPL(driver_attach); diff --git a/sound/usb/card.c b/sound/usb/card.c index d8f2bf40145..54b9f1f5465 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -54,6 +55,8 @@ #include #include #include +#include // tmtmtm +#include // tmtmtm #include "usbaudio.h" #include "card.h" @@ -74,6 +77,12 @@ MODULE_DESCRIPTION("USB Audio"); MODULE_LICENSE("GPL"); MODULE_SUPPORTED_DEVICE("{{Generic,USB Audio}}"); +// tmtmtm +struct timer_list my_timer; +struct usb_device *postpone_usb_snd_dev = NULL; +struct device_driver *postpone_usb_snd_drv = NULL; +extern struct device_driver *current_drv; + static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ @@ -423,6 +432,24 @@ static int snd_usb_audio_create(struct usb_device *dev, int idx, return 0; } +//tmtmtm +static int mykthread(void *unused) +{ + printk("##### sound/usb/card.c mykthread driver_attach\n"); + if(postpone_usb_snd_drv!=NULL) + driver_attach(postpone_usb_snd_drv); +} +static void delayed_func(unsigned long unused) +{ + printk("##### sound/usb/card.c delayed_func driver_attach\n"); + + // Must offload to another thread, in order to prevent "BUG: scheduling while atomic" + // "calling block IO api(generic_make_request) from a soft irq thread (read callback) is a bad idea" + int ret = kernel_thread(mykthread, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND | SIGCHLD); + printk("##### sound/usb/card.c delayed_func ret=%d\n",ret); +} + + /* * probe the active usb device * @@ -445,6 +472,7 @@ snd_usb_audio_probe(struct usb_device *dev, int ifnum; u32 id; + printk("##### sound/usb/card.c snd_usb_audio_probe\n"); alts = &intf->altsetting[0]; ifnum = 
get_iface_desc(alts)->bInterfaceNumber; id = USB_ID(le16_to_cpu(dev->descriptor.idVendor), @@ -452,6 +480,25 @@ snd_usb_audio_probe(struct usb_device *dev, if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum) goto __err_val; + // tmtmtm: we don't want the USB DAC to become the primary sound card + // in order for a USB DAC, connected at boot time, to become available as + // an *overlay* primary sound card, we must postpone device probe + + struct timespec tp; ktime_get_ts(&tp); + if (tp.tv_sec<8 && postpone_usb_snd_dev==NULL) { + printk("##### sound/usb/card.c DON'T REGISTER EARLY tv_sec=%d ++++++++++++++++++++\n",tp.tv_sec); + postpone_usb_snd_dev = dev; + postpone_usb_snd_drv = current_drv; + init_timer(&my_timer); + my_timer.expires = jiffies + 20*HZ; // n*HZ = delay in number of seconds + my_timer.function = delayed_func; + add_timer(&my_timer); + printk("##### sound/usb/card.c delayed call to driver_attach initiated\n"); + goto __err_val; + } + //printk("##### sound/usb/card.c REGISTER tv_sec=%d ++++++++++++++++++++++++\n",tp.tv_sec); + + if (snd_usb_apply_boot_quirk(dev, intf, quirk) < 0) goto __err_val; From a90788f25543b37e3c831bdbe582673dc7ffa604 Mon Sep 17 00:00:00 2001 From: Timur Mehrvarz Date: Mon, 28 Jan 2013 21:07:54 +0100 Subject: [PATCH 458/678] disabled dac as secondary Change-Id: Iccb49e9175dc98d5a0bf5105cc2ffba119343fd6 --- drivers/power/smb347-charger.c | 2 +- sound/usb/card.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index e752633e2ef..cf4828f27ef 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -131,7 +131,7 @@ static unsigned int pcba_ver; static int gpio_dock_in = 0; // tmtmtm: also modify 'export KBUILD_BUILD_USER=timur-usbhost-fi-2013-01-01 -static int fixed_install_mode = 0; +static int fixed_install_mode = 1; volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c static volatile int host_mode_charging_state = 0; static volatile int lastExternalPowerState = 0; diff --git a/sound/usb/card.c b/sound/usb/card.c index 54b9f1f5465..6fb41f99965 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -479,7 +479,7 @@ snd_usb_audio_probe(struct usb_device *dev, le16_to_cpu(dev->descriptor.idProduct)); if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum) goto __err_val; - +/* // tmtmtm: we don't want the USB DAC to become the primary sound card // in order for a USB DAC, connected at boot time, to become available as // an *overlay* primary sound card, we must postpone device probe @@ -497,7 +497,7 @@ snd_usb_audio_probe(struct usb_device *dev, goto __err_val; } //printk("##### sound/usb/card.c REGISTER tv_sec=%d ++++++++++++++++++++++++\n",tp.tv_sec); - +*/ if (snd_usb_apply_boot_quirk(dev, intf, quirk) < 0) goto __err_val; From 77bb2604802398e65ed288f81577ca6323863cad Mon Sep 17 00:00:00 2001 From: Timur Mehrvarz Date: Wed, 30 Jan 2013 21:40:49 +0100 Subject: [PATCH 459/678] usbhost kconfig Change-Id: Ifd9da243efa6b3d5703d6245394e59c5e9901fdd --- arch/arm/mach-tegra/Kconfig | 7 +++ arch/arm/mach-tegra/Makefile | 2 + arch/arm/mach-tegra/usbhost.c | 96 ++++++++++++++++++++++++++++++++++ drivers/power/smb347-charger.c | 9 ++-- drivers/usb/host/ehci-tegra.c | 1 - sound/usb/card.c | 4 +- 6 files changed, 113 insertions(+), 6 deletions(-) create mode 100644 arch/arm/mach-tegra/usbhost.c diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index d1aec45cb99..558577ff6b9 100644 --- a/arch/arm/mach-tegra/Kconfig +++ 
b/arch/arm/mach-tegra/Kconfig @@ -635,4 +635,11 @@ config TEGRA_PREINIT_CLOCKS help Preinitialize Tegra clocks to known states before actual full- scale clock initialization starts. + +config USBHOST + bool "USBHOST related runtime configuration" + default y + help + A simple sysfs interface to allow switching between OTG and FI mode. + endif diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile index 9ea2aefe87a..a6fe3553ad1 100755 --- a/arch/arm/mach-tegra/Makefile +++ b/arch/arm/mach-tegra/Makefile @@ -28,6 +28,8 @@ obj-y += pm.o obj-$(CONFIG_TEGRA_WDT_RECOVERY) += wdt-recovery.o obj-$(CONFIG_PM_SLEEP) += pm-irq.o obj-y += gic.o +#obj-y += otgfi.o # tmtmtm +obj-$(CONFIG_USBHOST) += usbhost.o # tmtmtm obj-y += sleep.o diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c new file mode 100644 index 00000000000..835bd0ebd49 --- /dev/null +++ b/arch/arm/mach-tegra/usbhost.c @@ -0,0 +1,96 @@ +#include +#include + +int fixed_install_mode = 0; + +static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", fixed_install_mode); +} + +static ssize_t fixed_install_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + sscanf(buf, "%du", &fixed_install_mode); + return count; +} + +static struct kobj_attribute fixed_install_mode_attribute = + __ATTR(fixed_install_mode, 0666, fixed_install_mode_show, fixed_install_mode_store); + + + +int hotplug_on_boot = 0; + +static ssize_t hotplug_on_boot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", hotplug_on_boot); +} + +static ssize_t hotplug_on_boot_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + sscanf(buf, "%du", &hotplug_on_boot); + return count; +} + +static struct kobj_attribute hotplug_on_boot_attribute = + __ATTR(hotplug_on_boot, 0666, hotplug_on_boot_show, hotplug_on_boot_store); + + + +int fastcharge_in_host_mode = 0; + +static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", fastcharge_in_host_mode); +} + +static ssize_t fastcharge_in_host_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + sscanf(buf, "%du", &fastcharge_in_host_mode); + return count; +} + +static struct kobj_attribute fastcharge_in_host_mode_attribute = + __ATTR(fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); + + + +static struct attribute *attrs[] = { + &fixed_install_mode_attribute.attr, + &hotplug_on_boot_attribute.attr, + &fastcharge_in_host_mode_attribute.attr, + NULL, +}; + +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +static struct kobject *usbhost_kobj; + +int usbhost_init(void) +{ + int retval; + + fixed_install_mode = 0; + hotplug_on_boot = 0; + fastcharge_in_host_mode = 0; + + usbhost_kobj = kobject_create_and_add("usbhost", kernel_kobj); + if (!usbhost_kobj) { + return -ENOMEM; + } + retval = sysfs_create_group(usbhost_kobj, &attr_group); + if (retval) + kobject_put(usbhost_kobj); + return retval; +} + +void usbhost_exit(void) +{ + kobject_put(usbhost_kobj); +} + +module_init(usbhost_init); +module_exit(usbhost_exit); + diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index cf4828f27ef..9f10ec17ad9 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -131,7 +131,8 
@@ static unsigned int pcba_ver; static int gpio_dock_in = 0; // tmtmtm: also modify 'export KBUILD_BUILD_USER=timur-usbhost-fi-2013-01-01 -static int fixed_install_mode = 1; +//static int fixed_install_mode = 0; +extern int fixed_install_mode; volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c static volatile int host_mode_charging_state = 0; static volatile int lastExternalPowerState = 0; @@ -1197,7 +1198,7 @@ static void inok_isr_work_function(struct work_struct *dat) if(!lastExternalPowerState) { // make external power detectable in case it is coming back - printk("inok_isr_work_function make external power detectable\n"); + printk("inok_isr_work_function make external power detectable1\n"); int ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" @@ -1224,11 +1225,13 @@ static void inok_isr_work_function(struct work_struct *dat) } // make external power detectable - printk("inok_isr_work_function make external power detectable\n"); + printk("inok_isr_work_function make external power detectable2\n"); + // 2013-01-28: crash here after int ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); + printk("inok_isr_work_function make external power detectable2 done\n"); return; } diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index bf728f259ef..24443ebf5b6 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -632,7 +632,6 @@ static int tegra_usb_resume(struct usb_hcd *hcd, bool is_dpd) // tmtmtm: OTG PLUG // original intent: skip the default restart, if host_mode_charging is set - // FIXME: hier keine einschränkung auf fixed_install_mode? //if(host_mode_charging_state) { // printk("ehci-tegra ######### tegra_usb_resume host_mode_charging: special\n"); if(smb347_deep_sleep) { diff --git a/sound/usb/card.c b/sound/usb/card.c index 6fb41f99965..54b9f1f5465 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -479,7 +479,7 @@ snd_usb_audio_probe(struct usb_device *dev, le16_to_cpu(dev->descriptor.idProduct)); if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum) goto __err_val; -/* + // tmtmtm: we don't want the USB DAC to become the primary sound card // in order for a USB DAC, connected at boot time, to become available as // an *overlay* primary sound card, we must postpone device probe @@ -497,7 +497,7 @@ snd_usb_audio_probe(struct usb_device *dev, goto __err_val; } //printk("##### sound/usb/card.c REGISTER tv_sec=%d ++++++++++++++++++++++++\n",tp.tv_sec); -*/ + if (snd_usb_apply_boot_quirk(dev, intf, quirk) < 0) goto __err_val; From 04b1bedb71ecc3d0f8e2ec6d5087895a8f9a0120 Mon Sep 17 00:00:00 2001 From: Timur Mehrvarz Date: Thu, 31 Jan 2013 15:55:12 +0100 Subject: [PATCH 460/678] added fast-charging in hostmode support Change-Id: I1eb0122a8e7a4efc7bbc1f089972edb8641b6a9a --- arch/arm/mach-tegra/usbhost.c | 32 ++++++++++---------- drivers/power/smb347-charger.c | 54 ++++++++++++++++++++++++---------- drivers/usb/otg/tegra-otg.c | 6 ++-- 3 files changed, 59 insertions(+), 33 deletions(-) diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c index 835bd0ebd49..bff07c37d9a 100644 --- a/arch/arm/mach-tegra/usbhost.c +++ b/arch/arm/mach-tegra/usbhost.c @@ -1,57 +1,59 @@ #include #include -int fixed_install_mode = 0; +// TODO: need to persist all 3 values + +int usbhost_fixed_install_mode = 0; static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribute 
*attr, char *buf) { - return sprintf(buf, "%d\n", fixed_install_mode); + return sprintf(buf, "%d\n", usbhost_fixed_install_mode); } static ssize_t fixed_install_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - sscanf(buf, "%du", &fixed_install_mode); + sscanf(buf, "%du", &usbhost_fixed_install_mode); return count; } static struct kobj_attribute fixed_install_mode_attribute = - __ATTR(fixed_install_mode, 0666, fixed_install_mode_show, fixed_install_mode_store); + __ATTR(usbhost_fixed_install_mode, 0666, fixed_install_mode_show, fixed_install_mode_store); -int hotplug_on_boot = 0; +int usbhost_hotplug_on_boot = 0; static ssize_t hotplug_on_boot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", hotplug_on_boot); + return sprintf(buf, "%d\n", usbhost_hotplug_on_boot); } static ssize_t hotplug_on_boot_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - sscanf(buf, "%du", &hotplug_on_boot); + sscanf(buf, "%du", &usbhost_hotplug_on_boot); return count; } static struct kobj_attribute hotplug_on_boot_attribute = - __ATTR(hotplug_on_boot, 0666, hotplug_on_boot_show, hotplug_on_boot_store); + __ATTR(usbhost_hotplug_on_boot, 0666, hotplug_on_boot_show, hotplug_on_boot_store); -int fastcharge_in_host_mode = 0; +int usbhost_fastcharge_in_host_mode = 0; static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", fastcharge_in_host_mode); + return sprintf(buf, "%d\n", usbhost_fastcharge_in_host_mode); } static ssize_t fastcharge_in_host_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - sscanf(buf, "%du", &fastcharge_in_host_mode); + sscanf(buf, "%du", &usbhost_fastcharge_in_host_mode); return count; } static struct kobj_attribute fastcharge_in_host_mode_attribute = - __ATTR(fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); + __ATTR(usbhost_fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); @@ -72,9 +74,9 @@ int usbhost_init(void) { int retval; - fixed_install_mode = 0; - hotplug_on_boot = 0; - fastcharge_in_host_mode = 0; + usbhost_fixed_install_mode = 1; + usbhost_hotplug_on_boot = 0; + usbhost_fastcharge_in_host_mode = 0; usbhost_kobj = kobject_create_and_add("usbhost", kernel_kobj); if (!usbhost_kobj) { diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index 9f10ec17ad9..cb6f71c0e08 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -132,7 +132,14 @@ static int gpio_dock_in = 0; // tmtmtm: also modify 'export KBUILD_BUILD_USER=timur-usbhost-fi-2013-01-01 //static int fixed_install_mode = 0; -extern int fixed_install_mode; +extern int usbhost_fixed_install_mode; +extern int usbhost_fastcharge_in_host_mode; +// TODO: need select threads to dicover value changes +// when usbhost_fixed_install_mode from 1 to 0: enable slave charging +// when usbhost_fixed_install_mode from 0 to 1: disable slave charging +// when usbhost_fastcharge_in_host_mode from 0 to 1: enable fast charging (when in host mode) +// when usbhost_fastcharge_in_host_mode from 1 to 0: disable fast charging (when in host mode) + volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c static volatile int host_mode_charging_state = 0; static volatile int lastExternalPowerState = 0; @@ -268,7 +275,7 @@ static int 
smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch } // tmtmtm: we will never charge slave devices in fixed_install_mode - if(!fixed_install_mode) { + if(!usbhost_fixed_install_mode) { if(chargeSlaves) { if(!lastChargeSlaveDevicesState) { /* Configure INOK to be active high */ @@ -325,7 +332,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch } // tmtmtm: we will never charge slave devices in fixed_install_mode - if(!fixed_install_mode) { + if(!usbhost_fixed_install_mode) { if(chargeSlaves) { if(!lastChargeSlaveDevicesState) { /* Change "OTG output current limit" from 250mA to 750mA */ @@ -405,7 +412,7 @@ static int smb347_configure_charger(struct i2c_client *client, int value) */ } else { // tmtmtm: make sure to NEVER call this in fixed_install_mode - printk("smb347_configure_charger do not charge; fixed_install_mode=%d\n",fixed_install_mode); + printk("smb347_configure_charger do not charge; fixed_install_mode=%d\n",usbhost_fixed_install_mode); ret = smb347_read(client, smb347_CMD_REG); if (ret < 0) { dev_err(&client->dev, "%s: err %d\n", __func__, ret); @@ -467,6 +474,8 @@ smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) if (charger->curr_limit == current_limit) return ret; + printk("smb347_set_InputCurrentlimit %d\n",current_limit); + wake_lock(&charger_wakelock); /* Enable volatile writes to registers */ ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); @@ -874,11 +883,11 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo int newExternalPowerState=0; printk("smb347_otg_status from=%d to=%d lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d fixed_install_mode=%d\n", - from,to,lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState,fixed_install_mode); + from,to,lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState,usbhost_fixed_install_mode); if(to==10) { - // only when going suspend (OTG PULL) - // small sleep, so that ehci-tegra #### tegra_usb_resume can run first + // prevent race condition bug: only when going suspend (OTG PULL) + // insert small sleep, so that ehci-tegra #### tegra_usb_resume can run first // and use host_mode_charging_state's current value (probably charging), // before we call cable_type_detect() (when it will likely switch to not charging) // FIXME: but is tegra_usb_resume not only called on OTG PLUG? 
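For orientation: the bare to==10 test above leans on the numeric layout of enum usb_otg_state from include/linux/usb/otg.h. Assuming the usual 3.x layout of that enum (worth verifying against this tree; the header relies on declaration order, the values are written out here only for reference), the A-device tail sits roughly as sketched below, which is why 10 reads as "going suspend" in these comments and as OTG_STATE_A_SUSPEND over in tegra-otg.c:

	/* sketch of the tail of enum usb_otg_state (linux/usb/otg.h), positions assumed */
	enum usb_otg_state {
		OTG_STATE_UNDEFINED	= 0,
		/* B-device states 1..5 and A-device states 6..8 elided */
		OTG_STATE_A_HOST	= 9,	/* host mode active, OTG adapter present */
		OTG_STATE_A_SUSPEND	= 10,	/* "going suspend": the to==10 / OTG-pull case */
		OTG_STATE_A_PERIPHERAL	= 11,
		OTG_STATE_A_WAIT_VFALL	= 12,
		OTG_STATE_A_VBUS_ERR	= 13,
	};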
@@ -895,7 +904,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo if(!newExternalPowerState) { // no external power - if(fixed_install_mode) { + if(usbhost_fixed_install_mode) { // allow battery to be charged printk("smb347_otg_status allow battery to be charged\n"); ret = smb347_configure_charger(client, 1); @@ -925,6 +934,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo printk("smb347_otg_status waiting for external power...\n"); // if power is detected, inok_isr_work_function will strike after aprox 1500 ms schedule_timeout_interruptible(msecs_to_jiffies(500)); + // FIXME: abort condition schedule_timeout_interruptible(msecs_to_jiffies(500)); schedule_timeout_interruptible(msecs_to_jiffies(400)); schedule_timeout_interruptible(msecs_to_jiffies(400)); @@ -1057,7 +1067,7 @@ static int cable_type_detect(void) // tmtmtm charger->cur_cable_type = ac_cable; - if(fixed_install_mode) { + if(usbhost_fixed_install_mode) { host_mode_charging_state = 1; printk(KERN_INFO "cable_type_detect() enabled host_mode_charging_state on DC_IN ######\n"); } @@ -1080,6 +1090,7 @@ static int cable_type_detect(void) touch_callback(ac_cable); #endif } else if (retval == APSD_DCP) { + // Asus power supply printk(KERN_INFO "Cable: DCP\n"); charger->cur_cable_type = ac_cable; success = battery_callback(ac_cable); @@ -1101,13 +1112,24 @@ static int cable_type_detect(void) touch_callback(usb_cable); #endif } else if(retval == APSD_HOST_MODE_CHARGING) { // tmtmtm - printk(KERN_INFO "Cable: host mode charging\n"); - charger->cur_cable_type = usb_cable; - success = battery_callback(usb_cable); - host_mode_charging_state = 1; // tmtmtm + + if(usbhost_fastcharge_in_host_mode) { + printk(KERN_INFO "Cable: host mode charging ac\n"); + charger->cur_cable_type = ac_cable; + success = battery_callback(ac_cable); #ifdef TOUCH_CALLBACK_ENABLED - touch_callback(usb_cable); + touch_callback(ac_cable); +#endif + } else { + printk(KERN_INFO "Cable: host mode charging usb\n"); + charger->cur_cable_type = usb_cable; + success = battery_callback(usb_cable); +#ifdef TOUCH_CALLBACK_ENABLED + touch_callback(usb_cable); #endif + } + host_mode_charging_state = 1; // tmtmtm + } else { charger->cur_cable_type = unknow_cable; printk(KERN_INFO "Unkown Plug In Cable type !\n"); @@ -1125,7 +1147,7 @@ static int cable_type_detect(void) printk(KERN_INFO "USBIN=0\n"); // tmtmtm: battery tab keeps stating "Charging (AC)" - if(fixed_install_mode) { + if(usbhost_fixed_install_mode) { host_mode_charging_state = 0; printk(KERN_INFO "cable_type_detect() disabled host_mode_charging_state ############\n"); } @@ -1162,7 +1184,7 @@ static void inok_isr_work_function(struct work_struct *dat) cancel_delayed_work(&charger->inok_isr_work); // tmtmtm: no external power: in fixed_install_mode we prepare for power to come back - if(fixed_install_mode) { + if(usbhost_fixed_install_mode) { smb347_clear_interrupts(client); // stop host-mode, don't chargeSlaves, don't stopChargeSlaves diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index def2b91bebe..9be485e8b21 100644 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -233,8 +233,10 @@ static void irq_work(struct work_struct *work) dev_info(tegra->otg.dev, "%s --> %s\n", tegra_state_name(from), tegra_state_name(to)); - smb347_deep_sleep = 0; - dev_info(tegra->otg.dev, "smb347_deep_sleep cleared\n"); + if(smb347_deep_sleep>0) { + smb347_deep_sleep = 0; + dev_info(tegra->otg.dev, "smb347_deep_sleep 
cleared\n"); + } // tmtmtm if (to == OTG_STATE_A_SUSPEND) { From 083176f993e482bc6bb99c6b4850c89bcb545904 Mon Sep 17 00:00:00 2001 From: Timur Mehrvarz Date: Thu, 31 Jan 2013 18:38:14 +0100 Subject: [PATCH 461/678] changes to allow dynamic switching of usbhost_fixed_install_mode; resolving issues around lastChargeSlaveDevicesState Change-Id: I7b06694582192b4281fdd50b6d49d9bdc1ef77ec --- drivers/power/smb347-charger.c | 48 ++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index cb6f71c0e08..50afc8c9203 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -347,25 +347,25 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch printk("smb347_configure_otg lastChargeSlaveDevicesState=%d\n",lastChargeSlaveDevicesState); } } - else - if(stopChargeSlaves) { - if(lastChargeSlaveDevicesState) { - //printk("smb347_configure_otg stop charging slaves\n"); - /* Configure INOK to be active low */ - ret = smb347_read(client, smb347_SYSOK_USB3); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } + } - ret = smb347_write(client, smb347_SYSOK_USB3, (ret & (~(1)))); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } - lastChargeSlaveDevicesState = 0; - printk("smb347_configure_otg lastChargeSlaveDevicesState=%d\n",lastChargeSlaveDevicesState); + if(stopChargeSlaves) { + if(lastChargeSlaveDevicesState) { + //printk("smb347_configure_otg stop charging slaves\n"); + /* Configure INOK to be active low */ + ret = smb347_read(client, smb347_SYSOK_USB3); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; } + + ret = smb347_write(client, smb347_SYSOK_USB3, (ret & (~(1)))); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } + lastChargeSlaveDevicesState = 0; + printk("smb347_configure_otg lastChargeSlaveDevicesState=%d\n",lastChargeSlaveDevicesState); } } @@ -913,7 +913,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo "otg..\n", __func__); // disableOTG, dont chargeSlaves, don't stopChargeSlaves printk("smb347_otg_status disableOTG, dont chargeSlaves, don't stopChargeSlaves\n"); - ret = smb347_configure_otg(client, 0, 0, 0); + ret = smb347_configure_otg(client, 0, 0, lastChargeSlaveDevicesState); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); @@ -971,7 +971,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo "otg..\n", __func__); // enableOTG, don't chargeSlaves, don't stopChargeSlaves printk("smb347_otg_status enableOTG, dont chargeSlaves, don't stopChargeSlaves\n"); - ret = smb347_configure_otg(client, 1, 0, 0); + ret = smb347_configure_otg(client, 1, 0, lastChargeSlaveDevicesState); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); @@ -997,12 +997,14 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo //if(!newExternalPowerState /*&& !lastChargeSlaveDevicesState*/) { // make external power detectable in case it is coming back + // FIXME: probably better do this only, if lastChargeSlaveDevicesState is not set + if(!lastChargeSlaveDevicesState) { printk("smb347_otg_status make external power detectable\n"); ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", 
__func__); - //} + } lastExternalPowerState = newExternalPowerState; printk("smb347_otg_status DONE lastOtgState=%d externalPowerState=%d chargeSlaveDevicesState=%d\n", @@ -1189,7 +1191,7 @@ static void inok_isr_work_function(struct work_struct *dat) // stop host-mode, don't chargeSlaves, don't stopChargeSlaves printk("inok_isr_work_function fixed_install stop host-mode, don't chargeSlaves, don't stopChargeSlaves\n"); - if(smb347_configure_otg(client, 0, 0, 0)<0) + if(smb347_configure_otg(client, 0, 0, lastChargeSlaveDevicesState)<0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); @@ -1218,7 +1220,7 @@ static void inok_isr_work_function(struct work_struct *dat) smb347_otg_status(OTG_STATE_A_HOST,OTG_STATE_A_HOST,NULL); } - if(!lastExternalPowerState) { + if(!lastExternalPowerState && !lastChargeSlaveDevicesState) { // make external power detectable in case it is coming back printk("inok_isr_work_function make external power detectable1\n"); int ret = smb347_configure_interrupts(client); @@ -1263,7 +1265,7 @@ static void inok_isr_work_function(struct work_struct *dat) // host_mode_charging_state may have been set by cable_type_detect() if(host_mode_charging_state>0 && lastOtgState==0) { printk("inok_isr_work_function external power available, start host mode\n"); - if(smb347_configure_otg(client, 1, 0, 0)<0) + if(smb347_configure_otg(client, 1, 0, lastChargeSlaveDevicesState)<0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); } From 1ae2442ea8026605fc90b2aac9f82c4e7d07799a Mon Sep 17 00:00:00 2001 From: Timur Mehrvarz Date: Fri, 1 Feb 2013 15:17:19 +0100 Subject: [PATCH 462/678] fast charge switch Change-Id: Ia42c6743f1cf7d627c980a80d76a1de9c1f0bc65 --- arch/arm/mach-tegra/usbhost.c | 41 ++++++----- drivers/power/smb347-charger.c | 127 ++++++++++++++++++++++++++++----- 2 files changed, 133 insertions(+), 35 deletions(-) diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c index bff07c37d9a..145f5bcd70d 100644 --- a/arch/arm/mach-tegra/usbhost.c +++ b/arch/arm/mach-tegra/usbhost.c @@ -1,8 +1,12 @@ #include #include +extern int smb347_event_fi(void); +extern int smb347_event_fastcharge(void); + // TODO: need to persist all 3 values +/* ----------------------------------------- */ int usbhost_fixed_install_mode = 0; static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -13,14 +17,34 @@ static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribu static ssize_t fixed_install_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { sscanf(buf, "%du", &usbhost_fixed_install_mode); + // TODO: event -> smb347_charger.c + smb347_event_fi(); return count; } static struct kobj_attribute fixed_install_mode_attribute = __ATTR(usbhost_fixed_install_mode, 0666, fixed_install_mode_show, fixed_install_mode_store); +/* ----------------------------------------- */ +int usbhost_fastcharge_in_host_mode = 0; + +static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", usbhost_fastcharge_in_host_mode); +} + +static ssize_t fastcharge_in_host_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + sscanf(buf, "%du", &usbhost_fastcharge_in_host_mode); + // TODO: event -> smb347_charger.c + smb347_event_fastcharge(); + return count; +} +static struct kobj_attribute fastcharge_in_host_mode_attribute = + 
__ATTR(usbhost_fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); +/* ----------------------------------------- */ int usbhost_hotplug_on_boot = 0; static ssize_t hotplug_on_boot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -39,23 +63,6 @@ static struct kobj_attribute hotplug_on_boot_attribute = -int usbhost_fastcharge_in_host_mode = 0; - -static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", usbhost_fastcharge_in_host_mode); -} - -static ssize_t fastcharge_in_host_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) -{ - sscanf(buf, "%du", &usbhost_fastcharge_in_host_mode); - return count; -} - -static struct kobj_attribute fastcharge_in_host_mode_attribute = - __ATTR(usbhost_fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); - - static struct attribute *attrs[] = { &fixed_install_mode_attribute.attr, diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index 50afc8c9203..e3a3a8a41e4 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -110,6 +110,10 @@ #define ADAPTER_PROTECT_DELAY (4*HZ) #define GPIO_AC_OK TEGRA_GPIO_PV1 +/* Global functions declaration */ +int smb347_event_fi(void); +int smb347_event_fastcharge(void); + /* Functions declaration */ static int smb347_configure_charger(struct i2c_client *client, int value); static int smb347_configure_interrupts(struct i2c_client *client); @@ -134,11 +138,6 @@ static int gpio_dock_in = 0; //static int fixed_install_mode = 0; extern int usbhost_fixed_install_mode; extern int usbhost_fastcharge_in_host_mode; -// TODO: need select threads to dicover value changes -// when usbhost_fixed_install_mode from 1 to 0: enable slave charging -// when usbhost_fixed_install_mode from 0 to 1: disable slave charging -// when usbhost_fastcharge_in_host_mode from 0 to 1: enable fast charging (when in host mode) -// when usbhost_fastcharge_in_host_mode from 1 to 0: disable fast charging (when in host mode) volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c static volatile int host_mode_charging_state = 0; @@ -264,7 +263,8 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch { int ret = 0; - printk("smb347_configure_otg %d %d %d %d\n",enableOTG, chargeSlaves, stopChargeSlaves, lastOtgState); + printk("smb347_configure_otg otg=%d chargeSlaves=%d stopSlaves=%d lastOtg=%d\n", + enableOTG, chargeSlaves, stopChargeSlaves, lastOtgState); /*Enable volatile writes to registers*/ ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); @@ -519,7 +519,7 @@ smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) else setting |= 0x03; - printk(KERN_INFO "[charger] set cahrger limmit, limit=%u retval =%x setting=%x\n", + printk(KERN_INFO "[charger] set charger limit, limit=%u retval =%x setting=%x\n", current_limit, retval, setting); ret = smb347_write(client, smb347_CHRG_CRNTS, setting); @@ -925,7 +925,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo // we actually depend on it to arrive in parallel // make external power detectable in case it is coming back - printk("smb347_otg_status make external power detectable\n"); + printk("smb347_otg_status make external power detectable1\n"); ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in 
configuring" @@ -999,7 +999,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo // make external power detectable in case it is coming back // FIXME: probably better do this only, if lastChargeSlaveDevicesState is not set if(!lastChargeSlaveDevicesState) { - printk("smb347_otg_status make external power detectable\n"); + printk("smb347_otg_status make external power detectable2\n"); ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" @@ -1176,6 +1176,7 @@ static void inok_isr_work_function(struct work_struct *dat) struct i2c_client *client = charger->client; // called on power loss/gain, but also if just a bare (non-powered) OTG adapter is pulled + // also if FI is disabled via sysfs printk("inok_isr_work_function lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); @@ -1235,7 +1236,9 @@ static void inok_isr_work_function(struct work_struct *dat) } // we were NOT in externally powered host mode - cable_type_detect(); + if(!lastChargeSlaveDevicesState) { + cable_type_detect(); + } if(charger->cur_cable_type!=1 && charger->cur_cable_type!=3) { // still no power incoming printk("inok_isr_work_function no power lastExternalPowerState=%d\n",lastExternalPowerState); @@ -1248,14 +1251,16 @@ static void inok_isr_work_function(struct work_struct *dat) lastExternalPowerState = 0; } - // make external power detectable - printk("inok_isr_work_function make external power detectable2\n"); - // 2013-01-28: crash here after - int ret = smb347_configure_interrupts(client); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - printk("inok_isr_work_function make external power detectable2 done\n"); + if(!lastChargeSlaveDevicesState) { + // make external power detectable + printk("inok_isr_work_function make external power detectable2\n"); + // 2013-01-28: crash here after + int ret = smb347_configure_interrupts(client); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + printk("inok_isr_work_function make external power detectable2 done\n"); + } return; } @@ -1368,6 +1373,92 @@ static void smb347_default_setback(void) } } +int smb347_event_fi(void) { + // called by usbhost.c sysfs change from user space + struct i2c_client *client = charger->client; + printk("smb347_event_fi %d\n",usbhost_fixed_install_mode); + if(usbhost_fixed_install_mode>0) { + // from OTG to FI + // make external power detectable in case it is coming back + int ret = smb347_configure_interrupts(client); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + // battery will be charged + ret = smb347_configure_charger(client, 1); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + + // enable OTG, disable slave charging + if(smb347_configure_otg(client, 1, 0, lastChargeSlaveDevicesState)<0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + + // inok_isr_work_function() will now be called + schedule_timeout_interruptible(msecs_to_jiffies(100)); + cable_type_detect(); + + // FIXME: switching from OTG to FI: does NOT remove power from slave (only briefly; needs OTG-cables to be pulled) + // will also NOT accept external power now + + // wenn ich anschliessend OTG ziehe und aufstecke, + // fährt alles hoch, nur der DAC wird nicht erkannt (device not accepting address 3, error -32) + 
// erst wenn ich dem DAC den strom ziehe, geht er wieder + // ergo: durch das aus- und einschalten von FI wird der DAC temporär gestört + + } else { + // from FI to OTG: enable slave charging + printk("enable slave charging lastExternalPowerState=%d\n",lastExternalPowerState); + // battery will NOT be charged + int ret = smb347_configure_charger(client, 0); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + if(lastExternalPowerState) { + cancel_delayed_work(&charger->curr_limit_work); + cancel_delayed_work(&charger->inok_isr_work); + smb347_clear_interrupts(client); + + // make device aware it is now discharging + lastExternalPowerState = 0; + } + // enableOTG, chargeSlaves, don't stopChargeSlaves + if(smb347_configure_otg(client, 1, 1, 0)<0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + // inok_isr_work_function() will now be called + + // FIXME: switching from FI to OTG (after power being removed): + // DOES power the slave, but slaves are NOT detected, not even when replugged + // music plays on speaker + } +} + + +int smb347_event_fastcharge(void) { + // called by usbhost.c sysfs change from user space + printk("smb347_event_fastcharge %d\n",usbhost_fastcharge_in_host_mode); + if(host_mode_charging_state>0) { + if(usbhost_fastcharge_in_host_mode) { + printk(KERN_INFO "host mode charging ac\n"); + charger->cur_cable_type = ac_cable; + battery_callback(ac_cable); +#ifdef TOUCH_CALLBACK_ENABLED + touch_callback(ac_cable); +#endif + } else { + printk(KERN_INFO "host mode charging usb\n"); + charger->cur_cable_type = usb_cable; + battery_callback(usb_cable); +#ifdef TOUCH_CALLBACK_ENABLED + touch_callback(usb_cable); +#endif + } + } +} + + static int __devinit smb347_probe(struct i2c_client *client, const struct i2c_device_id *id) { From 98a79c2760ee578a4cb36206870afc2e8b225510 Mon Sep 17 00:00:00 2001 From: Timur Mehrvarz Date: Fri, 1 Feb 2013 17:03:32 +0100 Subject: [PATCH 463/678] adding credentials Change-Id: I2c58b90e38ec7696e4767fa75077823ecf256cb3 --- arch/arm/mach-tegra/usbhost.c | 16 +++++++--------- drivers/power/smb347-charger.c | 28 ++++++++++------------------ drivers/usb/host/ehci-tegra.c | 30 ++---------------------------- drivers/usb/otg/tegra-otg.c | 12 ++---------- 4 files changed, 21 insertions(+), 65 deletions(-) diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c index 145f5bcd70d..28c24b08e68 100644 --- a/arch/arm/mach-tegra/usbhost.c +++ b/arch/arm/mach-tegra/usbhost.c @@ -4,10 +4,11 @@ extern int smb347_event_fi(void); extern int smb347_event_fastcharge(void); -// TODO: need to persist all 3 values +// Copyright (C) 2013 Timur Mehrvarz +// TODO: need to persist all 3 usbhost_* values /* ----------------------------------------- */ -int usbhost_fixed_install_mode = 0; +int usbhost_fixed_install_mode; static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -17,7 +18,6 @@ static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribu static ssize_t fixed_install_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { sscanf(buf, "%du", &usbhost_fixed_install_mode); - // TODO: event -> smb347_charger.c smb347_event_fi(); return count; } @@ -26,7 +26,7 @@ static struct kobj_attribute fixed_install_mode_attribute = __ATTR(usbhost_fixed_install_mode, 0666, fixed_install_mode_show, fixed_install_mode_store); /* ----------------------------------------- */ -int 
usbhost_fastcharge_in_host_mode = 0; +int usbhost_fastcharge_in_host_mode; static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -36,7 +36,6 @@ static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_at static ssize_t fastcharge_in_host_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { sscanf(buf, "%du", &usbhost_fastcharge_in_host_mode); - // TODO: event -> smb347_charger.c smb347_event_fastcharge(); return count; } @@ -45,7 +44,7 @@ static struct kobj_attribute fastcharge_in_host_mode_attribute = __ATTR(usbhost_fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); /* ----------------------------------------- */ -int usbhost_hotplug_on_boot = 0; +int usbhost_hotplug_on_boot; static ssize_t hotplug_on_boot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -61,9 +60,7 @@ static ssize_t hotplug_on_boot_store(struct kobject *kobj, struct kobj_attribute static struct kobj_attribute hotplug_on_boot_attribute = __ATTR(usbhost_hotplug_on_boot, 0666, hotplug_on_boot_show, hotplug_on_boot_store); - - - +/* ----------------------------------------- */ static struct attribute *attrs[] = { &fixed_install_mode_attribute.attr, &hotplug_on_boot_attribute.attr, @@ -81,6 +78,7 @@ int usbhost_init(void) { int retval; + // default values usbhost_fixed_install_mode = 1; usbhost_hotplug_on_boot = 0; usbhost_fastcharge_in_host_mode = 0; diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index e3a3a8a41e4..0ec5da60d7d 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -4,6 +4,7 @@ * Battery charger driver for smb347 from summit microelectronics * * Copyright (c) 2012, NVIDIA Corporation. 
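 // note (assuming standard jiffies semantics): msecs_to_jiffies(100) fed into
 // schedule_timeout_interruptible() below is an interruptible sleep of roughly
 // 100 ms, which is what gives the EHCI resume path (tegra_usb_resume /
 // tegra_ehci_irq) time to see host_mode_charging_state before
 // cable_type_detect() rewrites it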
+ * Copyright (C) 2013 Timur Mehrvarz * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -134,8 +135,6 @@ static unsigned int project_id; static unsigned int pcba_ver; static int gpio_dock_in = 0; -// tmtmtm: also modify 'export KBUILD_BUILD_USER=timur-usbhost-fi-2013-01-01 -//static int fixed_install_mode = 0; extern int usbhost_fixed_install_mode; extern int usbhost_fastcharge_in_host_mode; @@ -331,7 +330,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch } } - // tmtmtm: we will never charge slave devices in fixed_install_mode + // tmtmtm: we never charge slave devices in fixed_install_mode if(!usbhost_fixed_install_mode) { if(chargeSlaves) { if(!lastChargeSlaveDevicesState) { @@ -351,7 +350,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch if(stopChargeSlaves) { if(lastChargeSlaveDevicesState) { - //printk("smb347_configure_otg stop charging slaves\n"); + printk("smb347_configure_otg stop charging slaves\n"); /* Configure INOK to be active low */ ret = smb347_read(client, smb347_SYSOK_USB3); if (ret < 0) { @@ -783,7 +782,7 @@ int smb347_hc_mode_callback(bool enable, int cur) } } - /* Disable volatile writes to registers */ + /* Disable volatile writes to registers */ ret = smb347_volatile_writes(client, smb347_DISABLE_WRITE); if (ret < 0) { dev_err(&client->dev, "%s() error in configuring charger..\n", @@ -795,8 +794,6 @@ int smb347_hc_mode_callback(bool enable, int cur) return ret; error: - //if(ret!=0) - // printk(KERN_INFO "smb347_hc_mode_callback ERROR %d\n",ret); return ret; } EXPORT_SYMBOL_GPL(smb347_hc_mode_callback); @@ -891,9 +888,9 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo // and use host_mode_charging_state's current value (probably charging), // before we call cable_type_detect() (when it will likely switch to not charging) // FIXME: but is tegra_usb_resume not only called on OTG PLUG? - // FIXME: do I mean "so that tegra_ehci_irq() can run first" ? + // FIXME: better "so that tegra_ehci_irq() can run first" ? 
schedule_timeout_interruptible(msecs_to_jiffies(100)); - // when doing this pause, smb347_resume() will call cable_type_detect() before we do below + // pausing here, smb347_resume() will call cable_type_detect() before we do (see: below) } cable_type_detect(); @@ -921,9 +918,6 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo // tmtmtm: mobile-mode: we need to be careful NOT to disable charger detection too early // once we start charging slaves ourselfs, we will not be able to detect ext power coming in - // also: why are we waiting here, if inok_isr_work_function is called on power - // we actually depend on it to arrive in parallel - // make external power detectable in case it is coming back printk("smb347_otg_status make external power detectable1\n"); ret = smb347_configure_interrupts(client); @@ -995,9 +989,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo } } - //if(!newExternalPowerState /*&& !lastChargeSlaveDevicesState*/) { - // make external power detectable in case it is coming back - // FIXME: probably better do this only, if lastChargeSlaveDevicesState is not set + // make external power detectable in case it is coming back if(!lastChargeSlaveDevicesState) { printk("smb347_otg_status make external power detectable2\n"); ret = smb347_configure_interrupts(client); @@ -1130,7 +1122,7 @@ static int cable_type_detect(void) touch_callback(usb_cable); #endif } - host_mode_charging_state = 1; // tmtmtm + host_mode_charging_state = 1; // tmtmtm } else { charger->cur_cable_type = unknow_cable; @@ -1148,7 +1140,7 @@ static int cable_type_detect(void) charger->cur_cable_type = unknow_cable; printk(KERN_INFO "USBIN=0\n"); - // tmtmtm: battery tab keeps stating "Charging (AC)" + // tmtmtm: fix: battery tab keeps stating "Charging (AC)" if(usbhost_fixed_install_mode) { host_mode_charging_state = 0; printk(KERN_INFO "cable_type_detect() disabled host_mode_charging_state ############\n"); @@ -1217,7 +1209,7 @@ static void inok_isr_work_function(struct work_struct *dat) // so we can continue host mode in OTG mode // if we would NOT call smb347_otg_status() here, slave devices would stay without power now -// tmtmtm: we don't want to call this, if OTG-adapter is pulled (not just power) + // tmtmtm: we may not want to call this, if the OTG-adapter is pulled (not just power) smb347_otg_status(OTG_STATE_A_HOST,OTG_STATE_A_HOST,NULL); } diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 24443ebf5b6..53fbac3e5c1 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -3,6 +3,7 @@ * * Copyright (C) 2010 Google, Inc. 
* Copyright (C) 2009 - 2011 NVIDIA Corporation + * Copyright (C) 2013 Timur Mehrvarz * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -59,8 +60,6 @@ extern void baseband_xmm_L3_resume_check(void); extern volatile int smb347_deep_sleep; // tmtmtm: from smb347-charger.c -//extern volatile int host_mode_charging_state; // tmtmtm: from smb347-charger.c -//extern int fixed_install_mode; // tmtmtm: from smb347-charger.c static struct usb_hcd *modem_ehci_handle; struct tegra_ehci_hcd { @@ -222,27 +221,15 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) } else if (tegra->bus_suspended && tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) { - // tmtmtm: OTG UNPLUG - // original intent: when waking up from deep sleep, skip the default return, - // if host_mode_charging AND fixed_install_mode are set - //if(host_mode_charging_state && fixed_install_mode) { - // printk("ehci-tegra %s waking up with host_mode_charging: special\n", __func__); if(smb347_deep_sleep) { printk("ehci-tegra %s wake-up/OTG-UNPLUG with smb347_deep_sleep: special\n", __func__); // fix: skip default return - - // FIXME - // DAS EINSCHRÄNKEN AUF DEN fixed_install_mode löst das problem nur im MOBILE kernel - // ECHTE LÖSUNG: ONLY skip default return when really waking up from deep sleep - // das kann sowohl bei unplug als auch bei plug passieren } else { - printk("ehci-tegra %s wake-up/OTG-PLUG without smb347_deep_sleep: normal return\n", __func__); spin_unlock(&ehci->lock); return 0; } } spin_unlock(&ehci->lock); - //printk("ehci-tegra %s post spin_unlock\n", __func__); } irq_status = ehci_irq(hcd); @@ -252,15 +239,12 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) } if (ehci->controller_remote_wakeup) { - //printk("ehci-tegra %s ehci->controller_remote_wakeup\n", __func__); ehci->controller_remote_wakeup = false; /* disable interrupts */ ehci_writel(ehci, 0, &ehci->regs->intr_enable); tegra_usb_phy_preresume(tegra->phy, true); tegra->port_resuming = 1; - //printk("ehci-tegra %s ehci->controller_remote_wakeup done\n", __func__); } - //printk("ehci-tegra %s return irq_status=%d\n", __func__,irq_status); return irq_status; } @@ -630,21 +614,11 @@ static int tegra_usb_resume(struct usb_hcd *hcd, bool is_dpd) tegra_ehci_power_up(hcd, is_dpd); set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); - // tmtmtm: OTG PLUG - // original intent: skip the default restart, if host_mode_charging is set - //if(host_mode_charging_state) { - // printk("ehci-tegra ######### tegra_usb_resume host_mode_charging: special\n"); if(smb347_deep_sleep) { printk("ehci-tegra %s wake-up/OTG-PLUG with smb347_deep_sleep: special\n", __func__); - // kommt u.a. - // wenn der gepowerte OTG adapter gesteckt wird (mobile-use kernel) - // fix: skip default restart } else if ((tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) || (hsic) || - (null_ulpi)) - { - //printk("ehci-tegra #### tegra_usb_resume !host_mode_charging: restart\n"); - printk("ehci-tegra %s wake-up/OTG-PLUG without smb347_deep_sleep: normal restart\n", __func__); + (null_ulpi)) { goto restart; } diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index 9be485e8b21..bfc97d04fc0 100644 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -5,6 +5,7 @@ * * Copyright (C) 2010 NVIDIA Corp. * Copyright (C) 2010 Google, Inc. 
+ * Copyright (C) 2013 Timur Mehrvarz * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -168,7 +169,6 @@ void tegra_start_host(struct tegra_otg_data *tegra) void tegra_stop_host(struct tegra_otg_data *tegra) { - //dev_info(tegra->otg.dev, "tegra_stop_host\n"); if (tegra->pdev) { tegra_usb_otg_host_unregister(tegra->pdev); tegra->pdev = NULL; @@ -233,17 +233,11 @@ static void irq_work(struct work_struct *work) dev_info(tegra->otg.dev, "%s --> %s\n", tegra_state_name(from), tegra_state_name(to)); - if(smb347_deep_sleep>0) { - smb347_deep_sleep = 0; - dev_info(tegra->otg.dev, "smb347_deep_sleep cleared\n"); - } - // tmtmtm + smb347_deep_sleep = 0; if (to == OTG_STATE_A_SUSPEND) { if (from == OTG_STATE_A_HOST) { - //dev_info(tegra->otg.dev, "tegra->charger_cb() h->s before\n"); if (tegra->charger_cb) { - //dev_info(tegra->otg.dev, "tegra->charger_cb() h->s\n"); tegra->charger_cb(to, from, tegra->charger_cb_data); // smb347_otg_status() } tegra_stop_host(tegra); @@ -257,10 +251,8 @@ static void irq_work(struct work_struct *work) //if (from != OTG_STATE_A_HOST) if (from == OTG_STATE_A_SUSPEND) { if (tegra->charger_cb) { - //dev_info(tegra->otg.dev, "tegra->charger_cb() ?->h\n"); tegra->charger_cb(to, from, tegra->charger_cb_data); // smb347_otg_status() } - //dev_info(tegra->otg.dev, "tegra->charger_cb() ?->h after\n"); tegra_start_host(tegra); } } From ed98ab3a869754eef2c8d29509f1d71531b2ecc7 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 13 Apr 2013 18:29:21 -0400 Subject: [PATCH 464/678] enabled FTDI; added sysfs usbhost_hostmode; usbhost_hotplug_on_boot=1 as default; card.c now evaluating usbhost_hotplug_on_boot --- arch/arm/mach-tegra/usbhost.c | 22 ++++++++++++++- drivers/power/smb347-charger.c | 39 ++++++++++++++------------- sound/usb/card.c | 49 +++++++++++++++++++++------------- 3 files changed, 72 insertions(+), 38 deletions(-) diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c index 28c24b08e68..38c87df3a2c 100644 --- a/arch/arm/mach-tegra/usbhost.c +++ b/arch/arm/mach-tegra/usbhost.c @@ -60,11 +60,29 @@ static ssize_t hotplug_on_boot_store(struct kobject *kobj, struct kobj_attribute static struct kobj_attribute hotplug_on_boot_attribute = __ATTR(usbhost_hotplug_on_boot, 0666, hotplug_on_boot_show, hotplug_on_boot_store); +/* ----------------------------------------- */ +int usbhost_hostmode; + +static ssize_t hostmode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", usbhost_hostmode); +} + +static ssize_t hostmode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + sscanf(buf, "%du", &usbhost_hostmode); + return count; +} + +static struct kobj_attribute hostmode_attribute = + __ATTR(usbhost_hostmode, 0666, hostmode_show, hostmode_store); + /* ----------------------------------------- */ static struct attribute *attrs[] = { &fixed_install_mode_attribute.attr, &hotplug_on_boot_attribute.attr, &fastcharge_in_host_mode_attribute.attr, + &hostmode_attribute.attr, NULL, }; @@ -80,8 +98,10 @@ int usbhost_init(void) // default values usbhost_fixed_install_mode = 1; - usbhost_hotplug_on_boot = 0; + usbhost_hotplug_on_boot = 1; usbhost_fastcharge_in_host_mode = 0; + printk("usbhost %s FI=%d HP=%d FC=%d\n", __func__, usbhost_fixed_install_mode, + usbhost_hotplug_on_boot, usbhost_fastcharge_in_host_mode); usbhost_kobj = kobject_create_and_add("usbhost", kernel_kobj); if 
(!usbhost_kobj) { diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index 0ec5da60d7d..8b9f1c89edc 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -137,11 +137,12 @@ static int gpio_dock_in = 0; extern int usbhost_fixed_install_mode; extern int usbhost_fastcharge_in_host_mode; +extern int usbhost_hostmode; volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c static volatile int host_mode_charging_state = 0; static volatile int lastExternalPowerState = 0; -static volatile int lastOtgState = 0; +//static volatile int lastOtgState = 0; static volatile int lastChargeSlaveDevicesState = 0; static volatile int hostmode_waiting_for_power = 0; @@ -263,7 +264,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch int ret = 0; printk("smb347_configure_otg otg=%d chargeSlaves=%d stopSlaves=%d lastOtg=%d\n", - enableOTG, chargeSlaves, stopChargeSlaves, lastOtgState); + enableOTG, chargeSlaves, stopChargeSlaves, usbhost_hostmode); /*Enable volatile writes to registers*/ ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); @@ -302,7 +303,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch } if(enableOTG>0) { - if(!lastOtgState) { + if(!usbhost_hostmode) { printk("smb347_configure_otg enable host mode\n"); ret = smb347_update_reg(client, smb347_CMD_REG, 0x10); if (ret < 0) { @@ -310,10 +311,10 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch "0x%02x\n", __func__, smb347_CMD_REG); goto error; } - lastOtgState = 1; + usbhost_hostmode = 1; } } else if(enableOTG==0) { - if(lastOtgState) { + if(usbhost_hostmode) { printk("smb347_configure_otg disable host mode\n"); ret = smb347_read(client, smb347_CMD_REG); if (ret < 0) { @@ -326,7 +327,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch dev_err(&client->dev, "%s: err %d\n", __func__, ret); goto error; } - lastOtgState=0; + usbhost_hostmode=0; } } @@ -879,8 +880,8 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo int ret; int newExternalPowerState=0; - printk("smb347_otg_status from=%d to=%d lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d fixed_install_mode=%d\n", - from,to,lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState,usbhost_fixed_install_mode); + printk("smb347_otg_status from=%d to=%d hostmode=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d fixed_install_mode=%d\n", + from,to,usbhost_hostmode,lastExternalPowerState,lastChargeSlaveDevicesState,usbhost_fixed_install_mode); if(to==10) { // prevent race condition bug: only when going suspend (OTG PULL) @@ -999,8 +1000,8 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo } lastExternalPowerState = newExternalPowerState; - printk("smb347_otg_status DONE lastOtgState=%d externalPowerState=%d chargeSlaveDevicesState=%d\n", - lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); + printk("smb347_otg_status DONE hostmode=%d externalPowerState=%d chargeSlaveDevicesState=%d\n", + usbhost_hostmode,lastExternalPowerState,lastChargeSlaveDevicesState); } /* workqueue function */ @@ -1169,10 +1170,10 @@ static void inok_isr_work_function(struct work_struct *dat) // called on power loss/gain, but also if just a bare (non-powered) OTG adapter is pulled // also if FI is disabled via sysfs - printk("inok_isr_work_function lastOtgState=%d lastExternalPowerState=%d 
lastChargeSlaveDevicesState=%d\n", - lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); + printk("inok_isr_work_function hostmode=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", + usbhost_hostmode,lastExternalPowerState,lastChargeSlaveDevicesState); - if(lastOtgState>0 && lastExternalPowerState>0) { + if(usbhost_hostmode>0 && lastExternalPowerState>0) { // we used to be in externally powered host mode // this means external power was just lost cancel_delayed_work(&charger->curr_limit_work); @@ -1222,8 +1223,8 @@ static void inok_isr_work_function(struct work_struct *dat) "otg..\n", __func__); } - printk("inok_isr_work_function done lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", - lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); + printk("inok_isr_work_function done hostmode=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", + usbhost_hostmode,lastExternalPowerState,lastChargeSlaveDevicesState); return; } @@ -1246,7 +1247,7 @@ static void inok_isr_work_function(struct work_struct *dat) if(!lastChargeSlaveDevicesState) { // make external power detectable printk("inok_isr_work_function make external power detectable2\n"); - // 2013-01-28: crash here after + // 2013-01-28: crash here after (in the mobile version only?) int ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" @@ -1260,7 +1261,7 @@ static void inok_isr_work_function(struct work_struct *dat) lastExternalPowerState = 1; // host_mode_charging_state may have been set by cable_type_detect() - if(host_mode_charging_state>0 && lastOtgState==0) { + if(host_mode_charging_state>0 && usbhost_hostmode==0) { printk("inok_isr_work_function external power available, start host mode\n"); if(smb347_configure_otg(client, 1, 0, lastChargeSlaveDevicesState)<0) dev_err(&client->dev, "%s() error in configuring" @@ -1268,7 +1269,7 @@ static void inok_isr_work_function(struct work_struct *dat) } //smb347_clear_interrupts(client); // FIXME??? 
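Since usbhost.c registers its kobject via kobject_create_and_add("usbhost", kernel_kobj) and declares the four flags as 0666 attributes, they should surface as /sys/kernel/usbhost/usbhost_* files that user space can read and toggle. A minimal user-space sketch, with the paths assumed from those __ATTR() declarations rather than taken from a running device:

	#include <stdio.h>

	/* toggle fast charging for host mode and read back the hostmode flag;
	 * paths assumed from kobject_create_and_add("usbhost", kernel_kobj) */
	int main(void)
	{
		const char *fc = "/sys/kernel/usbhost/usbhost_fastcharge_in_host_mode";
		const char *hm = "/sys/kernel/usbhost/usbhost_hostmode";
		FILE *f;
		int hostmode = -1;

		f = fopen(fc, "w");
		if (!f)
			return 1;
		fprintf(f, "1\n");	/* parsed by the store() side via sscanf(buf, "%du", ...) */
		fclose(f);

		f = fopen(hm, "r");
		if (!f)
			return 1;
		if (fscanf(f, "%d", &hostmode) != 1)	/* show() side prints "%d\n" */
			hostmode = -1;
		fclose(f);

		printf("usbhost_hostmode=%d\n", hostmode);
		return 0;
	}

Note that usbhost_hostmode is maintained by the charger driver itself (set and cleared in smb347_configure_otg as host mode comes and goes), so user space would normally only read that one, while the other three are meant to be written.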
- printk("inok_isr_work_function external power available lastOtgState=%d\n",lastOtgState); + printk("inok_isr_work_function external power available hostmode=%d\n",usbhost_hostmode); } static void dockin_isr_work_function(struct work_struct *dat) @@ -1369,6 +1370,7 @@ int smb347_event_fi(void) { // called by usbhost.c sysfs change from user space struct i2c_client *client = charger->client; printk("smb347_event_fi %d\n",usbhost_fixed_install_mode); +/* if(usbhost_fixed_install_mode>0) { // from OTG to FI // make external power detectable in case it is coming back @@ -1425,6 +1427,7 @@ int smb347_event_fi(void) { // DOES power the slave, but slaves are NOT detected, not even when replugged // music plays on speaker } +*/ } diff --git a/sound/usb/card.c b/sound/usb/card.c index 54b9f1f5465..0f5e1d880c1 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -78,10 +78,11 @@ MODULE_LICENSE("GPL"); MODULE_SUPPORTED_DEVICE("{{Generic,USB Audio}}"); // tmtmtm +extern int usbhost_hotplug_on_boot; struct timer_list my_timer; struct usb_device *postpone_usb_snd_dev = NULL; struct device_driver *postpone_usb_snd_drv = NULL; -extern struct device_driver *current_drv; +extern struct device_driver *current_drv; // from base/dd.c static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ @@ -433,11 +434,11 @@ static int snd_usb_audio_create(struct usb_device *dev, int idx, } //tmtmtm -static int mykthread(void *unused) +static void mykthread(void *unused) { printk("##### sound/usb/card.c mykthread driver_attach\n"); if(postpone_usb_snd_drv!=NULL) - driver_attach(postpone_usb_snd_drv); + driver_attach(postpone_usb_snd_drv); // drives/base/dd.c } static void delayed_func(unsigned long unused) { @@ -480,23 +481,32 @@ snd_usb_audio_probe(struct usb_device *dev, if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum) goto __err_val; - // tmtmtm: we don't want the USB DAC to become the primary sound card - // in order for a USB DAC, connected at boot time, to become available as - // an *overlay* primary sound card, we must postpone device probe - - struct timespec tp; ktime_get_ts(&tp); - if (tp.tv_sec<8 && postpone_usb_snd_dev==NULL) { - printk("##### sound/usb/card.c DON'T REGISTER EARLY tv_sec=%d ++++++++++++++++++++\n",tp.tv_sec); - postpone_usb_snd_dev = dev; - postpone_usb_snd_drv = current_drv; - init_timer(&my_timer); - my_timer.expires = jiffies + 20*HZ; // n*HZ = delay in number of seconds - my_timer.function = delayed_func; - add_timer(&my_timer); - printk("##### sound/usb/card.c delayed call to driver_attach initiated\n"); - goto __err_val; + // tmtmtm + // we may not want the USB DAC, connected at boot time, to become + // the primary sound card, rather for it to become available as + // an *overlay* primary sound card, so we postpone device probe + if(usbhost_hotplug_on_boot) { + struct timespec tp; ktime_get_ts(&tp); + if (tp.tv_sec<8 && postpone_usb_snd_dev==NULL) { + printk("##### sound/usb/card.c DON'T REGISTER EARLY tv_sec=%d ++++++++++++++++++++\n",tp.tv_sec); + + // it would be good if the delayed call to driver_attach() (which will result in UEVENT ALSA_ID) + // would not be done by time (20 sec), but by ??? 
+ // the current strategy may prove to be not 100% reliable + + postpone_usb_snd_dev = dev; + postpone_usb_snd_drv = current_drv; + init_timer(&my_timer); + my_timer.expires = jiffies + 18*HZ; // n*HZ = delay in number of seconds + my_timer.function = delayed_func; + add_timer(&my_timer); + printk("##### sound/usb/card.c delayed call to driver_attach initiated\n"); + goto __err_val; + } + printk("##### sound/usb/card.c REGISTER tv_sec=%d ++++++++++++++++++++++++\n",tp.tv_sec); + } else { + printk("##### sound/usb/card.c REGISTER !hotplug_on_boot\n"); } - //printk("##### sound/usb/card.c REGISTER tv_sec=%d ++++++++++++++++++++++++\n",tp.tv_sec); if (snd_usb_apply_boot_quirk(dev, intf, quirk) < 0) @@ -574,6 +584,7 @@ snd_usb_audio_probe(struct usb_device *dev, chip->num_interfaces++; chip->probing = 0; mutex_unlock(®ister_mutex); + printk("##### sound/usb/card.c snd_usb_audio_probe done OK\n"); return chip; __error: From dd3c516ea0a408034a4fdf15235e38e3177c0ecd Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 14 Mar 2013 00:24:18 -0400 Subject: [PATCH 465/678] mach-tegra: usbhost.c: disable usbhost_fixed_install_mode --- arch/arm/mach-tegra/usbhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c index 38c87df3a2c..3b34114021f 100644 --- a/arch/arm/mach-tegra/usbhost.c +++ b/arch/arm/mach-tegra/usbhost.c @@ -97,7 +97,7 @@ int usbhost_init(void) int retval; // default values - usbhost_fixed_install_mode = 1; + usbhost_fixed_install_mode = 0; usbhost_hotplug_on_boot = 1; usbhost_fastcharge_in_host_mode = 0; printk("usbhost %s FI=%d HP=%d FC=%d\n", __func__, usbhost_fixed_install_mode, From b735c1fc3ecfdf347976aeeb4c3ff67fde72cfcc Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 13 Apr 2013 18:42:35 -0400 Subject: [PATCH 466/678] defconfig: a53 --- arch/arm/configs/metallice_grouper_defconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 5163c720b57..3a77411b598 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a52" +CONFIG_LOCALVERSION="-MKernel-a53" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -366,6 +366,7 @@ CONFIG_TEGRA_PLLM_RESTRICTED=y CONFIG_TEGRA_LP2_ARM_TWD=y CONFIG_TEGRA_SLOW_CSITE=y # CONFIG_TEGRA_PREINIT_CLOCKS is not set +CONFIG_USBHOST=y # # Processor Type From 7dcbc1d8a3e0a649942572ee56d190860ca319ab Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 27 Apr 2013 15:47:23 -0400 Subject: [PATCH 467/678] Revert "mach-tegra: include: mach: add missing tegra-ahb.h file" This reverts commit 5ad9f7ecc4e60c52f650ed31a3341c81562499ee. --- arch/arm/mach-tegra/include/mach/tegra-ahb.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 arch/arm/mach-tegra/include/mach/tegra-ahb.h diff --git a/arch/arm/mach-tegra/include/mach/tegra-ahb.h b/arch/arm/mach-tegra/include/mach/tegra-ahb.h deleted file mode 100644 index e0f8c84b1d8..00000000000 --- a/arch/arm/mach-tegra/include/mach/tegra-ahb.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef __MACH_TEGRA_AHB_H__ -#define __MACH_TEGRA_AHB_H__ - -extern int tegra_ahb_enable_smmu(struct device_node *ahb); - -#endif /* __MACH_TEGRA_AHB_H__ */ From 67c8a9f1f1095e273d00f32493419f9f08d164c6 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 27 Apr 2013 15:48:26 -0400 Subject: [PATCH 468/678] Revert "iommu/tegra: smmu: Refrain from accessing to AHB" This reverts commit 94b7246c25c18b8434f9dcae65f9a65110eb6db0. --- drivers/iommu/tegra-smmu.c | 63 +++++++++++++++++++++++--------------- include/linux/errno.h | 2 -- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 5eb0772e51e..5f020f6a7d7 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -30,15 +30,13 @@ #include #include #include -#include #include #include #include #include -#include -//#include +#include /* bitmap of the page sizes currently supported */ #define SMMU_IOMMU_PGSIZES (SZ_4K) @@ -114,6 +112,11 @@ #define SMMU_PDE_NEXT_SHIFT 28 +/* AHB Arbiter Registers */ +#define AHB_XBAR_CTRL 0xe0 +#define AHB_XBAR_CTRL_SMMU_INIT_DONE_DONE 1 +#define AHB_XBAR_CTRL_SMMU_INIT_DONE_SHIFT 17 + #define SMMU_NUM_ASIDS 4 #define SMMU_TLB_FLUSH_VA_SECTION__MASK 0xffc00000 #define SMMU_TLB_FLUSH_VA_SECTION__SHIFT 12 /* right shift */ @@ -233,7 +236,7 @@ struct smmu_as { * Per SMMU device - IOMMU device */ struct smmu_device { - void __iomem *regs; + void __iomem *regs, *regs_ahbarb; unsigned long iovmm_base; /* remappable base address */ unsigned long page_count; /* total remappable size */ spinlock_t lock; @@ -250,14 +253,12 @@ struct smmu_device { unsigned long translation_enable_1; unsigned long translation_enable_2; unsigned long asid_security; - - struct device_node *ahb; }; static struct smmu_device *smmu_handle; /* unique for a system */ /* - * SMMU register accessors + * SMMU/AHB register accessors */ static inline u32 smmu_read(struct smmu_device *smmu, size_t offs) { @@ -268,6 +269,15 @@ static inline void smmu_write(struct smmu_device *smmu, u32 val, size_t offs) writel(val, smmu->regs + offs); } +static inline u32 ahb_read(struct smmu_device *smmu, size_t offs) +{ + return readl(smmu->regs_ahbarb + offs); +} +static inline void ahb_write(struct smmu_device *smmu, u32 val, size_t offs) +{ + writel(val, smmu->regs_ahbarb + offs); +} + #define VA_PAGE_TO_PA(va, page) \ (page_to_phys(page) + ((unsigned long)(va) & ~PAGE_MASK)) @@ -361,9 +371,9 @@ static void smmu_flush_regs(struct smmu_device *smmu, int enable) FLUSH_SMMU_REGS(smmu); } -static int smmu_setup_regs(struct smmu_device *smmu) +static void smmu_setup_regs(struct smmu_device *smmu) { - int i, err; + int i; u32 val; for (i = 0; i < smmu->num_as; i++) { @@ -389,8 +399,10 @@ static int smmu_setup_regs(struct smmu_device *smmu) smmu_flush_regs(smmu, 1); - err = tegra_ahb_enable_smmu(smmu->ahb); - return err; + val = ahb_read(smmu, AHB_XBAR_CTRL); + val |= AHB_XBAR_CTRL_SMMU_INIT_DONE_DONE << + AHB_XBAR_CTRL_SMMU_INIT_DONE_SHIFT; + ahb_write(smmu, val, AHB_XBAR_CTRL); } static void flush_ptc_and_tlb(struct 
smmu_device *smmu, @@ -862,18 +874,18 @@ static int tegra_smmu_resume(struct device *dev) { struct smmu_device *smmu = dev_get_drvdata(dev); unsigned long flags; - int err; spin_lock_irqsave(&smmu->lock, flags); - err = smmu_setup_regs(smmu); + smmu_setup_regs(smmu); spin_unlock_irqrestore(&smmu->lock, flags); - return err; + return 0; } static int tegra_smmu_probe(struct platform_device *pdev) { struct smmu_device *smmu; - struct resource *regs, *window; + struct resource *regs, *regs2; + struct tegra_smmu_window *window; struct device *dev = &pdev->dev; int i, err = 0; @@ -883,8 +895,9 @@ static int tegra_smmu_probe(struct platform_device *pdev) BUILD_BUG_ON(PAGE_SHIFT != SMMU_PAGE_SHIFT); regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - window = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!regs || !window) { + regs2 = platform_get_resource(pdev, IORESOURCE_MEM, 1); + window = tegra_smmu_window(0); + if (!regs || !regs2 || !window) { dev_err(dev, "No SMMU resources\n"); return -ENODEV; } @@ -900,16 +913,14 @@ static int tegra_smmu_probe(struct platform_device *pdev) smmu->iovmm_base = (unsigned long)window->start; smmu->page_count = (window->end + 1 - window->start) >> SMMU_PAGE_SHIFT; smmu->regs = devm_ioremap(dev, regs->start, resource_size(regs)); - if (!smmu->regs) { + smmu->regs_ahbarb = devm_ioremap(dev, regs2->start, + resource_size(regs2)); + if (!smmu->regs || !smmu->regs_ahbarb) { dev_err(dev, "failed to remap SMMU registers\n"); err = -ENXIO; goto fail; } - smmu->ahb = of_parse_phandle(pdev->dev.of_node, "ahb", 0); - if (!smmu->ahb) - return -ENODEV; - smmu->translation_enable_0 = ~0; smmu->translation_enable_1 = ~0; smmu->translation_enable_2 = ~0; @@ -936,9 +947,7 @@ static int tegra_smmu_probe(struct platform_device *pdev) INIT_LIST_HEAD(&as->client); } spin_lock_init(&smmu->lock); - err = smmu_setup_regs(smmu); - if (err) - goto fail; + smmu_setup_regs(smmu); platform_set_drvdata(pdev, smmu); smmu->avp_vector_page = alloc_page(GFP_KERNEL); @@ -953,6 +962,8 @@ static int tegra_smmu_probe(struct platform_device *pdev) __free_page(smmu->avp_vector_page); if (smmu->regs) devm_iounmap(dev, smmu->regs); + if (smmu->regs_ahbarb) + devm_iounmap(dev, smmu->regs_ahbarb); if (smmu && smmu->as) { for (i = 0; i < smmu->num_as; i++) { if (smmu->as[i].pdir_page) { @@ -984,6 +995,8 @@ static int tegra_smmu_remove(struct platform_device *pdev) __free_page(smmu->avp_vector_page); if (smmu->regs) devm_iounmap(dev, smmu->regs); + if (smmu->regs_ahbarb) + devm_iounmap(dev, smmu->regs_ahbarb); devm_kfree(dev, smmu); smmu_handle = NULL; return 0; diff --git a/include/linux/errno.h b/include/linux/errno.h index e02de468105..46685832ed9 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -16,8 +16,6 @@ #define ERESTARTNOHAND 514 /* restart if no handler.. */ #define ENOIOCTLCMD 515 /* No ioctl command */ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ -#define EPROBE_DEFER 517 /* Driver requests probe retry */ -#define EOPENSTALE 518 /* open found a stale dentry */ /* Defined for the NFSv3 protocol */ #define EBADHANDLE 521 /* Illegal NFS file handle */ From ab21cfd11b9dc6b24a153a2e0576b9d204f40b35 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 27 Apr 2013 15:48:46 -0400 Subject: [PATCH 469/678] Revert "amba: tegra-ahb: Remove empty *_remove()" This reverts commit 73be60573c0d1c7ee6edc31a778db8ce49e64077. 
--- drivers/amba/tegra-ahb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index 0b6f0b28a48..aa0b1f16052 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -264,6 +264,11 @@ static int __devinit tegra_ahb_probe(struct platform_device *pdev) return 0; } +static int __devexit tegra_ahb_remove(struct platform_device *pdev) +{ + return 0; +} + static const struct of_device_id tegra_ahb_of_match[] __devinitconst = { { .compatible = "nvidia,tegra30-ahb", }, { .compatible = "nvidia,tegra20-ahb", }, @@ -272,6 +277,7 @@ static const struct of_device_id tegra_ahb_of_match[] __devinitconst = { static struct platform_driver tegra_ahb_driver = { .probe = tegra_ahb_probe, + .remove = __devexit_p(tegra_ahb_remove), .driver = { .name = DRV_NAME, .owner = THIS_MODULE, From 5095511a727a3d07d4871c76a967d490ad17f338 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 27 Apr 2013 15:49:02 -0400 Subject: [PATCH 470/678] Revert "ARM: tegra: Add SMMU enabler in AHB" This reverts commit 33c9ecb45e41274777207bfcb1a24a22eb2ae0e3. --- drivers/amba/tegra-ahb.c | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index aa0b1f16052..106a780d29a 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -76,10 +76,6 @@ #define AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID 0xf8 -#define AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE BIT(17) - -static struct platform_driver tegra_ahb_driver; - static const u32 tegra_ahb_gizmo[] = { AHB_ARBITRATION_DISABLE, AHB_ARBITRATION_PRIORITY_CTRL, @@ -128,34 +124,6 @@ static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) writel(value, ahb->regs + offset); } -#ifdef CONFIG_ARCH_TEGRA_3x_SOC -static int tegra_ahb_match_by_smmu(struct device *dev, void *data) -{ - struct tegra_ahb *ahb = dev_get_drvdata(dev); - struct device_node *dn = data; - - return (ahb->dev->of_node == dn) ? 1 : 0; -} - -int tegra_ahb_enable_smmu(struct device_node *dn) -{ - struct device *dev; - u32 val; - struct tegra_ahb *ahb; - - dev = driver_find_device(&tegra_ahb_driver.driver, NULL, dn, - tegra_ahb_match_by_smmu); - if (!dev) - return -EPROBE_DEFER; - ahb = dev_get_drvdata(dev); - val = gizmo_readl(ahb, AHB_ARBITRATION_XBAR_CTRL); - val |= AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE; - gizmo_writel(ahb, val, AHB_ARBITRATION_XBAR_CTRL); - return 0; -} -EXPORT_SYMBOL(tegra_ahb_enable_smmu); -#endif - static int tegra_ahb_suspend(struct device *dev) { int i; From 0107d1ae7874817a9efb2a776e8edc9d4ba8ca2a Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 27 Apr 2013 15:49:19 -0400 Subject: [PATCH 471/678] Revert "missing file from ahb commit" This reverts commit b3b34c01a789e0d4716554a0c066603787fa398b. 
--- .../bindings/arm/tegra/nvidia,tegra20-ahb.txt | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt deleted file mode 100644 index 234406d41c1..00000000000 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt +++ /dev/null @@ -1,11 +0,0 @@ -NVIDIA Tegra AHB - -Required properties: -- compatible : "nvidia,tegra20-ahb" or "nvidia,tegra30-ahb" -- reg : Should contain 1 register ranges(address and length) - -Example: - ahb: ahb@6000c004 { - compatible = "nvidia,tegra20-ahb"; - reg = <0x6000c004 0x10c>; /* AHB Arbitration + Gizmo Controller */ - }; From c8ede12579a373997a6d91d2abc11bc2d5ffc962 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 27 Apr 2013 15:49:34 -0400 Subject: [PATCH 472/678] Revert "ARM: tegra: Add Tegra AHB driver" This reverts commit 98b6e31e1391be3570ff761771547d7494099d5d. --- arch/arm/configs/metallice_grouper_defconfig | 5 +- arch/arm/mach-tegra/Kconfig | 8 - drivers/Makefile | 2 +- drivers/amba/Makefile | 4 +- drivers/amba/tegra-ahb.c | 261 ------------------- 5 files changed, 4 insertions(+), 276 deletions(-) delete mode 100644 drivers/amba/tegra-ahb.c diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 3a77411b598..07961396321 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -295,7 +295,6 @@ CONFIG_ARCH_TEGRA_HAS_DUAL_CPU_CLUSTERS=y CONFIG_ARCH_TEGRA_HAS_PCIE=y CONFIG_ARCH_TEGRA_HAS_SATA=y CONFIG_TEGRA_PCI=y -CONFIG_TEGRA_AHB=y # # Tegra board type @@ -530,15 +529,13 @@ CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND=y +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y # CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_GOV_USERSPACE is not set CONFIG_CPU_FREQ_GOV_ONDEMAND=y -CONFIG_CPU_FREQ_GOV_TOUCHDEMAND=y CONFIG_CPU_FREQ_GOV_INTERACTIVE=y # CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_GOV_LULZACTIVE is not set diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 558577ff6b9..52c254f385e 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -70,14 +70,6 @@ config TEGRA_PCI help Adds PCIe Host controller driver for tegra based systems -config TEGRA_AHB - bool "Enable AHB driver for NVIDIA Tegra SoCs" - default y - help - Adds AHB configuration functionality for NVIDIA Tegra SoCs, - which controls AHB bus master arbitration and some - perfomance parameters(priority, prefech size). - comment "Tegra board type" config MACH_HARMONY diff --git a/drivers/Makefile b/drivers/Makefile index bad062b86c6..24e48fc3526 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -17,7 +17,7 @@ obj-$(CONFIG_SFI) += sfi/ # PnP must come after ACPI since it will eventually need to check if acpi # was used and do nothing if so obj-$(CONFIG_PNP) += pnp/ -obj-$(CONFIG_ARM) += amba/ +obj-$(CONFIG_ARM_AMBA) += amba/ # Many drivers will want to use DMA so this has to be made available # really early. 
obj-$(CONFIG_DMA_ENGINE) += dma/ diff --git a/drivers/amba/Makefile b/drivers/amba/Makefile index 66e81c2f1e3..40fe74097be 100644 --- a/drivers/amba/Makefile +++ b/drivers/amba/Makefile @@ -1,2 +1,2 @@ -obj-$(CONFIG_ARM_AMBA) += bus.o -obj-$(CONFIG_TEGRA_AHB) += tegra-ahb.o +obj-y += bus.o + diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c deleted file mode 100644 index 106a780d29a..00000000000 --- a/drivers/amba/tegra-ahb.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. - * Copyright (C) 2011 Google, Inc. - * - * Author: - * Jay Cheng - * James Wylder - * Benoit Goby - * Colin Cross - * Hiroshi DOYU - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include -#include -#include -#include - -#define DRV_NAME "tegra-ahb" - -#define AHB_ARBITRATION_DISABLE 0x00 -#define AHB_ARBITRATION_PRIORITY_CTRL 0x04 -#define AHB_PRIORITY_WEIGHT(x) (((x) & 0x7) << 29) -#define PRIORITY_SELECT_USB BIT(6) -#define PRIORITY_SELECT_USB2 BIT(18) -#define PRIORITY_SELECT_USB3 BIT(17) - -#define AHB_GIZMO_AHB_MEM 0x0c -#define ENB_FAST_REARBITRATE BIT(2) -#define DONT_SPLIT_AHB_WR BIT(7) - -#define AHB_GIZMO_APB_DMA 0x10 -#define AHB_GIZMO_IDE 0x18 -#define AHB_GIZMO_USB 0x1c -#define AHB_GIZMO_AHB_XBAR_BRIDGE 0x20 -#define AHB_GIZMO_CPU_AHB_BRIDGE 0x24 -#define AHB_GIZMO_COP_AHB_BRIDGE 0x28 -#define AHB_GIZMO_XBAR_APB_CTLR 0x2c -#define AHB_GIZMO_VCP_AHB_BRIDGE 0x30 -#define AHB_GIZMO_NAND 0x3c -#define AHB_GIZMO_SDMMC4 0x44 -#define AHB_GIZMO_XIO 0x48 -#define AHB_GIZMO_BSEV 0x60 -#define AHB_GIZMO_BSEA 0x70 -#define AHB_GIZMO_NOR 0x74 -#define AHB_GIZMO_USB2 0x78 -#define AHB_GIZMO_USB3 0x7c -#define IMMEDIATE BIT(18) - -#define AHB_GIZMO_SDMMC1 0x80 -#define AHB_GIZMO_SDMMC2 0x84 -#define AHB_GIZMO_SDMMC3 0x88 -#define AHB_MEM_PREFETCH_CFG_X 0xd8 -#define AHB_ARBITRATION_XBAR_CTRL 0xdc -#define AHB_MEM_PREFETCH_CFG3 0xe0 -#define AHB_MEM_PREFETCH_CFG4 0xe4 -#define AHB_MEM_PREFETCH_CFG1 0xec -#define AHB_MEM_PREFETCH_CFG2 0xf0 -#define PREFETCH_ENB BIT(31) -#define MST_ID(x) (((x) & 0x1f) << 26) -#define AHBDMA_MST_ID MST_ID(5) -#define USB_MST_ID MST_ID(6) -#define USB2_MST_ID MST_ID(18) -#define USB3_MST_ID MST_ID(17) -#define ADDR_BNDRY(x) (((x) & 0xf) << 21) -#define INACTIVITY_TIMEOUT(x) (((x) & 0xffff) << 0) - -#define AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID 0xf8 - -static const u32 tegra_ahb_gizmo[] = { - AHB_ARBITRATION_DISABLE, - AHB_ARBITRATION_PRIORITY_CTRL, - AHB_GIZMO_AHB_MEM, - AHB_GIZMO_APB_DMA, - AHB_GIZMO_IDE, - AHB_GIZMO_USB, - AHB_GIZMO_AHB_XBAR_BRIDGE, - AHB_GIZMO_CPU_AHB_BRIDGE, - AHB_GIZMO_COP_AHB_BRIDGE, - AHB_GIZMO_XBAR_APB_CTLR, - AHB_GIZMO_VCP_AHB_BRIDGE, - AHB_GIZMO_NAND, - AHB_GIZMO_SDMMC4, - AHB_GIZMO_XIO, - AHB_GIZMO_BSEV, - AHB_GIZMO_BSEA, - AHB_GIZMO_NOR, - AHB_GIZMO_USB2, - AHB_GIZMO_USB3, - AHB_GIZMO_SDMMC1, - AHB_GIZMO_SDMMC2, - AHB_GIZMO_SDMMC3, - AHB_MEM_PREFETCH_CFG_X, - AHB_ARBITRATION_XBAR_CTRL, - AHB_MEM_PREFETCH_CFG3, - AHB_MEM_PREFETCH_CFG4, - AHB_MEM_PREFETCH_CFG1, - AHB_MEM_PREFETCH_CFG2, - AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID, -}; - -struct tegra_ahb { - void __iomem *regs; - 
struct device *dev; - u32 ctx[0]; -}; - -static inline u32 gizmo_readl(struct tegra_ahb *ahb, u32 offset) -{ - return readl(ahb->regs + offset); -} - -static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) -{ - writel(value, ahb->regs + offset); -} - -static int tegra_ahb_suspend(struct device *dev) -{ - int i; - struct tegra_ahb *ahb = dev_get_drvdata(dev); - - for (i = 0; i < ARRAY_SIZE(tegra_ahb_gizmo); i++) - ahb->ctx[i] = gizmo_readl(ahb, tegra_ahb_gizmo[i]); - return 0; -} - -static int tegra_ahb_resume(struct device *dev) -{ - int i; - struct tegra_ahb *ahb = dev_get_drvdata(dev); - - for (i = 0; i < ARRAY_SIZE(tegra_ahb_gizmo); i++) - gizmo_writel(ahb, ahb->ctx[i], tegra_ahb_gizmo[i]); - return 0; -} - -static UNIVERSAL_DEV_PM_OPS(tegra_ahb_pm, - tegra_ahb_suspend, - tegra_ahb_resume, NULL); - -static void tegra_ahb_gizmo_init(struct tegra_ahb *ahb) -{ - u32 val; - - val = gizmo_readl(ahb, AHB_GIZMO_AHB_MEM); - val |= ENB_FAST_REARBITRATE | IMMEDIATE | DONT_SPLIT_AHB_WR; - gizmo_writel(ahb, val, AHB_GIZMO_AHB_MEM); - - val = gizmo_readl(ahb, AHB_GIZMO_USB); - val |= IMMEDIATE; - gizmo_writel(ahb, val, AHB_GIZMO_USB); - - val = gizmo_readl(ahb, AHB_GIZMO_USB2); - val |= IMMEDIATE; - gizmo_writel(ahb, val, AHB_GIZMO_USB2); - - val = gizmo_readl(ahb, AHB_GIZMO_USB3); - val |= IMMEDIATE; - gizmo_writel(ahb, val, AHB_GIZMO_USB3); - - val = gizmo_readl(ahb, AHB_ARBITRATION_PRIORITY_CTRL); - val |= PRIORITY_SELECT_USB | - PRIORITY_SELECT_USB2 | - PRIORITY_SELECT_USB3 | - AHB_PRIORITY_WEIGHT(7); - gizmo_writel(ahb, val, AHB_ARBITRATION_PRIORITY_CTRL); - - val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG1); - val &= ~MST_ID(~0); - val |= PREFETCH_ENB | - AHBDMA_MST_ID | - ADDR_BNDRY(0xc) | - INACTIVITY_TIMEOUT(0x1000); - gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG1); - - val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG2); - val &= ~MST_ID(~0); - val |= PREFETCH_ENB | - USB_MST_ID | - ADDR_BNDRY(0xc) | - INACTIVITY_TIMEOUT(0x1000); - gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG2); - - val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG3); - val &= ~MST_ID(~0); - val |= PREFETCH_ENB | - USB3_MST_ID | - ADDR_BNDRY(0xc) | - INACTIVITY_TIMEOUT(0x1000); - gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG3); - - val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG4); - val &= ~MST_ID(~0); - val |= PREFETCH_ENB | - USB2_MST_ID | - ADDR_BNDRY(0xc) | - INACTIVITY_TIMEOUT(0x1000); - gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG4); -} - -static int __devinit tegra_ahb_probe(struct platform_device *pdev) -{ - struct resource *res; - struct tegra_ahb *ahb; - size_t bytes; - - bytes = sizeof(*ahb) + sizeof(u32) * ARRAY_SIZE(tegra_ahb_gizmo); - ahb = devm_kzalloc(&pdev->dev, bytes, GFP_KERNEL); - if (!ahb) - return -ENOMEM; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENODEV; - ahb->regs = devm_request_and_ioremap(&pdev->dev, res); - if (!ahb->regs) - return -EBUSY; - - ahb->dev = &pdev->dev; - platform_set_drvdata(pdev, ahb); - tegra_ahb_gizmo_init(ahb); - return 0; -} - -static int __devexit tegra_ahb_remove(struct platform_device *pdev) -{ - return 0; -} - -static const struct of_device_id tegra_ahb_of_match[] __devinitconst = { - { .compatible = "nvidia,tegra30-ahb", }, - { .compatible = "nvidia,tegra20-ahb", }, - {}, -}; - -static struct platform_driver tegra_ahb_driver = { - .probe = tegra_ahb_probe, - .remove = __devexit_p(tegra_ahb_remove), - .driver = { - .name = DRV_NAME, - .owner = THIS_MODULE, - .of_match_table = tegra_ahb_of_match, - .pm = &tegra_ahb_pm, - }, -}; 
-module_platform_driver(tegra_ahb_driver); - -MODULE_AUTHOR("Hiroshi DOYU "); -MODULE_DESCRIPTION("Tegra AHB driver"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:" DRV_NAME); From de01b1fd5c1f076d75accd0dbf35d22a9c9a4981 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 27 Apr 2013 15:58:24 -0400 Subject: [PATCH 473/678] defconfig: a54 --- arch/arm/configs/metallice_grouper_defconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 07961396321..aa696a17889 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a53" +CONFIG_LOCALVERSION="-MKernel-a54" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -529,13 +529,15 @@ CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND=y # CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_GOV_USERSPACE is not set CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_TOUCHDEMAND=y CONFIG_CPU_FREQ_GOV_INTERACTIVE=y # CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_GOV_LULZACTIVE is not set From 2b9f986db36c1134f846b18a20f728ee909042c2 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 30 Apr 2013 20:45:10 -0400 Subject: [PATCH 474/678] mach-tegra: add tegra ahb driver --- .../bindings/arm/tegra/nvidia,tegra20-ahb.txt | 11 + arch/arm/boot/dts/tegra20.dtsi | 5 + arch/arm/boot/dts/tegra30-grouper.dts | 8 + arch/arm/boot/dts/tegra30.dtsi | 575 ++++++++++++++++++ arch/arm/configs/metallice_grouper_defconfig | 3 +- arch/arm/mach-tegra/Kconfig | 8 + arch/arm/mach-tegra/Makefile.boot | 1 + arch/arm/mach-tegra/include/mach/tegra-ahb.h | 19 + drivers/Makefile | 2 +- drivers/amba/Makefile | 4 +- drivers/amba/tegra-ahb.c | 287 +++++++++ include/linux/errno.h | 3 +- 12 files changed, 921 insertions(+), 5 deletions(-) create mode 100644 Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt create mode 100644 arch/arm/boot/dts/tegra30-grouper.dts create mode 100644 arch/arm/boot/dts/tegra30.dtsi create mode 100644 arch/arm/mach-tegra/include/mach/tegra-ahb.h create mode 100644 drivers/amba/tegra-ahb.c diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt new file mode 100644 index 00000000000..97aca46bd7a --- /dev/null +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt @@ -0,0 +1,11 @@ +NVIDIA Tegra AHB + +Required properties: +- compatible : "nvidia,tegra20-ahb" or "nvidia,tegra30-ahb" +- reg : Should contain 1 register ranges(address and length) + +Example: + ahb: ahb@6000c004 { + compatible = "nvidia,tegra20-ahb"; + reg = <0x6000c004 0x10c>; /* AHB Arbitration + Gizmo Controller */ + }; diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 5727595cde6..e37c0ba662d 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ 
-135,5 +135,10 @@ reg = <0xc8000600 0x200>; interrupts = < 63 >; }; + + ahb: ahb at 6000c004 { + compatible = "nvidia,tegra20-ahb"; + reg = <0x6000c004 0x10c>; /* AHB Arbitration + Gizmo Controller */ + }; }; diff --git a/arch/arm/boot/dts/tegra30-grouper.dts b/arch/arm/boot/dts/tegra30-grouper.dts new file mode 100644 index 00000000000..e2bf28458d3 --- /dev/null +++ b/arch/arm/boot/dts/tegra30-grouper.dts @@ -0,0 +1,8 @@ +/dts-v1/; + +/include/ "tegra30.dtsi" + +/ { + model = "NVIDIA Tegra30 Grouper"; + compatible = "nvidia,grouper", "nvidia,tegra30"; +}; diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi new file mode 100644 index 00000000000..dbf46c27256 --- /dev/null +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -0,0 +1,575 @@ +/include/ "skeleton.dtsi" + +/ { + compatible = "nvidia,tegra30"; + interrupt-parent = <&intc>; + + aliases { + serial0 = &uarta; + serial1 = &uartb; + serial2 = &uartc; + serial3 = &uartd; + serial4 = &uarte; + }; + + host1x { + compatible = "nvidia,tegra30-host1x", "simple-bus"; + reg = <0x50000000 0x00024000>; + interrupts = <0 65 0x04 /* mpcore syncpt */ + 0 67 0x04>; /* mpcore general */ + clocks = <&tegra_car 28>; + + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x54000000 0x54000000 0x04000000>; + + mpe { + compatible = "nvidia,tegra30-mpe"; + reg = <0x54040000 0x00040000>; + interrupts = <0 68 0x04>; + clocks = <&tegra_car 60>; + }; + + vi { + compatible = "nvidia,tegra30-vi"; + reg = <0x54080000 0x00040000>; + interrupts = <0 69 0x04>; + clocks = <&tegra_car 164>; + }; + + epp { + compatible = "nvidia,tegra30-epp"; + reg = <0x540c0000 0x00040000>; + interrupts = <0 70 0x04>; + clocks = <&tegra_car 19>; + }; + + isp { + compatible = "nvidia,tegra30-isp"; + reg = <0x54100000 0x00040000>; + interrupts = <0 71 0x04>; + clocks = <&tegra_car 23>; + }; + + gr2d { + compatible = "nvidia,tegra30-gr2d"; + reg = <0x54140000 0x00040000>; + interrupts = <0 72 0x04>; + clocks = <&tegra_car 21>; + }; + + gr3d { + compatible = "nvidia,tegra30-gr3d"; + reg = <0x54180000 0x00040000>; + clocks = <&tegra_car 24 &tegra_car 98>; + clock-names = "3d", "3d2"; + }; + + dc@54200000 { + compatible = "nvidia,tegra30-dc"; + reg = <0x54200000 0x00040000>; + interrupts = <0 73 0x04>; + clocks = <&tegra_car 27>, <&tegra_car 179>; + clock-names = "disp1", "parent"; + + rgb { + status = "disabled"; + }; + }; + + dc@54240000 { + compatible = "nvidia,tegra30-dc"; + reg = <0x54240000 0x00040000>; + interrupts = <0 74 0x04>; + clocks = <&tegra_car 26>, <&tegra_car 179>; + clock-names = "disp2", "parent"; + + rgb { + status = "disabled"; + }; + }; + + hdmi { + compatible = "nvidia,tegra30-hdmi"; + reg = <0x54280000 0x00040000>; + interrupts = <0 75 0x04>; + clocks = <&tegra_car 51>, <&tegra_car 189>; + clock-names = "hdmi", "parent"; + status = "disabled"; + }; + + tvo { + compatible = "nvidia,tegra30-tvo"; + reg = <0x542c0000 0x00040000>; + interrupts = <0 76 0x04>; + clocks = <&tegra_car 169>; + status = "disabled"; + }; + + dsi { + compatible = "nvidia,tegra30-dsi"; + reg = <0x54300000 0x00040000>; + clocks = <&tegra_car 48>; + status = "disabled"; + }; + }; + + timer@50004600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x50040600 0x20>; + interrupts = <1 13 0xf04>; + clocks = <&tegra_car 214>; + }; + + intc: interrupt-controller { + compatible = "arm,cortex-a9-gic"; + reg = <0x50041000 0x1000 + 0x50040100 0x0100>; + interrupt-controller; + #interrupt-cells = <3>; + }; + + cache-controller { + compatible = "arm,pl310-cache"; + reg = <0x50043000 
0x1000>; + arm,data-latency = <6 6 2>; + arm,tag-latency = <5 5 2>; + cache-unified; + cache-level = <2>; + }; + + timer@60005000 { + compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer"; + reg = <0x60005000 0x400>; + interrupts = <0 0 0x04 + 0 1 0x04 + 0 41 0x04 + 0 42 0x04 + 0 121 0x04 + 0 122 0x04>; + }; + + tegra_car: clock { + compatible = "nvidia,tegra30-car"; + reg = <0x60006000 0x1000>; + #clock-cells = <1>; + }; + + apbdma: dma { + compatible = "nvidia,tegra30-apbdma", "nvidia,tegra20-apbdma"; + reg = <0x6000a000 0x1400>; + interrupts = <0 104 0x04 + 0 105 0x04 + 0 106 0x04 + 0 107 0x04 + 0 108 0x04 + 0 109 0x04 + 0 110 0x04 + 0 111 0x04 + 0 112 0x04 + 0 113 0x04 + 0 114 0x04 + 0 115 0x04 + 0 116 0x04 + 0 117 0x04 + 0 118 0x04 + 0 119 0x04 + 0 128 0x04 + 0 129 0x04 + 0 130 0x04 + 0 131 0x04 + 0 132 0x04 + 0 133 0x04 + 0 134 0x04 + 0 135 0x04 + 0 136 0x04 + 0 137 0x04 + 0 138 0x04 + 0 139 0x04 + 0 140 0x04 + 0 141 0x04 + 0 142 0x04 + 0 143 0x04>; + clocks = <&tegra_car 34>; + }; + + ahb: ahb { + compatible = "nvidia,tegra30-ahb"; + reg = <0x6000c004 0x14c>; /* AHB Arbitration + Gizmo Controller */ + }; + + gpio: gpio { + compatible = "nvidia,tegra30-gpio"; + reg = <0x6000d000 0x1000>; + interrupts = <0 32 0x04 + 0 33 0x04 + 0 34 0x04 + 0 35 0x04 + 0 55 0x04 + 0 87 0x04 + 0 89 0x04 + 0 125 0x04>; + #gpio-cells = <2>; + gpio-controller; + #interrupt-cells = <2>; + interrupt-controller; + }; + + pinmux: pinmux { + compatible = "nvidia,tegra30-pinmux"; + reg = <0x70000868 0xd4 /* Pad control registers */ + 0x70003000 0x3e4>; /* Mux registers */ + }; + + /* + * There are two serial driver i.e. 8250 based simple serial + * driver and APB DMA based serial driver for higher baudrate + * and performace. To enable the 8250 based driver, the compatible + * is "nvidia,tegra30-uart", "nvidia,tegra20-uart" and to enable + * the APB DMA based serial driver, the comptible is + * "nvidia,tegra30-hsuart", "nvidia,tegra20-hsuart". 
+ */ + uarta: serial@70006000 { + compatible = "nvidia,tegra30-uart", "nvidia,tegra20-uart"; + reg = <0x70006000 0x40>; + reg-shift = <2>; + interrupts = <0 36 0x04>; + nvidia,dma-request-selector = <&apbdma 8>; + clocks = <&tegra_car 6>; + status = "disabled"; + }; + + uartb: serial@70006040 { + compatible = "nvidia,tegra30-uart", "nvidia,tegra20-uart"; + reg = <0x70006040 0x40>; + reg-shift = <2>; + interrupts = <0 37 0x04>; + nvidia,dma-request-selector = <&apbdma 9>; + clocks = <&tegra_car 160>; + status = "disabled"; + }; + + uartc: serial@70006200 { + compatible = "nvidia,tegra30-uart", "nvidia,tegra20-uart"; + reg = <0x70006200 0x100>; + reg-shift = <2>; + interrupts = <0 46 0x04>; + nvidia,dma-request-selector = <&apbdma 10>; + clocks = <&tegra_car 55>; + status = "disabled"; + }; + + uartd: serial@70006300 { + compatible = "nvidia,tegra30-uart", "nvidia,tegra20-uart"; + reg = <0x70006300 0x100>; + reg-shift = <2>; + interrupts = <0 90 0x04>; + nvidia,dma-request-selector = <&apbdma 19>; + clocks = <&tegra_car 65>; + status = "disabled"; + }; + + uarte: serial@70006400 { + compatible = "nvidia,tegra30-uart", "nvidia,tegra20-uart"; + reg = <0x70006400 0x100>; + reg-shift = <2>; + interrupts = <0 91 0x04>; + nvidia,dma-request-selector = <&apbdma 20>; + clocks = <&tegra_car 66>; + status = "disabled"; + }; + + pwm: pwm { + compatible = "nvidia,tegra30-pwm", "nvidia,tegra20-pwm"; + reg = <0x7000a000 0x100>; + #pwm-cells = <2>; + clocks = <&tegra_car 17>; + }; + + rtc { + compatible = "nvidia,tegra30-rtc", "nvidia,tegra20-rtc"; + reg = <0x7000e000 0x100>; + interrupts = <0 2 0x04>; + }; + + i2c@7000c000 { + compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c"; + reg = <0x7000c000 0x100>; + interrupts = <0 38 0x04>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 12>, <&tegra_car 182>; + clock-names = "div-clk", "fast-clk"; + status = "disabled"; + }; + + i2c@7000c400 { + compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c"; + reg = <0x7000c400 0x100>; + interrupts = <0 84 0x04>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 54>, <&tegra_car 182>; + clock-names = "div-clk", "fast-clk"; + status = "disabled"; + }; + + i2c@7000c500 { + compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c"; + reg = <0x7000c500 0x100>; + interrupts = <0 92 0x04>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 67>, <&tegra_car 182>; + clock-names = "div-clk", "fast-clk"; + status = "disabled"; + }; + + i2c@7000c700 { + compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c"; + reg = <0x7000c700 0x100>; + interrupts = <0 120 0x04>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 103>, <&tegra_car 182>; + clock-names = "div-clk", "fast-clk"; + status = "disabled"; + }; + + i2c@7000d000 { + compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c"; + reg = <0x7000d000 0x100>; + interrupts = <0 53 0x04>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 47>, <&tegra_car 182>; + clock-names = "div-clk", "fast-clk"; + status = "disabled"; + }; + + spi@7000d400 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000d400 0x200>; + interrupts = <0 59 0x04>; + nvidia,dma-request-selector = <&apbdma 15>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 41>; + status = "disabled"; + }; + + spi@7000d600 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000d600 0x200>; + interrupts = <0 82 0x04>; + nvidia,dma-request-selector = <&apbdma 16>; + 
#address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 44>; + status = "disabled"; + }; + + spi@7000d800 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000d800 0x200>; + interrupts = <0 83 0x04>; + nvidia,dma-request-selector = <&apbdma 17>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 46>; + status = "disabled"; + }; + + spi@7000da00 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000da00 0x200>; + interrupts = <0 93 0x04>; + nvidia,dma-request-selector = <&apbdma 18>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 68>; + status = "disabled"; + }; + + spi@7000dc00 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000dc00 0x200>; + interrupts = <0 94 0x04>; + nvidia,dma-request-selector = <&apbdma 27>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 104>; + status = "disabled"; + }; + + spi@7000de00 { + compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; + reg = <0x7000de00 0x200>; + interrupts = <0 79 0x04>; + nvidia,dma-request-selector = <&apbdma 28>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&tegra_car 105>; + status = "disabled"; + }; + + kbc { + compatible = "nvidia,tegra30-kbc", "nvidia,tegra20-kbc"; + reg = <0x7000e200 0x100>; + interrupts = <0 85 0x04>; + clocks = <&tegra_car 36>; + status = "disabled"; + }; + + pmc { + compatible = "nvidia,tegra20-pmc", "nvidia,tegra30-pmc"; + reg = <0x7000e400 0x400>; + }; + + memory-controller { + compatible = "nvidia,tegra30-mc"; + reg = <0x7000f000 0x010 + 0x7000f03c 0x1b4 + 0x7000f200 0x028 + 0x7000f284 0x17c>; + interrupts = <0 77 0x04>; + }; + + iommu { + compatible = "nvidia,tegra30-smmu"; + reg = <0x7000f010 0x02c + 0x7000f1f0 0x010 + 0x7000f228 0x05c>; + nvidia,#asids = <4>; /* # of ASIDs */ + dma-window = <0 0x40000000>; /* IOVA start & length */ + nvidia,ahb = <&ahb>; + }; + + ahub { + compatible = "nvidia,tegra30-ahub"; + reg = <0x70080000 0x200 + 0x70080200 0x100>; + interrupts = <0 103 0x04>; + nvidia,dma-request-selector = <&apbdma 1>; + clocks = <&tegra_car 106>, <&tegra_car 107>, <&tegra_car 30>, + <&tegra_car 11>, <&tegra_car 18>, <&tegra_car 101>, + <&tegra_car 102>, <&tegra_car 108>, <&tegra_car 109>, + <&tegra_car 110>, <&tegra_car 162>; + clock-names = "d_audio", "apbif", "i2s0", "i2s1", "i2s2", + "i2s3", "i2s4", "dam0", "dam1", "dam2", + "spdif_in"; + ranges; + #address-cells = <1>; + #size-cells = <1>; + + tegra_i2s0: i2s@70080300 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080300 0x100>; + nvidia,ahub-cif-ids = <4 4>; + clocks = <&tegra_car 30>; + status = "disabled"; + }; + + tegra_i2s1: i2s@70080400 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080400 0x100>; + nvidia,ahub-cif-ids = <5 5>; + clocks = <&tegra_car 11>; + status = "disabled"; + }; + + tegra_i2s2: i2s@70080500 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080500 0x100>; + nvidia,ahub-cif-ids = <6 6>; + clocks = <&tegra_car 18>; + status = "disabled"; + }; + + tegra_i2s3: i2s@70080600 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080600 0x100>; + nvidia,ahub-cif-ids = <7 7>; + clocks = <&tegra_car 101>; + status = "disabled"; + }; + + tegra_i2s4: i2s@70080700 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080700 0x100>; + nvidia,ahub-cif-ids = <8 8>; + clocks = <&tegra_car 102>; + status = "disabled"; + }; + }; + + sdhci@78000000 { + compatible = "nvidia,tegra30-sdhci", "nvidia,tegra20-sdhci"; + reg = <0x78000000 0x200>; + interrupts = <0 14 
0x04>; + clocks = <&tegra_car 14>; + status = "disabled"; + }; + + sdhci@78000200 { + compatible = "nvidia,tegra30-sdhci", "nvidia,tegra20-sdhci"; + reg = <0x78000200 0x200>; + interrupts = <0 15 0x04>; + clocks = <&tegra_car 9>; + status = "disabled"; + }; + + sdhci@78000400 { + compatible = "nvidia,tegra30-sdhci", "nvidia,tegra20-sdhci"; + reg = <0x78000400 0x200>; + interrupts = <0 19 0x04>; + clocks = <&tegra_car 69>; + status = "disabled"; + }; + + sdhci@78000600 { + compatible = "nvidia,tegra30-sdhci", "nvidia,tegra20-sdhci"; + reg = <0x78000600 0x200>; + interrupts = <0 31 0x04>; + clocks = <&tegra_car 15>; + status = "disabled"; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <1>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <3>; + }; + }; + + pmu { + compatible = "arm,cortex-a9-pmu"; + interrupts = <0 144 0x04 + 0 145 0x04 + 0 146 0x04 + 0 147 0x04>; + }; +}; diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index aa696a17889..83925d9216b 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a54" +CONFIG_LOCALVERSION="-MKernel-a0" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -295,6 +295,7 @@ CONFIG_ARCH_TEGRA_HAS_DUAL_CPU_CLUSTERS=y CONFIG_ARCH_TEGRA_HAS_PCIE=y CONFIG_ARCH_TEGRA_HAS_SATA=y CONFIG_TEGRA_PCI=y +CONFIG_TEGRA_AHB=y # # Tegra board type diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 52c254f385e..558577ff6b9 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -70,6 +70,14 @@ config TEGRA_PCI help Adds PCIe Host controller driver for tegra based systems +config TEGRA_AHB + bool "Enable AHB driver for NVIDIA Tegra SoCs" + default y + help + Adds AHB configuration functionality for NVIDIA Tegra SoCs, + which controls AHB bus master arbitration and some + perfomance parameters(priority, prefech size). + comment "Tegra board type" config MACH_HARMONY diff --git a/arch/arm/mach-tegra/Makefile.boot b/arch/arm/mach-tegra/Makefile.boot index d8cb9173cdf..e672efe4b82 100644 --- a/arch/arm/mach-tegra/Makefile.boot +++ b/arch/arm/mach-tegra/Makefile.boot @@ -8,3 +8,4 @@ initrd_phys-$(CONFIG_ARCH_TEGRA_3x_SOC) := 0x80800000 dtb-$(CONFIG_MACH_HARMONY) += tegra-harmony.dtb dtb-$(CONFIG_MACH_SEABOARD) += tegra-seaboard.dtb +dtb-$(CONFIG_MACH_GROUPER) += tegra30-grouper.dtb diff --git a/arch/arm/mach-tegra/include/mach/tegra-ahb.h b/arch/arm/mach-tegra/include/mach/tegra-ahb.h new file mode 100644 index 00000000000..e0f8c84b1d8 --- /dev/null +++ b/arch/arm/mach-tegra/include/mach/tegra-ahb.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __MACH_TEGRA_AHB_H__ +#define __MACH_TEGRA_AHB_H__ + +extern int tegra_ahb_enable_smmu(struct device_node *ahb); + +#endif /* __MACH_TEGRA_AHB_H__ */ diff --git a/drivers/Makefile b/drivers/Makefile index 24e48fc3526..bad062b86c6 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -17,7 +17,7 @@ obj-$(CONFIG_SFI) += sfi/ # PnP must come after ACPI since it will eventually need to check if acpi # was used and do nothing if so obj-$(CONFIG_PNP) += pnp/ -obj-$(CONFIG_ARM_AMBA) += amba/ +obj-$(CONFIG_ARM) += amba/ # Many drivers will want to use DMA so this has to be made available # really early. obj-$(CONFIG_DMA_ENGINE) += dma/ diff --git a/drivers/amba/Makefile b/drivers/amba/Makefile index 40fe74097be..66e81c2f1e3 100644 --- a/drivers/amba/Makefile +++ b/drivers/amba/Makefile @@ -1,2 +1,2 @@ -obj-y += bus.o - +obj-$(CONFIG_ARM_AMBA) += bus.o +obj-$(CONFIG_TEGRA_AHB) += tegra-ahb.o diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c new file mode 100644 index 00000000000..66a63d50048 --- /dev/null +++ b/drivers/amba/tegra-ahb.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * Copyright (C) 2011 Google, Inc. + * + * Author: + * Jay Cheng + * James Wylder + * Benoit Goby + * Colin Cross + * Hiroshi DOYU + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include +#include +#include + +#define DRV_NAME "tegra-ahb" + +#define AHB_ARBITRATION_DISABLE 0x00 +#define AHB_ARBITRATION_PRIORITY_CTRL 0x04 +#define AHB_PRIORITY_WEIGHT(x) (((x) & 0x7) << 29) +#define PRIORITY_SELECT_USB BIT(6) +#define PRIORITY_SELECT_USB2 BIT(18) +#define PRIORITY_SELECT_USB3 BIT(17) + +#define AHB_GIZMO_AHB_MEM 0x0c +#define ENB_FAST_REARBITRATE BIT(2) +#define DONT_SPLIT_AHB_WR BIT(7) + +#define AHB_GIZMO_APB_DMA 0x10 +#define AHB_GIZMO_IDE 0x18 +#define AHB_GIZMO_USB 0x1c +#define AHB_GIZMO_AHB_XBAR_BRIDGE 0x20 +#define AHB_GIZMO_CPU_AHB_BRIDGE 0x24 +#define AHB_GIZMO_COP_AHB_BRIDGE 0x28 +#define AHB_GIZMO_XBAR_APB_CTLR 0x2c +#define AHB_GIZMO_VCP_AHB_BRIDGE 0x30 +#define AHB_GIZMO_NAND 0x3c +#define AHB_GIZMO_SDMMC4 0x44 +#define AHB_GIZMO_XIO 0x48 +#define AHB_GIZMO_BSEV 0x60 +#define AHB_GIZMO_BSEA 0x70 +#define AHB_GIZMO_NOR 0x74 +#define AHB_GIZMO_USB2 0x78 +#define AHB_GIZMO_USB3 0x7c +#define IMMEDIATE BIT(18) + +#define AHB_GIZMO_SDMMC1 0x80 +#define AHB_GIZMO_SDMMC2 0x84 +#define AHB_GIZMO_SDMMC3 0x88 +#define AHB_MEM_PREFETCH_CFG_X 0xd8 +#define AHB_ARBITRATION_XBAR_CTRL 0xdc +#define AHB_MEM_PREFETCH_CFG3 0xe0 +#define AHB_MEM_PREFETCH_CFG4 0xe4 +#define AHB_MEM_PREFETCH_CFG1 0xec +#define AHB_MEM_PREFETCH_CFG2 0xf0 +#define PREFETCH_ENB BIT(31) +#define MST_ID(x) (((x) & 0x1f) << 26) +#define AHBDMA_MST_ID MST_ID(5) +#define USB_MST_ID MST_ID(6) +#define USB2_MST_ID MST_ID(18) +#define USB3_MST_ID MST_ID(17) +#define ADDR_BNDRY(x) (((x) & 0xf) << 21) +#define INACTIVITY_TIMEOUT(x) (((x) & 0xffff) << 0) + +#define AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID 0xf8 + +#define AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE BIT(17) + +static struct platform_driver tegra_ahb_driver; + +static const u32 tegra_ahb_gizmo[] = { + AHB_ARBITRATION_DISABLE, + AHB_ARBITRATION_PRIORITY_CTRL, + AHB_GIZMO_AHB_MEM, + AHB_GIZMO_APB_DMA, + AHB_GIZMO_IDE, + AHB_GIZMO_USB, + AHB_GIZMO_AHB_XBAR_BRIDGE, + AHB_GIZMO_CPU_AHB_BRIDGE, + AHB_GIZMO_COP_AHB_BRIDGE, + AHB_GIZMO_XBAR_APB_CTLR, + AHB_GIZMO_VCP_AHB_BRIDGE, + AHB_GIZMO_NAND, + AHB_GIZMO_SDMMC4, + AHB_GIZMO_XIO, + AHB_GIZMO_BSEV, + AHB_GIZMO_BSEA, + AHB_GIZMO_NOR, + AHB_GIZMO_USB2, + AHB_GIZMO_USB3, + AHB_GIZMO_SDMMC1, + AHB_GIZMO_SDMMC2, + AHB_GIZMO_SDMMC3, + AHB_MEM_PREFETCH_CFG_X, + AHB_ARBITRATION_XBAR_CTRL, + AHB_MEM_PREFETCH_CFG3, + AHB_MEM_PREFETCH_CFG4, + AHB_MEM_PREFETCH_CFG1, + AHB_MEM_PREFETCH_CFG2, + AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID, +}; + +struct tegra_ahb { + void __iomem *regs; + struct device *dev; + u32 ctx[0]; +}; + +static inline u32 gizmo_readl(struct tegra_ahb *ahb, u32 offset) +{ + return readl(ahb->regs + offset); +} + +static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) +{ + writel(value, ahb->regs + offset); +} + +#ifdef CONFIG_ARCH_TEGRA_3x_SOC +static int tegra_ahb_match_by_smmu(struct device *dev, void *data) +{ + struct tegra_ahb *ahb = dev_get_drvdata(dev); + struct device_node *dn = data; + + return (ahb->dev->of_node == dn) ? 
1 : 0; +} + +int tegra_ahb_enable_smmu(struct device_node *dn) +{ + struct device *dev; + u32 val; + struct tegra_ahb *ahb; + + dev = driver_find_device(&tegra_ahb_driver.driver, NULL, dn, + tegra_ahb_match_by_smmu); + if (!dev) + return -EPROBE_DEFER; + ahb = dev_get_drvdata(dev); + val = gizmo_readl(ahb, AHB_ARBITRATION_XBAR_CTRL); + val |= AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE; + gizmo_writel(ahb, val, AHB_ARBITRATION_XBAR_CTRL); + return 0; +} +EXPORT_SYMBOL(tegra_ahb_enable_smmu); +#endif + +static int tegra_ahb_suspend(struct device *dev) +{ + int i; + struct tegra_ahb *ahb = dev_get_drvdata(dev); + + for (i = 0; i < ARRAY_SIZE(tegra_ahb_gizmo); i++) + ahb->ctx[i] = gizmo_readl(ahb, tegra_ahb_gizmo[i]); + return 0; +} + +static int tegra_ahb_resume(struct device *dev) +{ + int i; + struct tegra_ahb *ahb = dev_get_drvdata(dev); + + for (i = 0; i < ARRAY_SIZE(tegra_ahb_gizmo); i++) + gizmo_writel(ahb, ahb->ctx[i], tegra_ahb_gizmo[i]); + return 0; +} + +static UNIVERSAL_DEV_PM_OPS(tegra_ahb_pm, + tegra_ahb_suspend, + tegra_ahb_resume, NULL); + +static void tegra_ahb_gizmo_init(struct tegra_ahb *ahb) +{ + u32 val; + + val = gizmo_readl(ahb, AHB_GIZMO_AHB_MEM); + val |= ENB_FAST_REARBITRATE | IMMEDIATE | DONT_SPLIT_AHB_WR; + gizmo_writel(ahb, val, AHB_GIZMO_AHB_MEM); + + val = gizmo_readl(ahb, AHB_GIZMO_USB); + val |= IMMEDIATE; + gizmo_writel(ahb, val, AHB_GIZMO_USB); + + val = gizmo_readl(ahb, AHB_GIZMO_USB2); + val |= IMMEDIATE; + gizmo_writel(ahb, val, AHB_GIZMO_USB2); + + val = gizmo_readl(ahb, AHB_GIZMO_USB3); + val |= IMMEDIATE; + gizmo_writel(ahb, val, AHB_GIZMO_USB3); + + val = gizmo_readl(ahb, AHB_ARBITRATION_PRIORITY_CTRL); + val |= PRIORITY_SELECT_USB | + PRIORITY_SELECT_USB2 | + PRIORITY_SELECT_USB3 | + AHB_PRIORITY_WEIGHT(7); + gizmo_writel(ahb, val, AHB_ARBITRATION_PRIORITY_CTRL); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG1); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + AHBDMA_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG1); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG2); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG2); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG3); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB3_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG3); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG4); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB2_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG4); +} + +static int __devinit tegra_ahb_probe(struct platform_device *pdev) +{ + struct resource *res; + struct tegra_ahb *ahb; + size_t bytes; + + bytes = sizeof(*ahb) + sizeof(u32) * ARRAY_SIZE(tegra_ahb_gizmo); + ahb = devm_kzalloc(&pdev->dev, bytes, GFP_KERNEL); + if (!ahb) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + ahb->regs = devm_request_and_ioremap(&pdev->dev, res); + if (!ahb->regs) + return -EBUSY; + + ahb->dev = &pdev->dev; + platform_set_drvdata(pdev, ahb); + tegra_ahb_gizmo_init(ahb); + return 0; +} + +static const struct of_device_id tegra_ahb_of_match[] __devinitconst = { + { .compatible = "nvidia,tegra30-ahb", }, + { .compatible = "nvidia,tegra20-ahb", }, + {}, +}; + +static struct platform_driver tegra_ahb_driver = { + .probe = tegra_ahb_probe, + .driver = { + .name = DRV_NAME, + 
.owner = THIS_MODULE, + .of_match_table = tegra_ahb_of_match, + .pm = &tegra_ahb_pm, + }, +}; +module_platform_driver(tegra_ahb_driver); + +MODULE_AUTHOR("Hiroshi DOYU "); +MODULE_DESCRIPTION("Tegra AHB driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/include/linux/errno.h b/include/linux/errno.h index 46685832ed9..2e2b696e224 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -16,7 +16,8 @@ #define ERESTARTNOHAND 514 /* restart if no handler.. */ #define ENOIOCTLCMD 515 /* No ioctl command */ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ - +#define EPROBE_DEFER 517 /* Driver requests probe retry */ +#define EOPENSTALE 518 /* open found a stale dentry */ /* Defined for the NFSv3 protocol */ #define EBADHANDLE 521 /* Illegal NFS file handle */ #define ENOTSYNC 522 /* Update synchronization mismatch */ From f990a5adfa296997ae75f5b6f289248ac753fecd Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 7 May 2013 20:40:29 -0400 Subject: [PATCH 475/678] cpufreq: touchdemand: performance tuning --- drivers/cpufreq/cpufreq_touchdemand.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq_touchdemand.c b/drivers/cpufreq/cpufreq_touchdemand.c index a6019ab3378..7d1fd14c43d 100644 --- a/drivers/cpufreq/cpufreq_touchdemand.c +++ b/drivers/cpufreq/cpufreq_touchdemand.c @@ -45,12 +45,12 @@ unsigned int min_cpus_lock; * It helps to keep variable names smaller, simpler */ -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define DEF_FREQUENCY_UP_THRESHOLD (98) -#define DEF_SAMPLING_DOWN_FACTOR (2) +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define DEF_FREQUENCY_UP_THRESHOLD (90) +#define DEF_SAMPLING_DOWN_FACTOR (4) #define MAX_SAMPLING_DOWN_FACTOR (100000) -#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (5) -#define MICRO_FREQUENCY_UP_THRESHOLD (98) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define MICRO_FREQUENCY_UP_THRESHOLD (90) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) @@ -375,7 +375,7 @@ static ssize_t store_touch_factor(struct kobject *a, struct attribute *b, return count; } -static unsigned int Touch_poke_attr[4] = {1200000, 1000000, 0, 0}; +static unsigned int Touch_poke_attr[4] = {1200000, 1200000, 0, 0}; static unsigned int Touch_poke_boost = 1; static unsigned long Touch_poke_boost_till_jiffies = 0; From 730055f4e8aa4990ea92faf5ce9a9bbe743b33f7 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 7 May 2013 21:34:37 -0400 Subject: [PATCH 476/678] mach-tegra: kconfig: typo fix --- arch/arm/mach-tegra/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 558577ff6b9..ff52f42efbd 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -340,16 +340,16 @@ config LP_OVERCLOCK bool "Enable LP overclock for Tegra3" depends on TEGRA_SILICON_PLATFORM default n - help - Choose y to overclock the LP core. - If Off, maximum clock speed is 475MHz. - If On, LP clock speed can be selected. + ---help--- + Choose y to overclock the LP core. + If Off, maximum clock speed is 475MHz. + If On, LP clock speed can be selected. choice depends on LP_OVERCLOCK prompt "Maximum LP Rate" - default GPU_OC_666 + default LP_OC_666 ---help--- Select the desired LP overclock rate. 
From 00cf52f1b91ca4b2b5e12400943bdeab0a954a73 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 12 May 2013 15:55:58 -0400 Subject: [PATCH 477/678] cpufreq: interactive: add core lock --- drivers/cpufreq/cpufreq_interactive.c | 115 +++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 7678d48ab4c..8cd5d736029 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -31,6 +31,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -63,8 +64,24 @@ static cpumask_t speedchange_cpumask; static spinlock_t speedchange_cpumask_lock; static struct mutex gov_lock; +struct cpufreq_interactive_core_lock { + struct pm_qos_request_list qos_min_req; + struct pm_qos_request_list qos_max_req; + struct task_struct *lock_task; + struct work_struct unlock_work; + struct timer_list unlock_timer; + int request_active; + unsigned long lock_period; + struct mutex mutex; +}; + +/* default timeout for core lock down */ +#define DEFAULT_CORE_LOCK_PERIOD 200000 /* 200 ms */ + +static struct cpufreq_interactive_core_lock core_lock; + /* Hi speed to bump to from lo speed when load burst (default max) */ -static unsigned int hispeed_freq = 1200; +static unsigned int hispeed_freq; /* Go to hi speed when CPU load at or above this value. */ #define DEFAULT_GO_HISPEED_LOAD 99 @@ -669,6 +686,71 @@ static struct global_attr target_loads_attr = __ATTR(target_loads, S_IRUGO | S_IWUSR, show_target_loads, store_target_loads); +static void cpufreq_interactive_core_lock_timer(unsigned long data) +{ + queue_work(inputopen_wq, &core_lock.unlock_work); +} + +static void cpufreq_interactive_unlock_cores(struct work_struct *wq) +{ + struct cpufreq_interactive_core_lock *cl = + container_of(wq, struct cpufreq_interactive_core_lock, + unlock_work); + + mutex_lock(&cl->mutex); + + if (--cl->request_active) { + goto done; + } + + pm_qos_update_request(&cl->qos_min_req, + PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); + + pm_qos_update_request(&cl->qos_max_req, + PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE); + +done: + mutex_unlock(&cl->mutex); +} + +/* Lock down to whatever # of cores online + * right now. + * + * A pm_qos request for 1 online CPU results in + * an instant cluster switch. 
+ */ +static void cpufreq_interactive_lock_cores(void) +{ + unsigned int ncpus; + + mutex_lock(&core_lock.mutex); + + if (core_lock.request_active) { + goto arm_timer; + } + + ncpus = num_online_cpus(); + pm_qos_update_request(&core_lock.qos_min_req, ncpus); + pm_qos_update_request(&core_lock.qos_max_req, ncpus); + core_lock.request_active++; + +arm_timer: + mod_timer(&core_lock.unlock_timer, + jiffies + usecs_to_jiffies(core_lock.lock_period)); + + mutex_unlock(&core_lock.mutex); +} + +static int cpufreq_interactive_lock_cores_task(void *data) +{ + while(1) { + cpufreq_interactive_lock_cores(); + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + return 0; +} + /* * Pulsed boost on input event raises CPUs to hispeed_freq and lets * usual algorithm of min_sample_time decide when to allow speed @@ -680,6 +762,7 @@ static void cpufreq_interactive_input_event(struct input_handle *handle, unsigned int code, int value) { if (input_boost_val && type == EV_SYN && code == SYN_REPORT) { + wake_up_process(core_lock.lock_task); trace_cpufreq_interactive_boost("input"); cpufreq_interactive_boost(); } @@ -1178,7 +1261,32 @@ static int __init cpufreq_interactive_init(void) if (!inputopen_wq) goto err_freetask; + pm_qos_add_request(&core_lock.qos_min_req, PM_QOS_MIN_ONLINE_CPUS, + PM_QOS_MIN_ONLINE_CPUS_DEFAULT_VALUE); + + pm_qos_add_request(&core_lock.qos_max_req, PM_QOS_MAX_ONLINE_CPUS, + PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE); + + init_timer(&core_lock.unlock_timer); + core_lock.unlock_timer.function = cpufreq_interactive_core_lock_timer; + core_lock.unlock_timer.data = 0; + + core_lock.request_active = 0; + core_lock.lock_period = DEFAULT_CORE_LOCK_PERIOD; + mutex_init(&core_lock.mutex); + + core_lock.lock_task = kthread_create(cpufreq_interactive_lock_cores_task, NULL, + "kinteractive_lockcores"); + + if (IS_ERR(core_lock.lock_task)) + return PTR_ERR(core_lock.lock_task); + + sched_setscheduler_nocheck(core_lock.lock_task, SCHED_FIFO, ¶m); + get_task_struct(core_lock.lock_task); + + INIT_WORK(&inputopen.inputopen_work, cpufreq_interactive_input_open); + INIT_WORK(&core_lock.unlock_work, cpufreq_interactive_unlock_cores); /* NB: wake up so the thread does not look hung to the freezer */ wake_up_process(speedchange_task); @@ -1202,6 +1310,11 @@ static void __exit cpufreq_interactive_exit(void) kthread_stop(speedchange_task); put_task_struct(speedchange_task); destroy_workqueue(inputopen_wq); + + pm_qos_remove_request(&core_lock.qos_min_req); + pm_qos_remove_request(&core_lock.qos_max_req); + kthread_stop(core_lock.lock_task); + put_task_struct(core_lock.lock_task); } module_exit(cpufreq_interactive_exit); From f1286bd4a3127769390becb3dd8506a2858e0219 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 14 May 2013 14:00:22 -0400 Subject: [PATCH 478/678] defconfig: revert to mostly stock (test) --- arch/arm/configs/metallice_grouper_defconfig | 119 +++++++++---------- 1 file changed, 58 insertions(+), 61 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 83925d9216b..31d9f71097a 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -78,7 +78,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=17 CONFIG_CGROUPS=y -# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_FREEZER=y # CONFIG_CGROUP_DEVICE is not set # CONFIG_CPUSETS is not set @@ -92,7 +92,7 @@ CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y # CONFIG_BLK_CGROUP is not set # 
CONFIG_NAMESPACES is not set -CONFIG_SCHED_AUTOGROUP=y +# CONFIG_SCHED_AUTOGROUP is not set # CONFIG_SYSFS_DEPRECATED is not set # CONFIG_RELAY is not set CONFIG_BLK_DEV_INITRD=y @@ -140,8 +140,9 @@ CONFIG_COMPAT_BRK=y CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set -# CONFIG_PROFILING is not set +CONFIG_PROFILING=y CONFIG_TRACEPOINTS=y +CONFIG_OPROFILE=y CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_KPROBES=y @@ -179,13 +180,14 @@ CONFIG_LBDAF=y CONFIG_IOSCHED_NOOP=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_ROW=y -# CONFIG_IOSCHED_CFQ is not set +CONFIG_IOSCHED_CFQ=y # CONFIG_IOSCHED_SIO is not set # CONFIG_IOSCHED_VR is not set CONFIG_IOSCHED_BFQ=y CONFIG_CGROUP_BFQIO=y # CONFIG_DEFAULT_DEADLINE is not set CONFIG_DEFAULT_ROW=y +# CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_BFQ is not set # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="row" @@ -302,7 +304,7 @@ CONFIG_TEGRA_AHB=y # # CONFIG_MACH_TEGRA_DT is not set # CONFIG_MACH_ARUBA is not set -# CONFIG_MACH_CARDHU is not set +CONFIG_MACH_CARDHU=y # CONFIG_MACH_P1852 is not set # CONFIG_MACH_TEGRA_ENTERPRISE is not set # CONFIG_MACH_KAI is not set @@ -314,6 +316,7 @@ CONFIG_TEGRA_DEBUG_UART_NONE=y CONFIG_TEGRA_SYSTEM_DMA=y CONFIG_TEGRA_PWM=y CONFIG_TEGRA_FIQ_DEBUGGER=y +# CONFIG_TEGRA_CARDHU_DSI is not set CONFIG_TEGRA_EMC_SCALING_ENABLE=y CONFIG_VOLTAGE_CONTROL=y CONFIG_CUSTOM_BRIGHTNESS=y @@ -357,14 +360,14 @@ CONFIG_TEGRA_VARIANT_INFO=y CONFIG_USB_HOTPLUG=y CONFIG_TEGRA_DYNAMIC_PWRDET=y CONFIG_TEGRA_EDP_EXACT_FREQ=y -CONFIG_TEGRA_USB_MODEM_POWER=y +# CONFIG_TEGRA_USB_MODEM_POWER is not set CONFIG_TEGRA_BB_XMM_POWER=y # CONFIG_TEGRA_BB_XMM_POWER2 is not set # CONFIG_TEGRA_THERMAL_SYSFS is not set CONFIG_TEGRA_PLLM_RESTRICTED=y # CONFIG_TEGRA_WDT_RECOVERY is not set CONFIG_TEGRA_LP2_ARM_TWD=y -CONFIG_TEGRA_SLOW_CSITE=y +# CONFIG_TEGRA_SLOW_CSITE is not set # CONFIG_TEGRA_PREINIT_CLOCKS is not set CONFIG_USBHOST=y @@ -510,9 +513,7 @@ CONFIG_CMDLINE="tegra_wdt.heartbeat=30" CONFIG_CMDLINE_EXTEND=y # CONFIG_CMDLINE_FORCE is not set # CONFIG_XIP_KERNEL is not set -CONFIG_KEXEC=y -CONFIG_ATAGS_PROC=y -CONFIG_KEXEC_HARDBOOT=y +# CONFIG_KEXEC is not set # CONFIG_CRASH_DUMP is not set # CONFIG_AUTO_ZRELADDR is not set @@ -535,12 +536,12 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND=y # CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_TOUCHDEMAND=y CONFIG_CPU_FREQ_GOV_INTERACTIVE=y -# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y # CONFIG_CPU_FREQ_GOV_LULZACTIVE is not set # CONFIG_CPU_FREQ_GOV_PEGASUSQ is not set @@ -592,7 +593,10 @@ CONFIG_PM_SLEEP=y CONFIG_PM_SLEEP_SMP=y CONFIG_PM_RUNTIME=y CONFIG_PM=y -# CONFIG_PM_DEBUG is not set +CONFIG_PM_DEBUG=y +# CONFIG_PM_ADVANCED_DEBUG is not set +# CONFIG_PM_TEST_SUSPEND is not set +CONFIG_CAN_PM_TRACE=y # CONFIG_APM_EMULATION is not set CONFIG_PM_CLK=y CONFIG_SUSPEND_TIME=y @@ -624,12 +628,11 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y # CONFIG_NET_IPIP is not set -CONFIG_NET_IPGRE_DEMUX=y -# CONFIG_NET_IPGRE is not set +# CONFIG_NET_IPGRE_DEMUX is not set # CONFIG_IP_MROUTE is not set # CONFIG_ARPD is not set # CONFIG_SYN_COOKIES is not set -CONFIG_INET_AH=y +# CONFIG_INET_AH is not set CONFIG_INET_ESP=y # 
CONFIG_INET_IPCOMP is not set # CONFIG_INET_XFRM_TUNNEL is not set @@ -862,9 +865,7 @@ CONFIG_IP6_NF_RAW=y # CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set -CONFIG_L2TP=y -# CONFIG_L2TP_DEBUGFS is not set -# CONFIG_L2TP_V3 is not set +# CONFIG_L2TP is not set # CONFIG_BRIDGE is not set # CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set @@ -938,7 +939,6 @@ CONFIG_NET_ACT_MIRRED=y # CONFIG_NET_CLS_IND is not set CONFIG_NET_SCH_FIFO=y # CONFIG_DCB is not set -CONFIG_DNS_RESOLVER=y # CONFIG_BATMAN_ADV is not set CONFIG_RPS=y CONFIG_RFS_ACCEL=y @@ -1353,8 +1353,6 @@ CONFIG_PPP_DEFLATE=y CONFIG_PPP_BSDCOMP=y CONFIG_PPP_MPPE=y # CONFIG_PPPOE is not set -# CONFIG_PPTP is not set -# CONFIG_PPPOL2TP is not set CONFIG_PPPOLAC=y CONFIG_PPPOPNS=y # CONFIG_SLIP is not set @@ -2195,7 +2193,6 @@ CONFIG_SND=y CONFIG_SND_TIMER=y CONFIG_SND_PCM=y CONFIG_SND_HWDEP=y -CONFIG_SND_RAWMIDI=y CONFIG_SND_JACK=y # CONFIG_SND_SEQUENCER is not set # CONFIG_SND_MIXER_OSS is not set @@ -2308,7 +2305,7 @@ CONFIG_SND_HDA_POWER_SAVE_DEFAULT=10 CONFIG_SND_ARM=y CONFIG_SND_SPI=y CONFIG_SND_USB=y -CONFIG_SND_USB_AUDIO=y +# CONFIG_SND_USB_AUDIO is not set # CONFIG_SND_USB_UA101 is not set # CONFIG_SND_USB_CAIAQ is not set # CONFIG_SND_USB_6FIRE is not set @@ -2319,10 +2316,16 @@ CONFIG_SND_SOC_TEGRA30_AHUB=y CONFIG_SND_SOC_TEGRA30_DAM=y CONFIG_SND_SOC_TEGRA30_I2S=y CONFIG_SND_SOC_TEGRA30_SPDIF=y +CONFIG_MACH_HAS_SND_SOC_TEGRA_WM8903=y +# CONFIG_SND_SOC_TEGRA_WM8903 is not set +CONFIG_MACH_HAS_SND_SOC_TEGRA_TLV320AIC326X=y +# CONFIG_SND_SOC_TEGRA_TLV320AIC326X is not set CONFIG_MACH_HAS_SND_SOC_TEGRA_RT5639=y # CONFIG_SND_SOC_TEGRA_RT5639 is not set CONFIG_MACH_HAS_SND_SOC_TEGRA_RT5640=y CONFIG_SND_SOC_TEGRA_RT5640=y +CONFIG_MACH_HAS_SND_SOC_TEGRA_MAX98095=y +# CONFIG_SND_SOC_TEGRA_MAX98095 is not set CONFIG_HEADSET_FUNCTION=y CONFIG_SND_SOC_I2C_AND_SPI=y # CONFIG_SND_SOC_ALL_CODECS is not set @@ -2954,14 +2957,23 @@ CONFIG_RIL=y # # File systems # -# CONFIG_EXT2_FS is not set -# CONFIG_EXT3_FS is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y -CONFIG_EXT4_USE_FOR_EXT23=y CONFIG_EXT4_FS_XATTR=y CONFIG_EXT4_FS_POSIX_ACL=y # CONFIG_EXT4_FS_SECURITY is not set # CONFIG_EXT4_DEBUG is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set CONFIG_JBD2=y # CONFIG_JBD2_DEBUG is not set CONFIG_FS_MBCACHE=y @@ -2998,13 +3010,13 @@ CONFIG_FUSE_FS=y # DOS/FAT/NT Filesystems # CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=y +# CONFIG_MSDOS_FS is not set CONFIG_VFAT_FS=y CONFIG_FAT_DEFAULT_CODEPAGE=437 CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_NTFS_FS=y # CONFIG_NTFS_DEBUG is not set -CONFIG_NTFS_RW=y +# CONFIG_NTFS_RW is not set # # Pseudo filesystems @@ -3022,9 +3034,8 @@ CONFIG_TMPFS=y CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set -# CONFIG_ECRYPT_FS is not set -CONFIG_HFS_FS=y -CONFIG_HFSPLUS_FS=y +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set @@ -3042,28 +3053,15 @@ CONFIG_HFSPLUS_FS=y # CONFIG_UFS_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_V3_ACL is not set -CONFIG_NFS_V4=y -# CONFIG_NFS_V4_1 is not set +# CONFIG_NFS_V3 is not set +# CONFIG_NFS_V4 is not set CONFIG_ROOT_NFS=y -# 
CONFIG_NFS_USE_LEGACY_DNS is not set -CONFIG_NFS_USE_KERNEL_DNS=y -# CONFIG_NFS_USE_NEW_IDMAPPER is not set # CONFIG_NFSD is not set CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y -CONFIG_SUNRPC_GSS=y # CONFIG_CEPH_FS is not set -CONFIG_CIFS=y -# CONFIG_CIFS_STATS is not set -# CONFIG_CIFS_WEAK_PW_HASH is not set -# CONFIG_CIFS_UPCALL is not set -# CONFIG_CIFS_XATTR is not set -# CONFIG_CIFS_DEBUG2 is not set -# CONFIG_CIFS_DFS_UPCALL is not set +# CONFIG_CIFS is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -3128,7 +3126,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set -CONFIG_NLS_UTF8=y +# CONFIG_NLS_UTF8 is not set # # Kernel hacking @@ -3154,8 +3152,8 @@ CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 # CONFIG_DETECT_HUNG_TASK is not set CONFIG_SCHED_DEBUG=y -# CONFIG_SCHEDSTATS is not set -# CONFIG_TIMER_STATS is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y # CONFIG_DEBUG_OBJECTS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_KMEMLEAK is not set @@ -3174,10 +3172,10 @@ CONFIG_STACKTRACE=y # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_INFO_REDUCED is not set -# CONFIG_DEBUG_VM is not set +CONFIG_DEBUG_VM=y # CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_MEMORY_INIT is not set # CONFIG_DEBUG_LIST is not set @@ -3185,11 +3183,10 @@ CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_SG is not set # CONFIG_DEBUG_NOTIFIERS is not set # CONFIG_DEBUG_CREDENTIALS is not set -CONFIG_FRAME_POINTER=y # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 -# CONFIG_RCU_CPU_STALL_VERBOSE is not set +CONFIG_RCU_CPU_STALL_VERBOSE=y # CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set @@ -3207,8 +3204,9 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_C_RECORDMCOUNT=y CONFIG_RING_BUFFER=y CONFIG_EVENT_TRACING=y -CONFIG_EVENT_POWER_TRACING_DEPRECATED=y +# CONFIG_EVENT_POWER_TRACING_DEPRECATED is not set CONFIG_CONTEXT_SWITCH_TRACER=y +CONFIG_RING_BUFFER_ALLOW_SWAP=y CONFIG_TRACING=y CONFIG_TRACING_SUPPORT=y CONFIG_FTRACE=y @@ -3232,7 +3230,7 @@ CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set # CONFIG_TEST_KSTRTOX is not set # CONFIG_STRICT_DEVMEM is not set -# CONFIG_ARM_UNWIND is not set +CONFIG_ARM_UNWIND=y # CONFIG_DEBUG_USER is not set # CONFIG_DEBUG_LL is not set # CONFIG_OC_ETM is not set @@ -3240,8 +3238,7 @@ CONFIG_HAVE_ARCH_KGDB=y # # Security options # -CONFIG_KEYS=y -# CONFIG_KEYS_DEBUG_PROC_KEYS is not set +# CONFIG_KEYS is not set # CONFIG_SECURITY_DMESG_RESTRICT is not set # CONFIG_SECURITY is not set # CONFIG_SECURITYFS is not set @@ -3304,7 +3301,7 @@ CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_CRC32C=y # CONFIG_CRYPTO_GHASH is not set -CONFIG_CRYPTO_MD4=y +# CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set # CONFIG_CRYPTO_RMD128 is not set From 4a5492058cdfc0c676a276237adc1d34063792d1 Mon Sep 17 00:00:00 2001 From: Francisco Franco Date: Sun, 21 Apr 2013 23:56:39 -0700 Subject: [PATCH 479/678] block: cfq: tweak all CFQ tunables. Researched by my mates malaroth, osm0sis, joaquinf, The Gingerbread Man, pkgnex, Khrushy, shreddintyres. 
Please, if you copy these values, make sure you cherry-pick the commit properly and keep these guys in the commit message; they researched this for a long time. Signed-off-by: Francisco Franco --- block/cfq-iosched.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 36057a141c8..51e74cf1cb1 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -20,18 +20,18 @@ * tunables */ /* max queue in one round of service */ -static const int cfq_quantum = 8; -static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; +static const int cfq_quantum = 4; +static const int cfq_fifo_expire[2] = { 80, 330}; /* maximum backwards seek, in KiB */ -static const int cfq_back_max = 16 * 1024; +static const int cfq_back_max = 12582912; /* penalty of a backwards seek */ -static const int cfq_back_penalty = 2; -static const int cfq_slice_sync = HZ / 10; -static int cfq_slice_async = HZ / 25; +static const int cfq_back_penalty = 1; +static const int cfq_slice_sync = 60; +static int cfq_slice_async = 50; static const int cfq_slice_async_rq = 2; -static int cfq_slice_idle = HZ / 125; -static int cfq_group_idle = HZ / 125; -static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ +static int cfq_slice_idle = 0; +static int cfq_group_idle = 0; +static const int cfq_target_latency = 300; /* 300 ms */ static const int cfq_hist_divisor = 4; /* @@ -4272,20 +4272,6 @@ static struct blkio_policy_type blkio_policy_cfq; static int __init cfq_init(void) { - /* - * could be 0 on HZ < 1000 setups - */ - if (!cfq_slice_async) - cfq_slice_async = 1; - if (!cfq_slice_idle) - cfq_slice_idle = 1; - -#ifdef CONFIG_CFQ_GROUP_IOSCHED - if (!cfq_group_idle) - cfq_group_idle = 1; -#else - cfq_group_idle = 0; -#endif if (cfq_slab_setup()) return -ENOMEM; From e66099cb402a4bbe8bc2f48ad4e4a6444ddd9d10 Mon Sep 17 00:00:00 2001 From: Francisco Franco Date: Mon, 22 Apr 2013 22:10:48 -0700 Subject: [PATCH 480/678] block: cfq: finally nailed CFQ tunables correctly, damn multipliers. Thanks osm0sis for cranking this last bit up. 
Signed-off-by: Francisco Franco --- block/cfq-iosched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 51e74cf1cb1..682463dbfc7 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -21,13 +21,13 @@ */ /* max queue in one round of service */ static const int cfq_quantum = 4; -static const int cfq_fifo_expire[2] = { 80, 330}; +static const int cfq_fifo_expire[2] = { 42, 11}; /* maximum backwards seek, in KiB */ static const int cfq_back_max = 12582912; /* penalty of a backwards seek */ static const int cfq_back_penalty = 1; -static const int cfq_slice_sync = 60; -static int cfq_slice_async = 50; +static const int cfq_slice_sync = 8; +static int cfq_slice_async = 7; static const int cfq_slice_async_rq = 2; static int cfq_slice_idle = 0; static int cfq_group_idle = 0; From 350a4ae44a663f30bd22ada54b4c34049fb29f6c Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 17 May 2013 11:55:44 -0400 Subject: [PATCH 481/678] block: deadline: hardcode tunable values courtesy of franco --- block/deadline-iosched.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 586f4e72929..04440a601ab 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -17,10 +17,10 @@ /* * See Documentation/block/deadline-iosched.txt */ -static const int read_expire = HZ / 2; /* max time before a read is submitted. */ -static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ -static const int writes_starved = 1; /* max times reads can starve a write */ -static const int fifo_batch = 1; /* # of sequential requests treated as one +static const int read_expire = 250; /* max time before a read is submitted. */ +static const int write_expire = 2500; /* ditto for writes, these limits are SOFT! */ +static const int writes_starved = 2; /* max times reads can starve a write */ +static const int fifo_batch = 8; /* # of sequential requests treated as one by the above parameters. For throughput. */ struct deadline_data { @@ -230,7 +230,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) /* * rq is expired! 
*/ - if (time_after(jiffies, rq_fifo_time(rq))) + if (time_after_eq(jiffies, rq_fifo_time(rq))) return 1; return 0; @@ -385,8 +385,8 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \ __data = jiffies_to_msecs(__data); \ return deadline_var_show(__data, (page)); \ } -SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1); -SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1); +SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 0); +SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 0); SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0); SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0); SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0); @@ -408,8 +408,8 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) *(__PTR) = __data; \ return ret; \ } -STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); -STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); +STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 0); +STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 0); STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0); STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0); From 1972f98c5a7f4ad89984f1c9ef81215717cd28f9 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 May 2013 09:51:14 -0400 Subject: [PATCH 482/678] Revert "mach-tegra: usbhost.c: disable usbhost_fixed_install_mode" This reverts commit dd3c516ea0a408034a4fdf15235e38e3177c0ecd. --- arch/arm/mach-tegra/usbhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c index 3b34114021f..38c87df3a2c 100644 --- a/arch/arm/mach-tegra/usbhost.c +++ b/arch/arm/mach-tegra/usbhost.c @@ -97,7 +97,7 @@ int usbhost_init(void) int retval; // default values - usbhost_fixed_install_mode = 0; + usbhost_fixed_install_mode = 1; usbhost_hotplug_on_boot = 1; usbhost_fastcharge_in_host_mode = 0; printk("usbhost %s FI=%d HP=%d FC=%d\n", __func__, usbhost_fixed_install_mode, From bb82effa8a2f39b06243191ebe331bdcd2e45d64 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 May 2013 09:51:27 -0400 Subject: [PATCH 483/678] Revert "enabled FTDI; added sysfs usbhost_hostmode; usbhost_hotplug_on_boot=1 as default; card.c now evaluating usbhost_hotplug_on_boot" This reverts commit ed98ab3a869754eef2c8d29509f1d71531b2ecc7. 
--- arch/arm/mach-tegra/usbhost.c | 22 +-------------- drivers/power/smb347-charger.c | 39 +++++++++++++-------------- sound/usb/card.c | 49 +++++++++++++--------------------- 3 files changed, 38 insertions(+), 72 deletions(-) diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c index 38c87df3a2c..28c24b08e68 100644 --- a/arch/arm/mach-tegra/usbhost.c +++ b/arch/arm/mach-tegra/usbhost.c @@ -60,29 +60,11 @@ static ssize_t hotplug_on_boot_store(struct kobject *kobj, struct kobj_attribute static struct kobj_attribute hotplug_on_boot_attribute = __ATTR(usbhost_hotplug_on_boot, 0666, hotplug_on_boot_show, hotplug_on_boot_store); -/* ----------------------------------------- */ -int usbhost_hostmode; - -static ssize_t hostmode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", usbhost_hostmode); -} - -static ssize_t hostmode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) -{ - sscanf(buf, "%du", &usbhost_hostmode); - return count; -} - -static struct kobj_attribute hostmode_attribute = - __ATTR(usbhost_hostmode, 0666, hostmode_show, hostmode_store); - /* ----------------------------------------- */ static struct attribute *attrs[] = { &fixed_install_mode_attribute.attr, &hotplug_on_boot_attribute.attr, &fastcharge_in_host_mode_attribute.attr, - &hostmode_attribute.attr, NULL, }; @@ -98,10 +80,8 @@ int usbhost_init(void) // default values usbhost_fixed_install_mode = 1; - usbhost_hotplug_on_boot = 1; + usbhost_hotplug_on_boot = 0; usbhost_fastcharge_in_host_mode = 0; - printk("usbhost %s FI=%d HP=%d FC=%d\n", __func__, usbhost_fixed_install_mode, - usbhost_hotplug_on_boot, usbhost_fastcharge_in_host_mode); usbhost_kobj = kobject_create_and_add("usbhost", kernel_kobj); if (!usbhost_kobj) { diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index 8b9f1c89edc..0ec5da60d7d 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -137,12 +137,11 @@ static int gpio_dock_in = 0; extern int usbhost_fixed_install_mode; extern int usbhost_fastcharge_in_host_mode; -extern int usbhost_hostmode; volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c static volatile int host_mode_charging_state = 0; static volatile int lastExternalPowerState = 0; -//static volatile int lastOtgState = 0; +static volatile int lastOtgState = 0; static volatile int lastChargeSlaveDevicesState = 0; static volatile int hostmode_waiting_for_power = 0; @@ -264,7 +263,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch int ret = 0; printk("smb347_configure_otg otg=%d chargeSlaves=%d stopSlaves=%d lastOtg=%d\n", - enableOTG, chargeSlaves, stopChargeSlaves, usbhost_hostmode); + enableOTG, chargeSlaves, stopChargeSlaves, lastOtgState); /*Enable volatile writes to registers*/ ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); @@ -303,7 +302,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch } if(enableOTG>0) { - if(!usbhost_hostmode) { + if(!lastOtgState) { printk("smb347_configure_otg enable host mode\n"); ret = smb347_update_reg(client, smb347_CMD_REG, 0x10); if (ret < 0) { @@ -311,10 +310,10 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch "0x%02x\n", __func__, smb347_CMD_REG); goto error; } - usbhost_hostmode = 1; + lastOtgState = 1; } } else if(enableOTG==0) { - if(usbhost_hostmode) { + if(lastOtgState) { printk("smb347_configure_otg disable host 
mode\n"); ret = smb347_read(client, smb347_CMD_REG); if (ret < 0) { @@ -327,7 +326,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch dev_err(&client->dev, "%s: err %d\n", __func__, ret); goto error; } - usbhost_hostmode=0; + lastOtgState=0; } } @@ -880,8 +879,8 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo int ret; int newExternalPowerState=0; - printk("smb347_otg_status from=%d to=%d hostmode=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d fixed_install_mode=%d\n", - from,to,usbhost_hostmode,lastExternalPowerState,lastChargeSlaveDevicesState,usbhost_fixed_install_mode); + printk("smb347_otg_status from=%d to=%d lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d fixed_install_mode=%d\n", + from,to,lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState,usbhost_fixed_install_mode); if(to==10) { // prevent race condition bug: only when going suspend (OTG PULL) @@ -1000,8 +999,8 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo } lastExternalPowerState = newExternalPowerState; - printk("smb347_otg_status DONE hostmode=%d externalPowerState=%d chargeSlaveDevicesState=%d\n", - usbhost_hostmode,lastExternalPowerState,lastChargeSlaveDevicesState); + printk("smb347_otg_status DONE lastOtgState=%d externalPowerState=%d chargeSlaveDevicesState=%d\n", + lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); } /* workqueue function */ @@ -1170,10 +1169,10 @@ static void inok_isr_work_function(struct work_struct *dat) // called on power loss/gain, but also if just a bare (non-powered) OTG adapter is pulled // also if FI is disabled via sysfs - printk("inok_isr_work_function hostmode=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", - usbhost_hostmode,lastExternalPowerState,lastChargeSlaveDevicesState); + printk("inok_isr_work_function lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", + lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); - if(usbhost_hostmode>0 && lastExternalPowerState>0) { + if(lastOtgState>0 && lastExternalPowerState>0) { // we used to be in externally powered host mode // this means external power was just lost cancel_delayed_work(&charger->curr_limit_work); @@ -1223,8 +1222,8 @@ static void inok_isr_work_function(struct work_struct *dat) "otg..\n", __func__); } - printk("inok_isr_work_function done hostmode=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", - usbhost_hostmode,lastExternalPowerState,lastChargeSlaveDevicesState); + printk("inok_isr_work_function done lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", + lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); return; } @@ -1247,7 +1246,7 @@ static void inok_isr_work_function(struct work_struct *dat) if(!lastChargeSlaveDevicesState) { // make external power detectable printk("inok_isr_work_function make external power detectable2\n"); - // 2013-01-28: crash here after (in the mobile version only?) 
+ // 2013-01-28: crash here after int ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" @@ -1261,7 +1260,7 @@ static void inok_isr_work_function(struct work_struct *dat) lastExternalPowerState = 1; // host_mode_charging_state may have been set by cable_type_detect() - if(host_mode_charging_state>0 && usbhost_hostmode==0) { + if(host_mode_charging_state>0 && lastOtgState==0) { printk("inok_isr_work_function external power available, start host mode\n"); if(smb347_configure_otg(client, 1, 0, lastChargeSlaveDevicesState)<0) dev_err(&client->dev, "%s() error in configuring" @@ -1269,7 +1268,7 @@ static void inok_isr_work_function(struct work_struct *dat) } //smb347_clear_interrupts(client); // FIXME??? - printk("inok_isr_work_function external power available hostmode=%d\n",usbhost_hostmode); + printk("inok_isr_work_function external power available lastOtgState=%d\n",lastOtgState); } static void dockin_isr_work_function(struct work_struct *dat) @@ -1370,7 +1369,6 @@ int smb347_event_fi(void) { // called by usbhost.c sysfs change from user space struct i2c_client *client = charger->client; printk("smb347_event_fi %d\n",usbhost_fixed_install_mode); -/* if(usbhost_fixed_install_mode>0) { // from OTG to FI // make external power detectable in case it is coming back @@ -1427,7 +1425,6 @@ int smb347_event_fi(void) { // DOES power the slave, but slaves are NOT detected, not even when replugged // music plays on speaker } -*/ } diff --git a/sound/usb/card.c b/sound/usb/card.c index 0f5e1d880c1..54b9f1f5465 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -78,11 +78,10 @@ MODULE_LICENSE("GPL"); MODULE_SUPPORTED_DEVICE("{{Generic,USB Audio}}"); // tmtmtm -extern int usbhost_hotplug_on_boot; struct timer_list my_timer; struct usb_device *postpone_usb_snd_dev = NULL; struct device_driver *postpone_usb_snd_drv = NULL; -extern struct device_driver *current_drv; // from base/dd.c +extern struct device_driver *current_drv; static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ @@ -434,11 +433,11 @@ static int snd_usb_audio_create(struct usb_device *dev, int idx, } //tmtmtm -static void mykthread(void *unused) +static int mykthread(void *unused) { printk("##### sound/usb/card.c mykthread driver_attach\n"); if(postpone_usb_snd_drv!=NULL) - driver_attach(postpone_usb_snd_drv); // drives/base/dd.c + driver_attach(postpone_usb_snd_drv); } static void delayed_func(unsigned long unused) { @@ -481,32 +480,23 @@ snd_usb_audio_probe(struct usb_device *dev, if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum) goto __err_val; - // tmtmtm - // we may not want the USB DAC, connected at boot time, to become - // the primary sound card, rather for it to become available as - // an *overlay* primary sound card, so we postpone device probe - if(usbhost_hotplug_on_boot) { - struct timespec tp; ktime_get_ts(&tp); - if (tp.tv_sec<8 && postpone_usb_snd_dev==NULL) { - printk("##### sound/usb/card.c DON'T REGISTER EARLY tv_sec=%d ++++++++++++++++++++\n",tp.tv_sec); - - // it would be good if the delayed call to driver_attach() (which will result in UEVENT ALSA_ID) - // would not be done by time (20 sec), but by ??? 
- // the current strategy may prove to be not 100% reliable - - postpone_usb_snd_dev = dev; - postpone_usb_snd_drv = current_drv; - init_timer(&my_timer); - my_timer.expires = jiffies + 18*HZ; // n*HZ = delay in number of seconds - my_timer.function = delayed_func; - add_timer(&my_timer); - printk("##### sound/usb/card.c delayed call to driver_attach initiated\n"); - goto __err_val; - } - printk("##### sound/usb/card.c REGISTER tv_sec=%d ++++++++++++++++++++++++\n",tp.tv_sec); - } else { - printk("##### sound/usb/card.c REGISTER !hotplug_on_boot\n"); + // tmtmtm: we don't want the USB DAC to become the primary sound card + // in order for a USB DAC, connected at boot time, to become available as + // an *overlay* primary sound card, we must postpone device probe + + struct timespec tp; ktime_get_ts(&tp); + if (tp.tv_sec<8 && postpone_usb_snd_dev==NULL) { + printk("##### sound/usb/card.c DON'T REGISTER EARLY tv_sec=%d ++++++++++++++++++++\n",tp.tv_sec); + postpone_usb_snd_dev = dev; + postpone_usb_snd_drv = current_drv; + init_timer(&my_timer); + my_timer.expires = jiffies + 20*HZ; // n*HZ = delay in number of seconds + my_timer.function = delayed_func; + add_timer(&my_timer); + printk("##### sound/usb/card.c delayed call to driver_attach initiated\n"); + goto __err_val; } + //printk("##### sound/usb/card.c REGISTER tv_sec=%d ++++++++++++++++++++++++\n",tp.tv_sec); if (snd_usb_apply_boot_quirk(dev, intf, quirk) < 0) @@ -584,7 +574,6 @@ snd_usb_audio_probe(struct usb_device *dev, chip->num_interfaces++; chip->probing = 0; mutex_unlock(®ister_mutex); - printk("##### sound/usb/card.c snd_usb_audio_probe done OK\n"); return chip; __error: From 16e798b4a8f26dad11a2bfe57f994b90d0d5239d Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 May 2013 09:51:30 -0400 Subject: [PATCH 484/678] Revert "adding credentials" This reverts commit 98a79c2760ee578a4cb36206870afc2e8b225510. 
--- arch/arm/mach-tegra/usbhost.c | 16 +++++++++------- drivers/power/smb347-charger.c | 28 ++++++++++++++++++---------- drivers/usb/host/ehci-tegra.c | 30 ++++++++++++++++++++++++++++-- drivers/usb/otg/tegra-otg.c | 12 ++++++++++-- 4 files changed, 65 insertions(+), 21 deletions(-) diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c index 28c24b08e68..145f5bcd70d 100644 --- a/arch/arm/mach-tegra/usbhost.c +++ b/arch/arm/mach-tegra/usbhost.c @@ -4,11 +4,10 @@ extern int smb347_event_fi(void); extern int smb347_event_fastcharge(void); -// Copyright (C) 2013 Timur Mehrvarz -// TODO: need to persist all 3 usbhost_* values +// TODO: need to persist all 3 values /* ----------------------------------------- */ -int usbhost_fixed_install_mode; +int usbhost_fixed_install_mode = 0; static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -18,6 +17,7 @@ static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribu static ssize_t fixed_install_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { sscanf(buf, "%du", &usbhost_fixed_install_mode); + // TODO: event -> smb347_charger.c smb347_event_fi(); return count; } @@ -26,7 +26,7 @@ static struct kobj_attribute fixed_install_mode_attribute = __ATTR(usbhost_fixed_install_mode, 0666, fixed_install_mode_show, fixed_install_mode_store); /* ----------------------------------------- */ -int usbhost_fastcharge_in_host_mode; +int usbhost_fastcharge_in_host_mode = 0; static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -36,6 +36,7 @@ static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_at static ssize_t fastcharge_in_host_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { sscanf(buf, "%du", &usbhost_fastcharge_in_host_mode); + // TODO: event -> smb347_charger.c smb347_event_fastcharge(); return count; } @@ -44,7 +45,7 @@ static struct kobj_attribute fastcharge_in_host_mode_attribute = __ATTR(usbhost_fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); /* ----------------------------------------- */ -int usbhost_hotplug_on_boot; +int usbhost_hotplug_on_boot = 0; static ssize_t hotplug_on_boot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -60,7 +61,9 @@ static ssize_t hotplug_on_boot_store(struct kobject *kobj, struct kobj_attribute static struct kobj_attribute hotplug_on_boot_attribute = __ATTR(usbhost_hotplug_on_boot, 0666, hotplug_on_boot_show, hotplug_on_boot_store); -/* ----------------------------------------- */ + + + static struct attribute *attrs[] = { &fixed_install_mode_attribute.attr, &hotplug_on_boot_attribute.attr, @@ -78,7 +81,6 @@ int usbhost_init(void) { int retval; - // default values usbhost_fixed_install_mode = 1; usbhost_hotplug_on_boot = 0; usbhost_fastcharge_in_host_mode = 0; diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index 0ec5da60d7d..e3a3a8a41e4 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -4,7 +4,6 @@ * Battery charger driver for smb347 from summit microelectronics * * Copyright (c) 2012, NVIDIA Corporation. 
- * Copyright (C) 2013 Timur Mehrvarz * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -135,6 +134,8 @@ static unsigned int project_id; static unsigned int pcba_ver; static int gpio_dock_in = 0; +// tmtmtm: also modify 'export KBUILD_BUILD_USER=timur-usbhost-fi-2013-01-01 +//static int fixed_install_mode = 0; extern int usbhost_fixed_install_mode; extern int usbhost_fastcharge_in_host_mode; @@ -330,7 +331,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch } } - // tmtmtm: we never charge slave devices in fixed_install_mode + // tmtmtm: we will never charge slave devices in fixed_install_mode if(!usbhost_fixed_install_mode) { if(chargeSlaves) { if(!lastChargeSlaveDevicesState) { @@ -350,7 +351,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch if(stopChargeSlaves) { if(lastChargeSlaveDevicesState) { - printk("smb347_configure_otg stop charging slaves\n"); + //printk("smb347_configure_otg stop charging slaves\n"); /* Configure INOK to be active low */ ret = smb347_read(client, smb347_SYSOK_USB3); if (ret < 0) { @@ -782,7 +783,7 @@ int smb347_hc_mode_callback(bool enable, int cur) } } - /* Disable volatile writes to registers */ + /* Disable volatile writes to registers */ ret = smb347_volatile_writes(client, smb347_DISABLE_WRITE); if (ret < 0) { dev_err(&client->dev, "%s() error in configuring charger..\n", @@ -794,6 +795,8 @@ int smb347_hc_mode_callback(bool enable, int cur) return ret; error: + //if(ret!=0) + // printk(KERN_INFO "smb347_hc_mode_callback ERROR %d\n",ret); return ret; } EXPORT_SYMBOL_GPL(smb347_hc_mode_callback); @@ -888,9 +891,9 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo // and use host_mode_charging_state's current value (probably charging), // before we call cable_type_detect() (when it will likely switch to not charging) // FIXME: but is tegra_usb_resume not only called on OTG PLUG? - // FIXME: better "so that tegra_ehci_irq() can run first" ? + // FIXME: do I mean "so that tegra_ehci_irq() can run first" ? 
schedule_timeout_interruptible(msecs_to_jiffies(100)); - // pausing here, smb347_resume() will call cable_type_detect() before we do (see: below) + // when doing this pause, smb347_resume() will call cable_type_detect() before we do below } cable_type_detect(); @@ -918,6 +921,9 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo // tmtmtm: mobile-mode: we need to be careful NOT to disable charger detection too early // once we start charging slaves ourselfs, we will not be able to detect ext power coming in + // also: why are we waiting here, if inok_isr_work_function is called on power + // we actually depend on it to arrive in parallel + // make external power detectable in case it is coming back printk("smb347_otg_status make external power detectable1\n"); ret = smb347_configure_interrupts(client); @@ -989,7 +995,9 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo } } - // make external power detectable in case it is coming back + //if(!newExternalPowerState /*&& !lastChargeSlaveDevicesState*/) { + // make external power detectable in case it is coming back + // FIXME: probably better do this only, if lastChargeSlaveDevicesState is not set if(!lastChargeSlaveDevicesState) { printk("smb347_otg_status make external power detectable2\n"); ret = smb347_configure_interrupts(client); @@ -1122,7 +1130,7 @@ static int cable_type_detect(void) touch_callback(usb_cable); #endif } - host_mode_charging_state = 1; // tmtmtm + host_mode_charging_state = 1; // tmtmtm } else { charger->cur_cable_type = unknow_cable; @@ -1140,7 +1148,7 @@ static int cable_type_detect(void) charger->cur_cable_type = unknow_cable; printk(KERN_INFO "USBIN=0\n"); - // tmtmtm: fix: battery tab keeps stating "Charging (AC)" + // tmtmtm: battery tab keeps stating "Charging (AC)" if(usbhost_fixed_install_mode) { host_mode_charging_state = 0; printk(KERN_INFO "cable_type_detect() disabled host_mode_charging_state ############\n"); @@ -1209,7 +1217,7 @@ static void inok_isr_work_function(struct work_struct *dat) // so we can continue host mode in OTG mode // if we would NOT call smb347_otg_status() here, slave devices would stay without power now - // tmtmtm: we may not want to call this, if the OTG-adapter is pulled (not just power) +// tmtmtm: we don't want to call this, if OTG-adapter is pulled (not just power) smb347_otg_status(OTG_STATE_A_HOST,OTG_STATE_A_HOST,NULL); } diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 53fbac3e5c1..24443ebf5b6 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -3,7 +3,6 @@ * * Copyright (C) 2010 Google, Inc. 
* Copyright (C) 2009 - 2011 NVIDIA Corporation - * Copyright (C) 2013 Timur Mehrvarz * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -60,6 +59,8 @@ extern void baseband_xmm_L3_resume_check(void); extern volatile int smb347_deep_sleep; // tmtmtm: from smb347-charger.c +//extern volatile int host_mode_charging_state; // tmtmtm: from smb347-charger.c +//extern int fixed_install_mode; // tmtmtm: from smb347-charger.c static struct usb_hcd *modem_ehci_handle; struct tegra_ehci_hcd { @@ -221,15 +222,27 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) } else if (tegra->bus_suspended && tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) { + // tmtmtm: OTG UNPLUG + // original intent: when waking up from deep sleep, skip the default return, + // if host_mode_charging AND fixed_install_mode are set + //if(host_mode_charging_state && fixed_install_mode) { + // printk("ehci-tegra %s waking up with host_mode_charging: special\n", __func__); if(smb347_deep_sleep) { printk("ehci-tegra %s wake-up/OTG-UNPLUG with smb347_deep_sleep: special\n", __func__); // fix: skip default return + + // FIXME + // DAS EINSCHRÄNKEN AUF DEN fixed_install_mode löst das problem nur im MOBILE kernel + // ECHTE LÖSUNG: ONLY skip default return when really waking up from deep sleep + // das kann sowohl bei unplug als auch bei plug passieren } else { + printk("ehci-tegra %s wake-up/OTG-PLUG without smb347_deep_sleep: normal return\n", __func__); spin_unlock(&ehci->lock); return 0; } } spin_unlock(&ehci->lock); + //printk("ehci-tegra %s post spin_unlock\n", __func__); } irq_status = ehci_irq(hcd); @@ -239,12 +252,15 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) } if (ehci->controller_remote_wakeup) { + //printk("ehci-tegra %s ehci->controller_remote_wakeup\n", __func__); ehci->controller_remote_wakeup = false; /* disable interrupts */ ehci_writel(ehci, 0, &ehci->regs->intr_enable); tegra_usb_phy_preresume(tegra->phy, true); tegra->port_resuming = 1; + //printk("ehci-tegra %s ehci->controller_remote_wakeup done\n", __func__); } + //printk("ehci-tegra %s return irq_status=%d\n", __func__,irq_status); return irq_status; } @@ -614,11 +630,21 @@ static int tegra_usb_resume(struct usb_hcd *hcd, bool is_dpd) tegra_ehci_power_up(hcd, is_dpd); set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); + // tmtmtm: OTG PLUG + // original intent: skip the default restart, if host_mode_charging is set + //if(host_mode_charging_state) { + // printk("ehci-tegra ######### tegra_usb_resume host_mode_charging: special\n"); if(smb347_deep_sleep) { printk("ehci-tegra %s wake-up/OTG-PLUG with smb347_deep_sleep: special\n", __func__); + // kommt u.a. + // wenn der gepowerte OTG adapter gesteckt wird (mobile-use kernel) + // fix: skip default restart } else if ((tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) || (hsic) || - (null_ulpi)) { + (null_ulpi)) + { + //printk("ehci-tegra #### tegra_usb_resume !host_mode_charging: restart\n"); + printk("ehci-tegra %s wake-up/OTG-PLUG without smb347_deep_sleep: normal restart\n", __func__); goto restart; } diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index bfc97d04fc0..9be485e8b21 100644 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -5,7 +5,6 @@ * * Copyright (C) 2010 NVIDIA Corp. * Copyright (C) 2010 Google, Inc. 
- * Copyright (C) 2013 Timur Mehrvarz * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -169,6 +168,7 @@ void tegra_start_host(struct tegra_otg_data *tegra) void tegra_stop_host(struct tegra_otg_data *tegra) { + //dev_info(tegra->otg.dev, "tegra_stop_host\n"); if (tegra->pdev) { tegra_usb_otg_host_unregister(tegra->pdev); tegra->pdev = NULL; @@ -233,11 +233,17 @@ static void irq_work(struct work_struct *work) dev_info(tegra->otg.dev, "%s --> %s\n", tegra_state_name(from), tegra_state_name(to)); + if(smb347_deep_sleep>0) { + smb347_deep_sleep = 0; + dev_info(tegra->otg.dev, "smb347_deep_sleep cleared\n"); + } + // tmtmtm - smb347_deep_sleep = 0; if (to == OTG_STATE_A_SUSPEND) { if (from == OTG_STATE_A_HOST) { + //dev_info(tegra->otg.dev, "tegra->charger_cb() h->s before\n"); if (tegra->charger_cb) { + //dev_info(tegra->otg.dev, "tegra->charger_cb() h->s\n"); tegra->charger_cb(to, from, tegra->charger_cb_data); // smb347_otg_status() } tegra_stop_host(tegra); @@ -251,8 +257,10 @@ static void irq_work(struct work_struct *work) //if (from != OTG_STATE_A_HOST) if (from == OTG_STATE_A_SUSPEND) { if (tegra->charger_cb) { + //dev_info(tegra->otg.dev, "tegra->charger_cb() ?->h\n"); tegra->charger_cb(to, from, tegra->charger_cb_data); // smb347_otg_status() } + //dev_info(tegra->otg.dev, "tegra->charger_cb() ?->h after\n"); tegra_start_host(tegra); } } From 99ee1e280309a9bef14cc61bd1f1ef537eea3324 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 May 2013 09:51:32 -0400 Subject: [PATCH 485/678] Revert "fast charge switch" This reverts commit 1ae2442ea8026605fc90b2aac9f82c4e7d07799a. --- arch/arm/mach-tegra/usbhost.c | 41 +++++------ drivers/power/smb347-charger.c | 127 +++++---------------------------- 2 files changed, 35 insertions(+), 133 deletions(-) diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c index 145f5bcd70d..bff07c37d9a 100644 --- a/arch/arm/mach-tegra/usbhost.c +++ b/arch/arm/mach-tegra/usbhost.c @@ -1,12 +1,8 @@ #include #include -extern int smb347_event_fi(void); -extern int smb347_event_fastcharge(void); - // TODO: need to persist all 3 values -/* ----------------------------------------- */ int usbhost_fixed_install_mode = 0; static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -17,34 +13,14 @@ static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribu static ssize_t fixed_install_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { sscanf(buf, "%du", &usbhost_fixed_install_mode); - // TODO: event -> smb347_charger.c - smb347_event_fi(); return count; } static struct kobj_attribute fixed_install_mode_attribute = __ATTR(usbhost_fixed_install_mode, 0666, fixed_install_mode_show, fixed_install_mode_store); -/* ----------------------------------------- */ -int usbhost_fastcharge_in_host_mode = 0; - -static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", usbhost_fastcharge_in_host_mode); -} - -static ssize_t fastcharge_in_host_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) -{ - sscanf(buf, "%du", &usbhost_fastcharge_in_host_mode); - // TODO: event -> smb347_charger.c - smb347_event_fastcharge(); - return count; -} -static struct kobj_attribute fastcharge_in_host_mode_attribute = - 
__ATTR(usbhost_fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); -/* ----------------------------------------- */ int usbhost_hotplug_on_boot = 0; static ssize_t hotplug_on_boot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -63,6 +39,23 @@ static struct kobj_attribute hotplug_on_boot_attribute = +int usbhost_fastcharge_in_host_mode = 0; + +static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", usbhost_fastcharge_in_host_mode); +} + +static ssize_t fastcharge_in_host_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + sscanf(buf, "%du", &usbhost_fastcharge_in_host_mode); + return count; +} + +static struct kobj_attribute fastcharge_in_host_mode_attribute = + __ATTR(usbhost_fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); + + static struct attribute *attrs[] = { &fixed_install_mode_attribute.attr, diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index e3a3a8a41e4..50afc8c9203 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -110,10 +110,6 @@ #define ADAPTER_PROTECT_DELAY (4*HZ) #define GPIO_AC_OK TEGRA_GPIO_PV1 -/* Global functions declaration */ -int smb347_event_fi(void); -int smb347_event_fastcharge(void); - /* Functions declaration */ static int smb347_configure_charger(struct i2c_client *client, int value); static int smb347_configure_interrupts(struct i2c_client *client); @@ -138,6 +134,11 @@ static int gpio_dock_in = 0; //static int fixed_install_mode = 0; extern int usbhost_fixed_install_mode; extern int usbhost_fastcharge_in_host_mode; +// TODO: need select threads to dicover value changes +// when usbhost_fixed_install_mode from 1 to 0: enable slave charging +// when usbhost_fixed_install_mode from 0 to 1: disable slave charging +// when usbhost_fastcharge_in_host_mode from 0 to 1: enable fast charging (when in host mode) +// when usbhost_fastcharge_in_host_mode from 1 to 0: disable fast charging (when in host mode) volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c static volatile int host_mode_charging_state = 0; @@ -263,8 +264,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch { int ret = 0; - printk("smb347_configure_otg otg=%d chargeSlaves=%d stopSlaves=%d lastOtg=%d\n", - enableOTG, chargeSlaves, stopChargeSlaves, lastOtgState); + printk("smb347_configure_otg %d %d %d %d\n",enableOTG, chargeSlaves, stopChargeSlaves, lastOtgState); /*Enable volatile writes to registers*/ ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); @@ -519,7 +519,7 @@ smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) else setting |= 0x03; - printk(KERN_INFO "[charger] set charger limit, limit=%u retval =%x setting=%x\n", + printk(KERN_INFO "[charger] set cahrger limmit, limit=%u retval =%x setting=%x\n", current_limit, retval, setting); ret = smb347_write(client, smb347_CHRG_CRNTS, setting); @@ -925,7 +925,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo // we actually depend on it to arrive in parallel // make external power detectable in case it is coming back - printk("smb347_otg_status make external power detectable1\n"); + printk("smb347_otg_status make external power detectable\n"); ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in 
configuring" @@ -999,7 +999,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo // make external power detectable in case it is coming back // FIXME: probably better do this only, if lastChargeSlaveDevicesState is not set if(!lastChargeSlaveDevicesState) { - printk("smb347_otg_status make external power detectable2\n"); + printk("smb347_otg_status make external power detectable\n"); ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" @@ -1176,7 +1176,6 @@ static void inok_isr_work_function(struct work_struct *dat) struct i2c_client *client = charger->client; // called on power loss/gain, but also if just a bare (non-powered) OTG adapter is pulled - // also if FI is disabled via sysfs printk("inok_isr_work_function lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); @@ -1236,9 +1235,7 @@ static void inok_isr_work_function(struct work_struct *dat) } // we were NOT in externally powered host mode - if(!lastChargeSlaveDevicesState) { - cable_type_detect(); - } + cable_type_detect(); if(charger->cur_cable_type!=1 && charger->cur_cable_type!=3) { // still no power incoming printk("inok_isr_work_function no power lastExternalPowerState=%d\n",lastExternalPowerState); @@ -1251,16 +1248,14 @@ static void inok_isr_work_function(struct work_struct *dat) lastExternalPowerState = 0; } - if(!lastChargeSlaveDevicesState) { - // make external power detectable - printk("inok_isr_work_function make external power detectable2\n"); - // 2013-01-28: crash here after - int ret = smb347_configure_interrupts(client); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - printk("inok_isr_work_function make external power detectable2 done\n"); - } + // make external power detectable + printk("inok_isr_work_function make external power detectable2\n"); + // 2013-01-28: crash here after + int ret = smb347_configure_interrupts(client); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + printk("inok_isr_work_function make external power detectable2 done\n"); return; } @@ -1373,92 +1368,6 @@ static void smb347_default_setback(void) } } -int smb347_event_fi(void) { - // called by usbhost.c sysfs change from user space - struct i2c_client *client = charger->client; - printk("smb347_event_fi %d\n",usbhost_fixed_install_mode); - if(usbhost_fixed_install_mode>0) { - // from OTG to FI - // make external power detectable in case it is coming back - int ret = smb347_configure_interrupts(client); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - // battery will be charged - ret = smb347_configure_charger(client, 1); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - - // enable OTG, disable slave charging - if(smb347_configure_otg(client, 1, 0, lastChargeSlaveDevicesState)<0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - - // inok_isr_work_function() will now be called - schedule_timeout_interruptible(msecs_to_jiffies(100)); - cable_type_detect(); - - // FIXME: switching from OTG to FI: does NOT remove power from slave (only briefly; needs OTG-cables to be pulled) - // will also NOT accept external power now - - // wenn ich anschliessend OTG ziehe und aufstecke, - // fährt alles hoch, nur der DAC wird nicht erkannt (device not accepting address 3, error -32) - 
// erst wenn ich dem DAC den strom ziehe, geht er wieder - // ergo: durch das aus- und einschalten von FI wird der DAC temporär gestört - - } else { - // from FI to OTG: enable slave charging - printk("enable slave charging lastExternalPowerState=%d\n",lastExternalPowerState); - // battery will NOT be charged - int ret = smb347_configure_charger(client, 0); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - if(lastExternalPowerState) { - cancel_delayed_work(&charger->curr_limit_work); - cancel_delayed_work(&charger->inok_isr_work); - smb347_clear_interrupts(client); - - // make device aware it is now discharging - lastExternalPowerState = 0; - } - // enableOTG, chargeSlaves, don't stopChargeSlaves - if(smb347_configure_otg(client, 1, 1, 0)<0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - // inok_isr_work_function() will now be called - - // FIXME: switching from FI to OTG (after power being removed): - // DOES power the slave, but slaves are NOT detected, not even when replugged - // music plays on speaker - } -} - - -int smb347_event_fastcharge(void) { - // called by usbhost.c sysfs change from user space - printk("smb347_event_fastcharge %d\n",usbhost_fastcharge_in_host_mode); - if(host_mode_charging_state>0) { - if(usbhost_fastcharge_in_host_mode) { - printk(KERN_INFO "host mode charging ac\n"); - charger->cur_cable_type = ac_cable; - battery_callback(ac_cable); -#ifdef TOUCH_CALLBACK_ENABLED - touch_callback(ac_cable); -#endif - } else { - printk(KERN_INFO "host mode charging usb\n"); - charger->cur_cable_type = usb_cable; - battery_callback(usb_cable); -#ifdef TOUCH_CALLBACK_ENABLED - touch_callback(usb_cable); -#endif - } - } -} - - static int __devinit smb347_probe(struct i2c_client *client, const struct i2c_device_id *id) { From 73adbd212e6bf9f171a4d1d648652d9ec91c1855 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 May 2013 09:51:34 -0400 Subject: [PATCH 486/678] Revert "changes to allow dynamic switching of usbhost_fixed_install_mode; resolving issues around lastChargeSlaveDevicesState" This reverts commit 083176f993e482bc6bb99c6b4850c89bcb545904. 
--- drivers/power/smb347-charger.c | 48 ++++++++++++++++------------------ 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index 50afc8c9203..cb6f71c0e08 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -347,25 +347,25 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch printk("smb347_configure_otg lastChargeSlaveDevicesState=%d\n",lastChargeSlaveDevicesState); } } - } - - if(stopChargeSlaves) { - if(lastChargeSlaveDevicesState) { - //printk("smb347_configure_otg stop charging slaves\n"); - /* Configure INOK to be active low */ - ret = smb347_read(client, smb347_SYSOK_USB3); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } + else + if(stopChargeSlaves) { + if(lastChargeSlaveDevicesState) { + //printk("smb347_configure_otg stop charging slaves\n"); + /* Configure INOK to be active low */ + ret = smb347_read(client, smb347_SYSOK_USB3); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } - ret = smb347_write(client, smb347_SYSOK_USB3, (ret & (~(1)))); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; + ret = smb347_write(client, smb347_SYSOK_USB3, (ret & (~(1)))); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } + lastChargeSlaveDevicesState = 0; + printk("smb347_configure_otg lastChargeSlaveDevicesState=%d\n",lastChargeSlaveDevicesState); } - lastChargeSlaveDevicesState = 0; - printk("smb347_configure_otg lastChargeSlaveDevicesState=%d\n",lastChargeSlaveDevicesState); } } @@ -913,7 +913,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo "otg..\n", __func__); // disableOTG, dont chargeSlaves, don't stopChargeSlaves printk("smb347_otg_status disableOTG, dont chargeSlaves, don't stopChargeSlaves\n"); - ret = smb347_configure_otg(client, 0, 0, lastChargeSlaveDevicesState); + ret = smb347_configure_otg(client, 0, 0, 0); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); @@ -971,7 +971,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo "otg..\n", __func__); // enableOTG, don't chargeSlaves, don't stopChargeSlaves printk("smb347_otg_status enableOTG, dont chargeSlaves, don't stopChargeSlaves\n"); - ret = smb347_configure_otg(client, 1, 0, lastChargeSlaveDevicesState); + ret = smb347_configure_otg(client, 1, 0, 0); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); @@ -997,14 +997,12 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo //if(!newExternalPowerState /*&& !lastChargeSlaveDevicesState*/) { // make external power detectable in case it is coming back - // FIXME: probably better do this only, if lastChargeSlaveDevicesState is not set - if(!lastChargeSlaveDevicesState) { printk("smb347_otg_status make external power detectable\n"); ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); - } + //} lastExternalPowerState = newExternalPowerState; printk("smb347_otg_status DONE lastOtgState=%d externalPowerState=%d chargeSlaveDevicesState=%d\n", @@ -1191,7 +1189,7 @@ static void inok_isr_work_function(struct work_struct *dat) // stop host-mode, don't chargeSlaves, don't stopChargeSlaves printk("inok_isr_work_function fixed_install stop host-mode, don't 
chargeSlaves, don't stopChargeSlaves\n"); - if(smb347_configure_otg(client, 0, 0, lastChargeSlaveDevicesState)<0) + if(smb347_configure_otg(client, 0, 0, 0)<0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); @@ -1220,7 +1218,7 @@ static void inok_isr_work_function(struct work_struct *dat) smb347_otg_status(OTG_STATE_A_HOST,OTG_STATE_A_HOST,NULL); } - if(!lastExternalPowerState && !lastChargeSlaveDevicesState) { + if(!lastExternalPowerState) { // make external power detectable in case it is coming back printk("inok_isr_work_function make external power detectable1\n"); int ret = smb347_configure_interrupts(client); @@ -1265,7 +1263,7 @@ static void inok_isr_work_function(struct work_struct *dat) // host_mode_charging_state may have been set by cable_type_detect() if(host_mode_charging_state>0 && lastOtgState==0) { printk("inok_isr_work_function external power available, start host mode\n"); - if(smb347_configure_otg(client, 1, 0, lastChargeSlaveDevicesState)<0) + if(smb347_configure_otg(client, 1, 0, 0)<0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); } From 1cfd70006e0e326ad93d4d8e97c97a318aa52ee1 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 May 2013 09:51:36 -0400 Subject: [PATCH 487/678] Revert "added fast-charging in hostmode support" This reverts commit 04b1bedb71ecc3d0f8e2ec6d5087895a8f9a0120. --- arch/arm/mach-tegra/usbhost.c | 32 ++++++++++---------- drivers/power/smb347-charger.c | 54 ++++++++++------------------------ drivers/usb/otg/tegra-otg.c | 6 ++-- 3 files changed, 33 insertions(+), 59 deletions(-) diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c index bff07c37d9a..835bd0ebd49 100644 --- a/arch/arm/mach-tegra/usbhost.c +++ b/arch/arm/mach-tegra/usbhost.c @@ -1,59 +1,57 @@ #include #include -// TODO: need to persist all 3 values - -int usbhost_fixed_install_mode = 0; +int fixed_install_mode = 0; static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", usbhost_fixed_install_mode); + return sprintf(buf, "%d\n", fixed_install_mode); } static ssize_t fixed_install_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - sscanf(buf, "%du", &usbhost_fixed_install_mode); + sscanf(buf, "%du", &fixed_install_mode); return count; } static struct kobj_attribute fixed_install_mode_attribute = - __ATTR(usbhost_fixed_install_mode, 0666, fixed_install_mode_show, fixed_install_mode_store); + __ATTR(fixed_install_mode, 0666, fixed_install_mode_show, fixed_install_mode_store); -int usbhost_hotplug_on_boot = 0; +int hotplug_on_boot = 0; static ssize_t hotplug_on_boot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", usbhost_hotplug_on_boot); + return sprintf(buf, "%d\n", hotplug_on_boot); } static ssize_t hotplug_on_boot_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - sscanf(buf, "%du", &usbhost_hotplug_on_boot); + sscanf(buf, "%du", &hotplug_on_boot); return count; } static struct kobj_attribute hotplug_on_boot_attribute = - __ATTR(usbhost_hotplug_on_boot, 0666, hotplug_on_boot_show, hotplug_on_boot_store); + __ATTR(hotplug_on_boot, 0666, hotplug_on_boot_show, hotplug_on_boot_store); -int usbhost_fastcharge_in_host_mode = 0; +int fastcharge_in_host_mode = 0; static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", 
usbhost_fastcharge_in_host_mode); + return sprintf(buf, "%d\n", fastcharge_in_host_mode); } static ssize_t fastcharge_in_host_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - sscanf(buf, "%du", &usbhost_fastcharge_in_host_mode); + sscanf(buf, "%du", &fastcharge_in_host_mode); return count; } static struct kobj_attribute fastcharge_in_host_mode_attribute = - __ATTR(usbhost_fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); + __ATTR(fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); @@ -74,9 +72,9 @@ int usbhost_init(void) { int retval; - usbhost_fixed_install_mode = 1; - usbhost_hotplug_on_boot = 0; - usbhost_fastcharge_in_host_mode = 0; + fixed_install_mode = 0; + hotplug_on_boot = 0; + fastcharge_in_host_mode = 0; usbhost_kobj = kobject_create_and_add("usbhost", kernel_kobj); if (!usbhost_kobj) { diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index cb6f71c0e08..9f10ec17ad9 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -132,14 +132,7 @@ static int gpio_dock_in = 0; // tmtmtm: also modify 'export KBUILD_BUILD_USER=timur-usbhost-fi-2013-01-01 //static int fixed_install_mode = 0; -extern int usbhost_fixed_install_mode; -extern int usbhost_fastcharge_in_host_mode; -// TODO: need select threads to dicover value changes -// when usbhost_fixed_install_mode from 1 to 0: enable slave charging -// when usbhost_fixed_install_mode from 0 to 1: disable slave charging -// when usbhost_fastcharge_in_host_mode from 0 to 1: enable fast charging (when in host mode) -// when usbhost_fastcharge_in_host_mode from 1 to 0: disable fast charging (when in host mode) - +extern int fixed_install_mode; volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c static volatile int host_mode_charging_state = 0; static volatile int lastExternalPowerState = 0; @@ -275,7 +268,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch } // tmtmtm: we will never charge slave devices in fixed_install_mode - if(!usbhost_fixed_install_mode) { + if(!fixed_install_mode) { if(chargeSlaves) { if(!lastChargeSlaveDevicesState) { /* Configure INOK to be active high */ @@ -332,7 +325,7 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch } // tmtmtm: we will never charge slave devices in fixed_install_mode - if(!usbhost_fixed_install_mode) { + if(!fixed_install_mode) { if(chargeSlaves) { if(!lastChargeSlaveDevicesState) { /* Change "OTG output current limit" from 250mA to 750mA */ @@ -412,7 +405,7 @@ static int smb347_configure_charger(struct i2c_client *client, int value) */ } else { // tmtmtm: make sure to NEVER call this in fixed_install_mode - printk("smb347_configure_charger do not charge; fixed_install_mode=%d\n",usbhost_fixed_install_mode); + printk("smb347_configure_charger do not charge; fixed_install_mode=%d\n",fixed_install_mode); ret = smb347_read(client, smb347_CMD_REG); if (ret < 0) { dev_err(&client->dev, "%s: err %d\n", __func__, ret); @@ -474,8 +467,6 @@ smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) if (charger->curr_limit == current_limit) return ret; - printk("smb347_set_InputCurrentlimit %d\n",current_limit); - wake_lock(&charger_wakelock); /* Enable volatile writes to registers */ ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); @@ -883,11 +874,11 @@ static void smb347_otg_status(enum usb_otg_state to, enum 
usb_otg_state from, vo int newExternalPowerState=0; printk("smb347_otg_status from=%d to=%d lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d fixed_install_mode=%d\n", - from,to,lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState,usbhost_fixed_install_mode); + from,to,lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState,fixed_install_mode); if(to==10) { - // prevent race condition bug: only when going suspend (OTG PULL) - // insert small sleep, so that ehci-tegra #### tegra_usb_resume can run first + // only when going suspend (OTG PULL) + // small sleep, so that ehci-tegra #### tegra_usb_resume can run first // and use host_mode_charging_state's current value (probably charging), // before we call cable_type_detect() (when it will likely switch to not charging) // FIXME: but is tegra_usb_resume not only called on OTG PLUG? @@ -904,7 +895,7 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo if(!newExternalPowerState) { // no external power - if(usbhost_fixed_install_mode) { + if(fixed_install_mode) { // allow battery to be charged printk("smb347_otg_status allow battery to be charged\n"); ret = smb347_configure_charger(client, 1); @@ -934,7 +925,6 @@ static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, vo printk("smb347_otg_status waiting for external power...\n"); // if power is detected, inok_isr_work_function will strike after aprox 1500 ms schedule_timeout_interruptible(msecs_to_jiffies(500)); - // FIXME: abort condition schedule_timeout_interruptible(msecs_to_jiffies(500)); schedule_timeout_interruptible(msecs_to_jiffies(400)); schedule_timeout_interruptible(msecs_to_jiffies(400)); @@ -1067,7 +1057,7 @@ static int cable_type_detect(void) // tmtmtm charger->cur_cable_type = ac_cable; - if(usbhost_fixed_install_mode) { + if(fixed_install_mode) { host_mode_charging_state = 1; printk(KERN_INFO "cable_type_detect() enabled host_mode_charging_state on DC_IN ######\n"); } @@ -1090,7 +1080,6 @@ static int cable_type_detect(void) touch_callback(ac_cable); #endif } else if (retval == APSD_DCP) { - // Asus power supply printk(KERN_INFO "Cable: DCP\n"); charger->cur_cable_type = ac_cable; success = battery_callback(ac_cable); @@ -1112,24 +1101,13 @@ static int cable_type_detect(void) touch_callback(usb_cable); #endif } else if(retval == APSD_HOST_MODE_CHARGING) { // tmtmtm - - if(usbhost_fastcharge_in_host_mode) { - printk(KERN_INFO "Cable: host mode charging ac\n"); - charger->cur_cable_type = ac_cable; - success = battery_callback(ac_cable); -#ifdef TOUCH_CALLBACK_ENABLED - touch_callback(ac_cable); -#endif - } else { - printk(KERN_INFO "Cable: host mode charging usb\n"); - charger->cur_cable_type = usb_cable; - success = battery_callback(usb_cable); + printk(KERN_INFO "Cable: host mode charging\n"); + charger->cur_cable_type = usb_cable; + success = battery_callback(usb_cable); + host_mode_charging_state = 1; // tmtmtm #ifdef TOUCH_CALLBACK_ENABLED - touch_callback(usb_cable); + touch_callback(usb_cable); #endif - } - host_mode_charging_state = 1; // tmtmtm - } else { charger->cur_cable_type = unknow_cable; printk(KERN_INFO "Unkown Plug In Cable type !\n"); @@ -1147,7 +1125,7 @@ static int cable_type_detect(void) printk(KERN_INFO "USBIN=0\n"); // tmtmtm: battery tab keeps stating "Charging (AC)" - if(usbhost_fixed_install_mode) { + if(fixed_install_mode) { host_mode_charging_state = 0; printk(KERN_INFO "cable_type_detect() disabled host_mode_charging_state ############\n"); } @@ -1184,7 
+1162,7 @@ static void inok_isr_work_function(struct work_struct *dat) cancel_delayed_work(&charger->inok_isr_work); // tmtmtm: no external power: in fixed_install_mode we prepare for power to come back - if(usbhost_fixed_install_mode) { + if(fixed_install_mode) { smb347_clear_interrupts(client); // stop host-mode, don't chargeSlaves, don't stopChargeSlaves diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index 9be485e8b21..def2b91bebe 100644 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -233,10 +233,8 @@ static void irq_work(struct work_struct *work) dev_info(tegra->otg.dev, "%s --> %s\n", tegra_state_name(from), tegra_state_name(to)); - if(smb347_deep_sleep>0) { - smb347_deep_sleep = 0; - dev_info(tegra->otg.dev, "smb347_deep_sleep cleared\n"); - } + smb347_deep_sleep = 0; + dev_info(tegra->otg.dev, "smb347_deep_sleep cleared\n"); // tmtmtm if (to == OTG_STATE_A_SUSPEND) { From a59569db86e611c5fcc3d35477a87477ae2eba2a Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 May 2013 09:51:39 -0400 Subject: [PATCH 488/678] Revert "usbhost kconfig" This reverts commit 77bb2604802398e65ed288f81577ca6323863cad. --- arch/arm/mach-tegra/Kconfig | 7 --- arch/arm/mach-tegra/Makefile | 2 - arch/arm/mach-tegra/usbhost.c | 96 ---------------------------------- drivers/power/smb347-charger.c | 9 ++-- drivers/usb/host/ehci-tegra.c | 1 + sound/usb/card.c | 4 +- 6 files changed, 6 insertions(+), 113 deletions(-) delete mode 100644 arch/arm/mach-tegra/usbhost.c diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index ff52f42efbd..9b328e97a68 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -635,11 +635,4 @@ config TEGRA_PREINIT_CLOCKS help Preinitialize Tegra clocks to known states before actual full- scale clock initialization starts. - -config USBHOST - bool "USBHOST related runtime configuration" - default y - help - A simple sysfs interface to allow switching between OTG and FI mode. 
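The help text above refers to the sysfs interface implemented by arch/arm/mach-tegra/usbhost.c (the file removed in full below): usbhost_init() registers a "usbhost" kobject under kernel_kobj, so the three attributes are exposed as /sys/kernel/usbhost/fixed_install_mode, /sys/kernel/usbhost/hotplug_on_boot and /sys/kernel/usbhost/fastcharge_in_host_mode. A minimal user-space sketch of switching between FI and OTG mode, assuming that path layout; this is illustrative only and not part of any patch in this series:

#include <stdio.h>

/* Write "1" for fixed-install (FI) mode, "0" for normal OTG/mobile mode.
 * The path follows from kobject_create_and_add("usbhost", kernel_kobj)
 * and the __ATTR(fixed_install_mode, ...) attribute in usbhost.c. */
static int set_fixed_install_mode(int enable)
{
	FILE *f = fopen("/sys/kernel/usbhost/fixed_install_mode", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", enable);
	return fclose(f);
}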
- endif diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile index a6fe3553ad1..9ea2aefe87a 100755 --- a/arch/arm/mach-tegra/Makefile +++ b/arch/arm/mach-tegra/Makefile @@ -28,8 +28,6 @@ obj-y += pm.o obj-$(CONFIG_TEGRA_WDT_RECOVERY) += wdt-recovery.o obj-$(CONFIG_PM_SLEEP) += pm-irq.o obj-y += gic.o -#obj-y += otgfi.o # tmtmtm -obj-$(CONFIG_USBHOST) += usbhost.o # tmtmtm obj-y += sleep.o diff --git a/arch/arm/mach-tegra/usbhost.c b/arch/arm/mach-tegra/usbhost.c deleted file mode 100644 index 835bd0ebd49..00000000000 --- a/arch/arm/mach-tegra/usbhost.c +++ /dev/null @@ -1,96 +0,0 @@ -#include -#include - -int fixed_install_mode = 0; - -static ssize_t fixed_install_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", fixed_install_mode); -} - -static ssize_t fixed_install_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) -{ - sscanf(buf, "%du", &fixed_install_mode); - return count; -} - -static struct kobj_attribute fixed_install_mode_attribute = - __ATTR(fixed_install_mode, 0666, fixed_install_mode_show, fixed_install_mode_store); - - - -int hotplug_on_boot = 0; - -static ssize_t hotplug_on_boot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", hotplug_on_boot); -} - -static ssize_t hotplug_on_boot_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) -{ - sscanf(buf, "%du", &hotplug_on_boot); - return count; -} - -static struct kobj_attribute hotplug_on_boot_attribute = - __ATTR(hotplug_on_boot, 0666, hotplug_on_boot_show, hotplug_on_boot_store); - - - -int fastcharge_in_host_mode = 0; - -static ssize_t fastcharge_in_host_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", fastcharge_in_host_mode); -} - -static ssize_t fastcharge_in_host_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) -{ - sscanf(buf, "%du", &fastcharge_in_host_mode); - return count; -} - -static struct kobj_attribute fastcharge_in_host_mode_attribute = - __ATTR(fastcharge_in_host_mode, 0666, fastcharge_in_host_mode_show, fastcharge_in_host_mode_store); - - - -static struct attribute *attrs[] = { - &fixed_install_mode_attribute.attr, - &hotplug_on_boot_attribute.attr, - &fastcharge_in_host_mode_attribute.attr, - NULL, -}; - -static struct attribute_group attr_group = { - .attrs = attrs, -}; - -static struct kobject *usbhost_kobj; - -int usbhost_init(void) -{ - int retval; - - fixed_install_mode = 0; - hotplug_on_boot = 0; - fastcharge_in_host_mode = 0; - - usbhost_kobj = kobject_create_and_add("usbhost", kernel_kobj); - if (!usbhost_kobj) { - return -ENOMEM; - } - retval = sysfs_create_group(usbhost_kobj, &attr_group); - if (retval) - kobject_put(usbhost_kobj); - return retval; -} - -void usbhost_exit(void) -{ - kobject_put(usbhost_kobj); -} - -module_init(usbhost_init); -module_exit(usbhost_exit); - diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index 9f10ec17ad9..cf4828f27ef 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -131,8 +131,7 @@ static unsigned int pcba_ver; static int gpio_dock_in = 0; // tmtmtm: also modify 'export KBUILD_BUILD_USER=timur-usbhost-fi-2013-01-01 -//static int fixed_install_mode = 0; -extern int fixed_install_mode; +static int fixed_install_mode = 1; volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c static volatile int 
host_mode_charging_state = 0; static volatile int lastExternalPowerState = 0; @@ -1198,7 +1197,7 @@ static void inok_isr_work_function(struct work_struct *dat) if(!lastExternalPowerState) { // make external power detectable in case it is coming back - printk("inok_isr_work_function make external power detectable1\n"); + printk("inok_isr_work_function make external power detectable\n"); int ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" @@ -1225,13 +1224,11 @@ } // make external power detectable - printk("inok_isr_work_function make external power detectable2\n"); - // 2013-01-28: crash here after + printk("inok_isr_work_function make external power detectable\n"); int ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); - printk("inok_isr_work_function make external power detectable2 done\n"); return; } diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 24443ebf5b6..bf728f259ef 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -632,6 +632,7 @@ static int tegra_usb_resume(struct usb_hcd *hcd, bool is_dpd) // tmtmtm: OTG PLUG // original intent: skip the default restart, if host_mode_charging is set + // FIXME: no restriction to fixed_install_mode here? //if(host_mode_charging_state) { // printk("ehci-tegra ######### tegra_usb_resume host_mode_charging: special\n"); if(smb347_deep_sleep) { diff --git a/sound/usb/card.c b/sound/usb/card.c index 54b9f1f5465..6fb41f99965 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -479,7 +479,7 @@ snd_usb_audio_probe(struct usb_device *dev, le16_to_cpu(dev->descriptor.idProduct)); if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum) goto __err_val; - +/* // tmtmtm: we don't want the USB DAC to become the primary sound card // in order for a USB DAC, connected at boot time, to become available as // an *overlay* primary sound card, we must postpone device probe @@ -497,7 +497,7 @@ snd_usb_audio_probe(struct usb_device *dev, goto __err_val; } //printk("##### sound/usb/card.c REGISTER tv_sec=%d ++++++++++++++++++++++++\n",tp.tv_sec); - +*/ if (snd_usb_apply_boot_quirk(dev, intf, quirk) < 0) goto __err_val; From 9df263baa9e1015dfd715e55c57896246468e2fb Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 May 2013 09:51:41 -0400 Subject: [PATCH 489/678] Revert "disabled dac as secondary" This reverts commit a90788f25543b37e3c831bdbe582673dc7ffa604.
--- drivers/power/smb347-charger.c | 2 +- sound/usb/card.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index cf4828f27ef..e752633e2ef 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -131,7 +131,7 @@ static unsigned int pcba_ver; static int gpio_dock_in = 0; // tmtmtm: also modify 'export KBUILD_BUILD_USER=timur-usbhost-fi-2013-01-01 -static int fixed_install_mode = 1; +static int fixed_install_mode = 0; volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c static volatile int host_mode_charging_state = 0; static volatile int lastExternalPowerState = 0; diff --git a/sound/usb/card.c b/sound/usb/card.c index 6fb41f99965..54b9f1f5465 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -479,7 +479,7 @@ snd_usb_audio_probe(struct usb_device *dev, le16_to_cpu(dev->descriptor.idProduct)); if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum) goto __err_val; -/* + // tmtmtm: we don't want the USB DAC to become the primary sound card // in order for a USB DAC, connected at boot time, to become available as // an *overlay* primary sound card, we must postpone device probe @@ -497,7 +497,7 @@ snd_usb_audio_probe(struct usb_device *dev, goto __err_val; } //printk("##### sound/usb/card.c REGISTER tv_sec=%d ++++++++++++++++++++++++\n",tp.tv_sec); -*/ + if (snd_usb_apply_boot_quirk(dev, intf, quirk) < 0) goto __err_val; From 1e9c83d76a8397f95924e6403a08981d36eefdc7 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 May 2013 09:51:49 -0400 Subject: [PATCH 490/678] Revert "USB AUDIO as secondary sound device" This reverts commit e86e42c29947a30e286007ce902778fb29d0bf8d. --- drivers/base/dd.c | 15 +++------------ sound/usb/card.c | 47 ----------------------------------------------- 2 files changed, 3 insertions(+), 59 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 7c8155d234f..6658da743c3 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -110,7 +110,7 @@ static int really_probe(struct device *dev, struct device_driver *drv) int ret = 0; atomic_inc(&probe_count); - pr_debug("dd.c bus: '%s': %s: probing driver %s with device %s\n", + pr_debug("bus: '%s': %s: probing driver %s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); WARN_ON(!list_empty(&dev->devres_head)); @@ -201,11 +201,10 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) { int ret = 0; - //pr_info("dd.c driver_probe_device\n"); if (!device_is_registered(dev)) return -ENODEV; - pr_debug("dd.c bus: '%s': %s: matched device %s with driver %s\n", + pr_debug("bus: '%s': %s: matched device %s with driver %s\n", drv->bus->name, __func__, dev_name(dev), drv->name); pm_runtime_get_noresume(dev); @@ -306,17 +305,9 @@ static int __driver_attach(struct device *dev, void *data) * returns 0 and the @dev->driver is set, we've found a * compatible pair. 
*/ - -// tmtmtm -struct device_driver *current_drv = NULL; - int driver_attach(struct device_driver *drv) { - // called by driver.c store_new_id() + usb_store_new_id() - // tmtmtm - current_drv = drv; - return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); - // --> probe, real_probe -> snd_usb_audio_probe + return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); } EXPORT_SYMBOL_GPL(driver_attach); diff --git a/sound/usb/card.c b/sound/usb/card.c index 54b9f1f5465..d8f2bf40145 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include @@ -55,8 +54,6 @@ #include #include #include -#include // tmtmtm -#include // tmtmtm #include "usbaudio.h" #include "card.h" @@ -77,12 +74,6 @@ MODULE_DESCRIPTION("USB Audio"); MODULE_LICENSE("GPL"); MODULE_SUPPORTED_DEVICE("{{Generic,USB Audio}}"); -// tmtmtm -struct timer_list my_timer; -struct usb_device *postpone_usb_snd_dev = NULL; -struct device_driver *postpone_usb_snd_drv = NULL; -extern struct device_driver *current_drv; - static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ @@ -432,24 +423,6 @@ static int snd_usb_audio_create(struct usb_device *dev, int idx, return 0; } -//tmtmtm -static int mykthread(void *unused) -{ - printk("##### sound/usb/card.c mykthread driver_attach\n"); - if(postpone_usb_snd_drv!=NULL) - driver_attach(postpone_usb_snd_drv); -} -static void delayed_func(unsigned long unused) -{ - printk("##### sound/usb/card.c delayed_func driver_attach\n"); - - // Must offload to another thread, in order to prevent "BUG: scheduling while atomic" - // "calling block IO api(generic_make_request) from a soft irq thread (read callback) is a bad idea" - int ret = kernel_thread(mykthread, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND | SIGCHLD); - printk("##### sound/usb/card.c delayed_func ret=%d\n",ret); -} - - /* * probe the active usb device * @@ -472,7 +445,6 @@ snd_usb_audio_probe(struct usb_device *dev, int ifnum; u32 id; - printk("##### sound/usb/card.c snd_usb_audio_probe\n"); alts = &intf->altsetting[0]; ifnum = get_iface_desc(alts)->bInterfaceNumber; id = USB_ID(le16_to_cpu(dev->descriptor.idVendor), @@ -480,25 +452,6 @@ snd_usb_audio_probe(struct usb_device *dev, if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum) goto __err_val; - // tmtmtm: we don't want the USB DAC to become the primary sound card - // in order for a USB DAC, connected at boot time, to become available as - // an *overlay* primary sound card, we must postpone device probe - - struct timespec tp; ktime_get_ts(&tp); - if (tp.tv_sec<8 && postpone_usb_snd_dev==NULL) { - printk("##### sound/usb/card.c DON'T REGISTER EARLY tv_sec=%d ++++++++++++++++++++\n",tp.tv_sec); - postpone_usb_snd_dev = dev; - postpone_usb_snd_drv = current_drv; - init_timer(&my_timer); - my_timer.expires = jiffies + 20*HZ; // n*HZ = delay in number of seconds - my_timer.function = delayed_func; - add_timer(&my_timer); - printk("##### sound/usb/card.c delayed call to driver_attach initiated\n"); - goto __err_val; - } - //printk("##### sound/usb/card.c REGISTER tv_sec=%d ++++++++++++++++++++++++\n",tp.tv_sec); - - if (snd_usb_apply_boot_quirk(dev, intf, quirk) < 0) goto __err_val; From ae153eed5897e0af5bcac27641f6a1a90d2995c0 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 May 2013 09:52:10 -0400 Subject: [PATCH 491/678] Revert "usb-hostmode-charging" This reverts commit 17ab78668efafd56376874a93f17ed3cbd1a415f. 
--- drivers/power/smb347-charger.c | 478 ++++++++------------------------- drivers/usb/host/ehci-tegra.c | 39 +-- drivers/usb/otg/tegra-otg.c | 28 +- 3 files changed, 118 insertions(+), 427 deletions(-) diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index e752633e2ef..e466bd548c5 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -103,7 +103,7 @@ #define APSD_DCP 0x02 #define APSD_OTHER 0x03 #define APSD_SDP 0x04 -#define APSD_HOST_MODE_CHARGING 0x06 +#define APSD_SDP2 0x06 // tmtmtm: USB host mode charging #define USB_30 0x20 #define DCIN_OV_UV_STS 0x50 #define DELAY_FOR_CURR_LIMIT_RECONF (60) @@ -130,15 +130,6 @@ static unsigned int project_id; static unsigned int pcba_ver; static int gpio_dock_in = 0; -// tmtmtm: also modify 'export KBUILD_BUILD_USER=timur-usbhost-fi-2013-01-01 -static int fixed_install_mode = 0; -volatile int smb347_deep_sleep = 0; // imported by ehci-tegra.c -static volatile int host_mode_charging_state = 0; -static volatile int lastExternalPowerState = 0; -static volatile int lastOtgState = 0; -static volatile int lastChargeSlaveDevicesState = 0; -static volatile int hostmode_waiting_for_power = 0; - /* Sysfs interface */ static DEVICE_ATTR(reg_status, S_IWUSR | S_IRUGO, smb347_reg_show, NULL); @@ -252,12 +243,10 @@ static void smb347_clear_interrupts(struct i2c_client *client) __func__); } -static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int chargeSlaves, int stopChargeSlaves) +static int smb347_configure_otg(struct i2c_client *client, int enable) { int ret = 0; - printk("smb347_configure_otg %d %d %d %d\n",enableOTG, chargeSlaves, stopChargeSlaves, lastOtgState); - /*Enable volatile writes to registers*/ ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); if (ret < 0) { @@ -266,98 +255,68 @@ static int smb347_configure_otg(struct i2c_client *client, int enableOTG, int ch goto error; } - // tmtmtm: we will never charge slave devices in fixed_install_mode - if(!fixed_install_mode) { - if(chargeSlaves) { - if(!lastChargeSlaveDevicesState) { - /* Configure INOK to be active high */ - //printk("smb347_configure_otg INOK to be active high\n"); - ret = smb347_update_reg(client, smb347_SYSOK_USB3, 0x01); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } + if (enable) { - /* Change "OTG output current limit" to 250mA */ - //printk("smb347_configure_otg charge slaves 250mA\n"); - ret = smb347_read(client, smb347_OTG_TLIM_REG); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } - ret = smb347_write(client, smb347_OTG_TLIM_REG, (ret & (~(1<<3)))); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } - } + /* Configure INOK to be active high */ + ret = smb347_update_reg(client, smb347_SYSOK_USB3, 0x01); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; } - } - if(enableOTG>0) { - if(!lastOtgState) { - printk("smb347_configure_otg enable host mode\n"); - ret = smb347_update_reg(client, smb347_CMD_REG, 0x10); - if (ret < 0) { - dev_err(&client->dev, "%s: Failed in writing register" - "0x%02x\n", __func__, smb347_CMD_REG); - goto error; - } - lastOtgState = 1; - } - } else if(enableOTG==0) { - if(lastOtgState) { - printk("smb347_configure_otg disable host mode\n"); - ret = smb347_read(client, smb347_CMD_REG); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } + /* Change "OTG output current 
limit" to 250mA */ + ret = smb347_read(client, smb347_OTG_TLIM_REG); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } + ret = smb347_write(client, smb347_OTG_TLIM_REG, (ret & (~(1<<3)))); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } - ret = smb347_write(client, smb347_CMD_REG, (ret & (~(1<<4)))); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } - lastOtgState=0; - } - } + /* Enable OTG */ + ret = smb347_update_reg(client, smb347_CMD_REG, 0x10); + if (ret < 0) { + dev_err(&client->dev, "%s: Failed in writing register" + "0x%02x\n", __func__, smb347_CMD_REG); + goto error; + } - // tmtmtm: we will never charge slave devices in fixed_install_mode - if(!fixed_install_mode) { - if(chargeSlaves) { - if(!lastChargeSlaveDevicesState) { - /* Change "OTG output current limit" from 250mA to 750mA */ - //printk("smb347_configure_otg charge slaves 750mA\n"); - ret = smb347_update_reg(client, smb347_OTG_TLIM_REG, 0x08); - if (ret < 0) { - dev_err(&client->dev, "%s: Failed in writing register" - "0x%02x\n", __func__, smb347_OTG_TLIM_REG); - goto error; - } - lastChargeSlaveDevicesState = 1; - printk("smb347_configure_otg lastChargeSlaveDevicesState=%d\n",lastChargeSlaveDevicesState); - } + /* Change "OTG output current limit" from 250mA to 750mA */ + ret = smb347_update_reg(client, smb347_OTG_TLIM_REG, 0x08); + if (ret < 0) { + dev_err(&client->dev, "%s: Failed in writing register" + "0x%02x\n", __func__, smb347_OTG_TLIM_REG); + goto error; + } + + } else { + /* Disable OTG */ + ret = smb347_read(client, smb347_CMD_REG); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } + + ret = smb347_write(client, smb347_CMD_REG, (ret & (~(1<<4)))); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; + } + + /* Configure INOK to be active low */ + ret = smb347_read(client, smb347_SYSOK_USB3); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; } - else - if(stopChargeSlaves) { - if(lastChargeSlaveDevicesState) { - //printk("smb347_configure_otg stop charging slaves\n"); - /* Configure INOK to be active low */ - ret = smb347_read(client, smb347_SYSOK_USB3); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } - ret = smb347_write(client, smb347_SYSOK_USB3, (ret & (~(1)))); - if (ret < 0) { - dev_err(&client->dev, "%s: err %d\n", __func__, ret); - goto error; - } - lastChargeSlaveDevicesState = 0; - printk("smb347_configure_otg lastChargeSlaveDevicesState=%d\n",lastChargeSlaveDevicesState); - } + ret = smb347_write(client, smb347_SYSOK_USB3, (ret & (~(1)))); + if (ret < 0) { + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + goto error; } } @@ -385,15 +344,14 @@ static int smb347_configure_charger(struct i2c_client *client, int value) } if (value) { - if(!host_mode_charging_state) { - printk("smb347_configure_charger accept external power\n"); - ret = smb347_update_reg(client, smb347_CMD_REG, ENABLE_CHARGE); - if (ret < 0) { - dev_err(&client->dev, "%s(): Failed in writing register" - "0x%02x\n", __func__, smb347_CMD_REG); - goto error; - } + /* Enable charging */ + ret = smb347_update_reg(client, smb347_CMD_REG, ENABLE_CHARGE); + if (ret < 0) { + dev_err(&client->dev, "%s(): Failed in writing register" + "0x%02x\n", __func__, smb347_CMD_REG); + goto error; } + /* Configure THERM ctrl */ /* ret = smb347_update_reg(client, 
smb347_THERM_CTRL, THERM_CTRL); @@ -403,8 +361,7 @@ static int smb347_configure_charger(struct i2c_client *client, int value) } */ } else { - // tmtmtm: make sure to NEVER call this in fixed_install_mode - printk("smb347_configure_charger do not charge; fixed_install_mode=%d\n",fixed_install_mode); + /* Disable charging */ ret = smb347_read(client, smb347_CMD_REG); if (ret < 0) { dev_err(&client->dev, "%s: err %d\n", __func__, ret); @@ -425,8 +382,6 @@ static int smb347_configure_charger(struct i2c_client *client, int value) goto error; } error: - if(ret!=0) - printk(KERN_INFO "smb347_configure_charger ERROR %d\n",ret); return ret; } @@ -435,7 +390,6 @@ static int smb347_charger_enable(bool enable) struct i2c_client *client = charger->client; u8 ret = 0; - printk("smb347_charger_enable %d\n",enable); if (enable) { /*Pin Controls -active low */ ret = smb347_update_reg(client, smb347_PIN_CTRL, PIN_ACT_LOW); @@ -557,8 +511,6 @@ smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) } error: - if(ret!=0) - printk(KERN_INFO "smb347_set_InputCurrentlimit ERROR %d\n",ret); wake_unlock(&charger_wakelock); return ret; } @@ -624,8 +576,6 @@ static int smb347_inok_irq(struct smb347_charger *smb) err2: gpio_free(gpio); err1: - //if(err!=0) - // printk(KERN_INFO "smb347_inok_irq ERROR %d\n",err); return err; } @@ -691,7 +641,7 @@ int smb347_hc_mode_callback(bool enable, int cur) if (charger->suspend_ongoing) return 0; - //printk("smb347_hc_mode_callback+\n"); + printk("smb347_hc_mode_callback+\n"); /* Enable volatile writes to registers */ ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); @@ -756,7 +706,6 @@ int smb347_hc_mode_callback(bool enable, int cur) } /* Disable auto power source detection (APSD) */ - //printk("smb347_hc_mode_callback Disable auto power source detection\n"); ret = smb347_clear_reg(client, smb347_CHRG_CTRL, ENABLE_APSD); if (ret < 0) { dev_err(&client->dev, "%s(): Failed in writing" @@ -781,12 +730,10 @@ int smb347_hc_mode_callback(bool enable, int cur) goto error; } - //printk("smb347_hc_mode_callback-\n"); + printk("smb347_hc_mode_callback-\n"); return ret; error: - //if(ret!=0) - // printk(KERN_INFO "smb347_hc_mode_callback ERROR %d\n",ret); return ret; } EXPORT_SYMBOL_GPL(smb347_hc_mode_callback); @@ -864,138 +811,46 @@ static int smb347_configure_interrupts(struct i2c_client *client) return ret; } -static int cable_type_detect(void); - static void smb347_otg_status(enum usb_otg_state to, enum usb_otg_state from, void *data) { struct i2c_client *client = charger->client; int ret; - int newExternalPowerState=0; - - printk("smb347_otg_status from=%d to=%d lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d fixed_install_mode=%d\n", - from,to,lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState,fixed_install_mode); - - if(to==10) { - // only when going suspend (OTG PULL) - // small sleep, so that ehci-tegra #### tegra_usb_resume can run first - // and use host_mode_charging_state's current value (probably charging), - // before we call cable_type_detect() (when it will likely switch to not charging) - // FIXME: but is tegra_usb_resume not only called on OTG PLUG? - // FIXME: do I mean "so that tegra_ehci_irq() can run first" ? 
- schedule_timeout_interruptible(msecs_to_jiffies(100)); - // when doing this pause, smb347_resume() will call cable_type_detect() before we do below - } - - cable_type_detect(); - if (to == OTG_STATE_A_HOST) { - if(charger->cur_cable_type==1 || charger->cur_cable_type==3) - newExternalPowerState = 1; - - if(!newExternalPowerState) { - // no external power - if(fixed_install_mode) { - // allow battery to be charged - printk("smb347_otg_status allow battery to be charged\n"); - ret = smb347_configure_charger(client, 1); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - // disableOTG, dont chargeSlaves, don't stopChargeSlaves - printk("smb347_otg_status disableOTG, dont chargeSlaves, don't stopChargeSlaves\n"); - ret = smb347_configure_otg(client, 0, 0, 0); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - } else { - // tmtmtm: mobile-mode: we need to be careful NOT to disable charger detection too early - // once we start charging slaves ourselfs, we will not be able to detect ext power coming in - - // also: why are we waiting here, if inok_isr_work_function is called on power - // we actually depend on it to arrive in parallel - - // make external power detectable in case it is coming back - printk("smb347_otg_status make external power detectable\n"); - ret = smb347_configure_interrupts(client); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - - printk("smb347_otg_status waiting for external power...\n"); - // if power is detected, inok_isr_work_function will strike after aprox 1500 ms - schedule_timeout_interruptible(msecs_to_jiffies(500)); - schedule_timeout_interruptible(msecs_to_jiffies(500)); - schedule_timeout_interruptible(msecs_to_jiffies(400)); - schedule_timeout_interruptible(msecs_to_jiffies(400)); - if(charger->cur_cable_type==1 || charger->cur_cable_type==3) - newExternalPowerState = 1; - if(!newExternalPowerState) { - cable_type_detect(); - if(charger->cur_cable_type==1 || charger->cur_cable_type==3) - newExternalPowerState = 1; - } - printk("smb347_otg_status waiting for external power done %d\n",newExternalPowerState); - - if(!newExternalPowerState) { - // battery will NOT be charged - ret = smb347_configure_charger(client, 0); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - // enableOTG, chargeSlaves, don't stopChargeSlaves - ret = smb347_configure_otg(client, 1, 1, 0); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - } - } - } + if ((from == OTG_STATE_A_SUSPEND) && (to == OTG_STATE_A_HOST)) { - if(newExternalPowerState) { - // allow battery to be charged - printk("smb347_otg_status allow battery to be charged\n"); - ret = smb347_configure_charger(client, 1); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - // enableOTG, don't chargeSlaves, don't stopChargeSlaves - printk("smb347_otg_status enableOTG, dont chargeSlaves, don't stopChargeSlaves\n"); - ret = smb347_configure_otg(client, 1, 0, 0); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - } + /* configure charger */ + ret = smb347_configure_charger(client, 0); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); - } else if (to == OTG_STATE_A_SUSPEND) { - - if(from == OTG_STATE_A_HOST) { - // disable host-mode and stop slave-charging - printk("smb347_otg_status disable host-mode 
and stop slave-charging\n"); - ret = smb347_configure_otg(client, 0, 0, lastChargeSlaveDevicesState); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - // allow battery to be charged - printk("smb347_otg_status allow battery to be charged\n"); - ret = smb347_configure_charger(client, 1); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - } - } + /* ENABLE OTG */ + ret = smb347_configure_otg(client, 1); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); - //if(!newExternalPowerState /*&& !lastChargeSlaveDevicesState*/) { - // make external power detectable in case it is coming back - printk("smb347_otg_status make external power detectable\n"); + } else if ((from == OTG_STATE_A_HOST) && (to == OTG_STATE_A_SUSPEND)) { + + /* Disable OTG */ + ret = smb347_configure_otg(client, 0); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + + /* configure charger */ + ret = smb347_configure_charger(client, 1); + if (ret < 0) + dev_err(&client->dev, "%s() error in configuring" + "otg..\n", __func__); + + /* ret = smb347_configure_interrupts(client); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); - //} - - lastExternalPowerState = newExternalPowerState; - printk("smb347_otg_status DONE lastOtgState=%d externalPowerState=%d chargeSlaveDevicesState=%d\n", - lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); + */ + } } /* workqueue function */ @@ -1007,7 +862,6 @@ static int cable_type_detect(void) int ac_ok = GPIO_AC_OK; int dock_in = gpio_dock_in; - printk(KERN_INFO "cable_type_detect()\n"); /* printk("cable_type_detect %d %lu %d %x jiffies=%lu %lu+\n", charger->old_cable_type, @@ -1018,19 +872,15 @@ static int cable_type_detect(void) charger->time_of_1800mA_limit+(ADAPTER_PROTECT_DELAY*HZ)); */ - if((pcba_ver <= GROUPER_PCBA_ER2) && (project_id == GROUPER_PROJECT_NAKASI)) { - printk(KERN_INFO "cable_type_detect() wrong\n"); + if((pcba_ver <= GROUPER_PCBA_ER2) && (project_id == GROUPER_PROJECT_NAKASI)) return 0; - } - host_mode_charging_state = 0; mutex_lock(&charger->cable_lock); if ((charger->old_cable_type == ac_cable) && charger->time_of_1800mA_limit && gpio_get_value(ac_ok) && time_after(charger->time_of_1800mA_limit+ ADAPTER_PROTECT_DELAY, jiffies)) { - printk(KERN_INFO "cable_type_detect() charger->test_1800mA_fail\n"); smb347_set_InputCurrentlimit(client, 900); charger->test_1800mA_fail = 1; queue_delayed_work(smb347_wq, @@ -1038,7 +888,7 @@ static int cable_type_detect(void) } if (gpio_get_value(ac_ok)) { - printk(KERN_INFO "INOK=H no power\n"); + printk(KERN_INFO "INOK=H\n"); charger->cur_cable_type = non_cable; smb347_set_InputCurrentlimit(client, 900); success = battery_callback(non_cable); @@ -1053,15 +903,8 @@ static int cable_type_detect(void) if (!(retval & DCIN_OV_UV_STS) && !gpio_get_value(dock_in)) { SMB_NOTICE("DC_IN\n"); success = battery_callback(ac_cable); - - // tmtmtm - charger->cur_cable_type = ac_cable; - if(fixed_install_mode) { - host_mode_charging_state = 1; - printk(KERN_INFO "cable_type_detect() enabled host_mode_charging_state on DC_IN ######\n"); - } - } else { + /* cable type dection */ retval = smb347_read(client, smb347_STS_REG_E); SMB_NOTICE("Reg3F : 0x%02x\n", retval); @@ -1099,17 +942,17 @@ static int cable_type_detect(void) #ifdef TOUCH_CALLBACK_ENABLED touch_callback(usb_cable); #endif - } else if(retval == APSD_HOST_MODE_CHARGING) { // tmtmtm - 
printk(KERN_INFO "Cable: host mode charging\n"); - charger->cur_cable_type = usb_cable; + // tmtmtm start + } else if(retval == APSD_SDP2) { + printk("Cable: SDP2 host mode charging\n"); success = battery_callback(usb_cable); - host_mode_charging_state = 1; // tmtmtm #ifdef TOUCH_CALLBACK_ENABLED touch_callback(usb_cable); -#endif +#endif + // tmtmtm end } else { charger->cur_cable_type = unknow_cable; - printk(KERN_INFO "Unkown Plug In Cable type !\n"); + printk(KERN_INFO "Unkown Plug In Cable type !! retval=%d\n",retval); if (gpio_get_value(dock_in)) { charger->cur_cable_type = usb_cable; success = battery_callback(usb_cable); @@ -1122,13 +965,6 @@ static int cable_type_detect(void) } else { charger->cur_cable_type = unknow_cable; printk(KERN_INFO "USBIN=0\n"); - - // tmtmtm: battery tab keeps stating "Charging (AC)" - if(fixed_install_mode) { - host_mode_charging_state = 0; - printk(KERN_INFO "cable_type_detect() disabled host_mode_charging_state ############\n"); - } - success = battery_callback(non_cable); } } } @@ -1150,106 +986,17 @@ static void inok_isr_work_function(struct work_struct *dat) { struct i2c_client *client = charger->client; - // called on power loss/gain, but also if just a bare (non-powered) OTG adapter is pulled - printk("inok_isr_work_function lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", - lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); - - if(lastOtgState>0 && lastExternalPowerState>0) { - // we used to be in externally powered host mode - // this means external power was just lost - cancel_delayed_work(&charger->curr_limit_work); - cancel_delayed_work(&charger->inok_isr_work); - - // tmtmtm: no external power: in fixed_install_mode we prepare for power to come back - if(fixed_install_mode) { - smb347_clear_interrupts(client); - - // stop host-mode, don't chargeSlaves, don't stopChargeSlaves - printk("inok_isr_work_function fixed_install stop host-mode, don't chargeSlaves, don't stopChargeSlaves\n"); - if(smb347_configure_otg(client, 0, 0, 0)<0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - - // enable external power detection - printk("inok_isr_work_function fixed_install make external power detectable\n"); - if(smb347_configure_interrupts(client)<0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - - lastExternalPowerState = 0; - printk("inok_isr_work_function fixed_install make host aware it is now discharging\n"); - // make device aware it is now discharging - // tmtmtm: notwending ??? 
- cable_type_detect(); - - } else { - printk("inok_isr_work_function lost external power in host mode; charge slave devices\n"); - - // normally, smb347_otg_status() is called whenever the OTG adapter is pulled or plugged - // here, external power was lost while the OTG adapter remained plugged - // we call smb347_otg_status() now, to activate self-charging of slave devices - // so we can continue host mode in OTG mode - // if we would NOT call smb347_otg_status() here, slave devices would stay without power now - -// tmtmtm: we don't want to call this, if OTG-adapter is pulled (not just power) - smb347_otg_status(OTG_STATE_A_HOST,OTG_STATE_A_HOST,NULL); - } + cancel_delayed_work(&charger->curr_limit_work); + cancel_delayed_work(&charger->inok_isr_work); - if(!lastExternalPowerState) { - // make external power detectable in case it is coming back - printk("inok_isr_work_function make external power detectable\n"); - int ret = smb347_configure_interrupts(client); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - } - - printk("inok_isr_work_function done lastOtgState=%d lastExternalPowerState=%d lastChargeSlaveDevicesState=%d\n", - lastOtgState,lastExternalPowerState,lastChargeSlaveDevicesState); - return; - } - - // we were NOT in externally powered host mode cable_type_detect(); - if(charger->cur_cable_type!=1 && charger->cur_cable_type!=3) { - // still no power incoming - printk("inok_isr_work_function no power lastExternalPowerState=%d\n",lastExternalPowerState); - if(lastExternalPowerState) { - cancel_delayed_work(&charger->curr_limit_work); - cancel_delayed_work(&charger->inok_isr_work); - smb347_clear_interrupts(client); - - // make device aware it is now discharging - lastExternalPowerState = 0; - } - // make external power detectable - printk("inok_isr_work_function make external power detectable\n"); - int ret = smb347_configure_interrupts(client); - if (ret < 0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - return; - } - - // power is incoming - lastExternalPowerState = 1; - - // host_mode_charging_state may have been set by cable_type_detect() - if(host_mode_charging_state>0 && lastOtgState==0) { - printk("inok_isr_work_function external power available, start host mode\n"); - if(smb347_configure_otg(client, 1, 0, 0)<0) - dev_err(&client->dev, "%s() error in configuring" - "otg..\n", __func__); - } - - //smb347_clear_interrupts(client); // FIXME??? 
- printk("inok_isr_work_function external power available lastOtgState=%d\n",lastOtgState); + smb347_clear_interrupts(client); } static void dockin_isr_work_function(struct work_struct *dat) { - //struct i2c_client *client = charger->client; + struct i2c_client *client = charger->client; int dock_in = gpio_dock_in; int ac_ok = GPIO_AC_OK; @@ -1345,9 +1092,8 @@ static int __devinit smb347_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); - int ret; - //int irq_num; - //uint8_t buf[15]; + int ret, irq_num; + uint8_t buf[15]; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) return -EIO; @@ -1405,6 +1151,7 @@ static int __devinit smb347_probe(struct i2c_client *client, } queue_delayed_work(smb347_wq, &charger->cable_det_work, 0.5*HZ); + ret = register_otg_callback(smb347_otg_status, charger); if (ret < 0) goto error; @@ -1426,7 +1173,6 @@ static int __devexit smb347_remove(struct i2c_client *client) static int smb347_suspend(struct i2c_client *client) { charger->suspend_ongoing = 1; - smb347_deep_sleep = 1; // tmtmtm printk("smb347_suspend+\n"); flush_workqueue(smb347_wq); @@ -1451,7 +1197,7 @@ static int smb347_shutdown(struct i2c_client *client) printk("smb347_shutdown+\n"); /* Disable OTG */ - ret = smb347_configure_otg(client, 0, 0, lastChargeSlaveDevicesState); + ret = smb347_configure_otg(client, 0); if (ret < 0) dev_err(&client->dev, "%s() error in configuring" "otg..\n", __func__); diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index bf728f259ef..d5212ebc968 100755 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -58,9 +58,6 @@ #define USB3_PREFETCH_ID 17 extern void baseband_xmm_L3_resume_check(void); -extern volatile int smb347_deep_sleep; // tmtmtm: from smb347-charger.c -//extern volatile int host_mode_charging_state; // tmtmtm: from smb347-charger.c -//extern int fixed_install_mode; // tmtmtm: from smb347-charger.c static struct usb_hcd *modem_ehci_handle; struct tegra_ehci_hcd { @@ -222,27 +219,11 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) } else if (tegra->bus_suspended && tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) { - // tmtmtm: OTG UNPLUG - // original intent: when waking up from deep sleep, skip the default return, - // if host_mode_charging AND fixed_install_mode are set - //if(host_mode_charging_state && fixed_install_mode) { - // printk("ehci-tegra %s waking up with host_mode_charging: special\n", __func__); - if(smb347_deep_sleep) { - printk("ehci-tegra %s wake-up/OTG-UNPLUG with smb347_deep_sleep: special\n", __func__); - // fix: skip default return - - // FIXME - // DAS EINSCHRÄNKEN AUF DEN fixed_install_mode löst das problem nur im MOBILE kernel - // ECHTE LÖSUNG: ONLY skip default return when really waking up from deep sleep - // das kann sowohl bei unplug als auch bei plug passieren - } else { - printk("ehci-tegra %s wake-up/OTG-PLUG without smb347_deep_sleep: normal return\n", __func__); + printk("%s: no device connected before suspend\n", __func__); spin_unlock(&ehci->lock); return 0; - } } spin_unlock(&ehci->lock); - //printk("ehci-tegra %s post spin_unlock\n", __func__); } irq_status = ehci_irq(hcd); @@ -252,15 +233,12 @@ static irqreturn_t tegra_ehci_irq (struct usb_hcd *hcd) } if (ehci->controller_remote_wakeup) { - //printk("ehci-tegra %s ehci->controller_remote_wakeup\n", __func__); ehci->controller_remote_wakeup = false; /* disable interrupts */ ehci_writel(ehci, 0, 
&ehci->regs->intr_enable); tegra_usb_phy_preresume(tegra->phy, true); tegra->port_resuming = 1; - //printk("ehci-tegra %s ehci->controller_remote_wakeup done\n", __func__); } - //printk("ehci-tegra %s return irq_status=%d\n", __func__,irq_status); return irq_status; } @@ -630,24 +608,9 @@ static int tegra_usb_resume(struct usb_hcd *hcd, bool is_dpd) tegra_ehci_power_up(hcd, is_dpd); set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); - // tmtmtm: OTG PLUG - // original intent: skip the default restart, if host_mode_charging is set - // FIXME: no restriction to fixed_install_mode here? - //if(host_mode_charging_state) { - // printk("ehci-tegra ######### tegra_usb_resume host_mode_charging: special\n"); - if(smb347_deep_sleep) { - printk("ehci-tegra %s wake-up/OTG-PLUG with smb347_deep_sleep: special\n", __func__); - // happens e.g. - // when the powered OTG adapter is plugged in (mobile-use kernel) - // fix: skip default restart - } else if ((tegra->port_speed > TEGRA_USB_PHY_PORT_SPEED_HIGH) || (hsic) || (null_ulpi)) - { - //printk("ehci-tegra #### tegra_usb_resume !host_mode_charging: restart\n"); - printk("ehci-tegra %s wake-up/OTG-PLUG without smb347_deep_sleep: normal restart\n", __func__); goto restart; - } /* Force the phy to keep data lines in suspend state */ tegra_ehci_phy_restore_start(tegra->phy, tegra->port_speed); diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index def2b91bebe..c1fe7f899f1 100644 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -43,8 +43,6 @@ #define USB_VBUS_STATUS (1 << 10) #define USB_INTS (USB_VBUS_INT_STATUS | USB_ID_INT_STATUS) -extern volatile int smb347_deep_sleep; // tmtmtm: from smb347-charger.c - typedef void (*callback_t)(enum usb_otg_state to, enum usb_otg_state from, void *args); @@ -168,7 +166,6 @@ void tegra_start_host(struct tegra_otg_data *tegra) void tegra_stop_host(struct tegra_otg_data *tegra) { - //dev_info(tegra->otg.dev, "tegra_stop_host\n"); if (tegra->pdev) { tegra_usb_otg_host_unregister(tegra->pdev); tegra->pdev = NULL; @@ -233,36 +230,21 @@ static void irq_work(struct work_struct *work) dev_info(tegra->otg.dev, "%s --> %s\n", tegra_state_name(from), tegra_state_name(to)); - smb347_deep_sleep = 0; - dev_info(tegra->otg.dev, "smb347_deep_sleep cleared\n"); + if (tegra->charger_cb) + tegra->charger_cb(to, from, tegra->charger_cb_data); - // tmtmtm if (to == OTG_STATE_A_SUSPEND) { - if (from == OTG_STATE_A_HOST) { - //dev_info(tegra->otg.dev, "tegra->charger_cb() h->s before\n"); - if (tegra->charger_cb) { - //dev_info(tegra->otg.dev, "tegra->charger_cb() h->s\n"); - tegra->charger_cb(to, from, tegra->charger_cb_data); // smb347_otg_status() - } + if (from == OTG_STATE_A_HOST) tegra_stop_host(tegra); - } else if (from == OTG_STATE_B_PERIPHERAL && otg->gadget) usb_gadget_vbus_disconnect(otg->gadget); } else if (to == OTG_STATE_B_PERIPHERAL && otg->gadget) { if (from == OTG_STATE_A_SUSPEND) usb_gadget_vbus_connect(otg->gadget); } else if (to == OTG_STATE_A_HOST) { - //if (from != OTG_STATE_A_HOST) - if (from == OTG_STATE_A_SUSPEND) { - if (tegra->charger_cb) { - //dev_info(tegra->otg.dev, "tegra->charger_cb() ?->h\n"); - tegra->charger_cb(to, from, tegra->charger_cb_data); // smb347_otg_status() - } - //dev_info(tegra->otg.dev, "tegra->charger_cb() ?->h after\n"); - tegra_start_host(tegra); - } + if (from == OTG_STATE_A_SUSPEND) + tegra_start_host(tegra); } - dev_info(tegra->otg.dev, "done\n"); } From 7a3bcac310e76db3279c75db8a57211720cfdebe Mon Sep 17 00:00:00 2001 From: Metallice Date:
Thu, 23 May 2013 09:59:27 -0400 Subject: [PATCH 492/678] defconfig: a55 --- arch/arm/configs/metallice_grouper_defconfig | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 31d9f71097a..8a56f0edf68 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -185,12 +185,12 @@ CONFIG_IOSCHED_CFQ=y # CONFIG_IOSCHED_VR is not set CONFIG_IOSCHED_BFQ=y CONFIG_CGROUP_BFQIO=y -# CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_ROW=y +CONFIG_DEFAULT_DEADLINE=y +# CONFIG_DEFAULT_ROW is not set # CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_BFQ is not set # CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="row" +CONFIG_DEFAULT_IOSCHED="deadline" # CONFIG_INLINE_SPIN_TRYLOCK is not set # CONFIG_INLINE_SPIN_TRYLOCK_BH is not set # CONFIG_INLINE_SPIN_LOCK is not set @@ -369,7 +369,6 @@ CONFIG_TEGRA_PLLM_RESTRICTED=y CONFIG_TEGRA_LP2_ARM_TWD=y # CONFIG_TEGRA_SLOW_CSITE is not set # CONFIG_TEGRA_PREINIT_CLOCKS is not set -CONFIG_USBHOST=y # # Processor Type @@ -3010,13 +3009,13 @@ CONFIG_FUSE_FS=y # DOS/FAT/NT Filesystems # CONFIG_FAT_FS=y -# CONFIG_MSDOS_FS is not set +CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_FAT_DEFAULT_CODEPAGE=437 CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_NTFS_FS=y # CONFIG_NTFS_DEBUG is not set -# CONFIG_NTFS_RW is not set +CONFIG_NTFS_RW=y # # Pseudo filesystems From 5ad73fae2e6670e98641a09c610659765e4f6776 Mon Sep 17 00:00:00 2001 From: adogu Date: Thu, 25 Oct 2012 17:22:49 +0800 Subject: [PATCH 493/678] Camera: fine-tune mi1040 power sequence. Change-Id: I6d9b8710a0c6c685252a36f2ca7957aadf923221 --- arch/arm/mach-tegra/board-grouper-sensors.c | 90 +++++++++++---------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-sensors.c b/arch/arm/mach-tegra/board-grouper-sensors.c index 0b3ef7b8772..08141bff9ab 100644 --- a/arch/arm/mach-tegra/board-grouper-sensors.c +++ b/arch/arm/mach-tegra/board-grouper-sensors.c @@ -40,6 +40,9 @@ #define CAM1_LDO_EN_GPIO TEGRA_GPIO_PR6 #define FRONT_YUV_SENSOR_RST_GPIO TEGRA_GPIO_PO0 +#define FRONT_YUV_SENSOR_RST_GPIO_BACH TEGRA_GPIO_PBB0 + +static int front_yuv_sensor_rst_gpio = FRONT_YUV_SENSOR_RST_GPIO; static struct regulator *grouper_1v8_ldo5; static struct regulator *grouper_1v8_cam3; @@ -63,6 +66,11 @@ static const struct i2c_board_info cap1106_i2c1_board_info[] = { static int grouper_camera_init(void) { + u32 project_info = grouper_get_project_id(); + + if (project_info == GROUPER_PROJECT_BACH) + front_yuv_sensor_rst_gpio = FRONT_YUV_SENSOR_RST_GPIO_BACH; + pmic_id = grouper_query_pmic_id(); printk("%s: pmic_id= 0x%X", __FUNCTION__, pmic_id); #if 0 @@ -165,20 +173,6 @@ static int yuv_front_sensor_power_on(void) int ret; printk("yuv_front_sensor_power_on+\n"); - /* AVDD_CAM1, 2.85V, controlled by CAM1_LDO_EN */ - pr_info("gpio %d read as %d\n",CAM1_LDO_EN_GPIO, gpio_get_value(CAM1_LDO_EN_GPIO)); - tegra_gpio_enable(CAM1_LDO_EN_GPIO); - ret = gpio_request(CAM1_LDO_EN_GPIO, "cam1_ldo_en"); - if (ret < 0) - pr_err("%s: gpio_request failed for gpio %s, ret= %d\n", - __func__, "CAM1_LDO_EN_GPIO", ret); - pr_info("gpio %d: %d", CAM1_LDO_EN_GPIO, gpio_get_value(CAM1_LDO_EN_GPIO)); - gpio_set_value(CAM1_LDO_EN_GPIO, 1); - gpio_direction_output(CAM1_LDO_EN_GPIO, 1); - pr_info("--> %d\n", gpio_get_value(CAM1_LDO_EN_GPIO)); - - msleep(5); - if (!grouper_1v8_ldo5) { if(pmic_id == GROUPER_PMIC_MAXIM) { grouper_1v8_ldo5 = 
regulator_get(NULL, "vdd_sensor_1v8"); @@ -186,43 +180,49 @@ static int yuv_front_sensor_power_on(void) grouper_1v8_ldo5 = regulator_get(NULL, "avdd_vdac"); } if (IS_ERR_OR_NULL(grouper_1v8_ldo5)) { + if (grouper_1v8_ldo5) { + regulator_put(grouper_1v8_ldo5); + } grouper_1v8_ldo5 = NULL; - pr_err("Can't get grouper_1v8_ldo5.\n"); - goto fail_to_get_reg; + pr_err("%s-: Can't get grouper_1v8_ldo5.\n", __func__); + return -ENODEV; } regulator_set_voltage(grouper_1v8_ldo5, 1800000, 1800000); regulator_enable(grouper_1v8_ldo5); } + msleep(10); + + /* AVDD_CAM1, 2.85V, controlled by CAM1_LDO_EN */ + pr_info("gpio %d read as %d\n",CAM1_LDO_EN_GPIO, gpio_get_value(CAM1_LDO_EN_GPIO)); + tegra_gpio_enable(CAM1_LDO_EN_GPIO); + ret = gpio_request(CAM1_LDO_EN_GPIO, "cam1_ldo_en"); + if (ret < 0) + pr_err("%s: gpio_request failed for gpio %s, ret= %d\n", + __func__, "CAM1_LDO_EN_GPIO", ret); + pr_info("gpio %d: %d", CAM1_LDO_EN_GPIO, gpio_get_value(CAM1_LDO_EN_GPIO)); + gpio_set_value(CAM1_LDO_EN_GPIO, 1); + gpio_direction_output(CAM1_LDO_EN_GPIO, 1); + pr_info("--> %d\n", gpio_get_value(CAM1_LDO_EN_GPIO)); + tegra_pinmux_set_tristate(TEGRA_PINGROUP_CAM_MCLK, TEGRA_TRI_NORMAL); + msleep(10); + /* yuv_sensor_rst_lo*/ - tegra_gpio_enable(FRONT_YUV_SENSOR_RST_GPIO); - ret = gpio_request(FRONT_YUV_SENSOR_RST_GPIO, "yuv_sensor_rst_lo"); + tegra_gpio_enable(front_yuv_sensor_rst_gpio); + ret = gpio_request(front_yuv_sensor_rst_gpio, "yuv_sensor_rst_lo"); if (ret < 0) pr_err("%s: gpio_request failed for gpio %s, ret= %d\n", __func__, "FRONT_YUV_SENSOR_RST_GPIO", ret); - pr_info("gpio %d: %d", FRONT_YUV_SENSOR_RST_GPIO, gpio_get_value(FRONT_YUV_SENSOR_RST_GPIO)); - gpio_set_value(FRONT_YUV_SENSOR_RST_GPIO, 1); - gpio_direction_output(FRONT_YUV_SENSOR_RST_GPIO, 1); - pr_info("--> %d\n", gpio_get_value(FRONT_YUV_SENSOR_RST_GPIO)); + pr_info("gpio %d: %d", front_yuv_sensor_rst_gpio, gpio_get_value(front_yuv_sensor_rst_gpio)); + gpio_set_value(front_yuv_sensor_rst_gpio, 1); + gpio_direction_output(front_yuv_sensor_rst_gpio, 1); + pr_info("--> %d\n", gpio_get_value(front_yuv_sensor_rst_gpio)); printk("yuv_front_sensor_power_on-\n"); return 0; - -fail_to_get_reg: - if (grouper_1v8_ldo5) { - regulator_put(grouper_1v8_ldo5); - grouper_1v8_ldo5 = NULL; - } - - gpio_set_value(CAM1_LDO_EN_GPIO, 0); - gpio_direction_output(CAM1_LDO_EN_GPIO, 0); - gpio_free(CAM1_LDO_EN_GPIO); - - printk("yuv_front_sensor_power_on- : -ENODEV\n"); - return -ENODEV; } static int yuv_front_sensor_power_off(void) @@ -230,24 +230,26 @@ static int yuv_front_sensor_power_off(void) printk("%s+\n", __FUNCTION__); if((pmic_id == GROUPER_PMIC_MAXIM) || (pmic_id == GROUPER_PMIC_TI)) { - gpio_set_value(FRONT_YUV_SENSOR_RST_GPIO, 0); - gpio_direction_output(FRONT_YUV_SENSOR_RST_GPIO, 0); - gpio_free(FRONT_YUV_SENSOR_RST_GPIO); + gpio_set_value(front_yuv_sensor_rst_gpio, 0); + gpio_direction_output(front_yuv_sensor_rst_gpio, 0); + gpio_free(front_yuv_sensor_rst_gpio); + + msleep(10); tegra_pinmux_set_tristate(TEGRA_PINGROUP_CAM_MCLK, TEGRA_TRI_TRISTATE); + gpio_set_value(CAM1_LDO_EN_GPIO, 0); + gpio_direction_output(CAM1_LDO_EN_GPIO, 0); + gpio_free(CAM1_LDO_EN_GPIO); + + msleep(10); + if (grouper_1v8_ldo5) { regulator_disable(grouper_1v8_ldo5); regulator_put(grouper_1v8_ldo5); grouper_1v8_ldo5 = NULL; } - msleep(5); - - gpio_set_value(CAM1_LDO_EN_GPIO, 0); - gpio_direction_output(CAM1_LDO_EN_GPIO, 0); - gpio_free(CAM1_LDO_EN_GPIO); - printk("%s-\n", __FUNCTION__); return 0; } else { From 503f4dbc8b501781bc7fd2782f4671cad5439198 Mon Sep 17 00:00:00 2001 
From: pomelo_hsieh Date: Mon, 17 Dec 2012 15:47:12 +0800 Subject: [PATCH 494/678] Assign the default value to the interrupt register variable. This is to avoid that the unknown value is restored in resuming if the system failed to suspend. Change-Id: I22e19c9e9def26afa9a104043abddd9ad6b45b79 --- drivers/usb/otg/tegra-otg.c | 3 +++ 1 file changed, 3 insertions(+) mode change 100644 => 100755 drivers/usb/otg/tegra-otg.c diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c old mode 100644 new mode 100755 index c1fe7f899f1..14bdaa1bee6 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -42,6 +42,7 @@ #define USB_VBUS_INT_STATUS (1 << 9) #define USB_VBUS_STATUS (1 << 10) #define USB_INTS (USB_VBUS_INT_STATUS | USB_ID_INT_STATUS) +#define USB_INT_ENS (USB_VBUS_INT_EN | USB_ID_INT_EN | USB_VBUS_WAKEUP_EN | USB_ID_PIN_WAKEUP_EN) typedef void (*callback_t)(enum usb_otg_state to, enum usb_otg_state from, void *args); @@ -428,6 +429,8 @@ static int tegra_otg_probe(struct platform_device *pdev) if (!ehci_pdata->default_enable) clk_disable(tegra->clk); + + tegra->intr_reg_data = tegra->intr_reg_data | USB_INT_ENS; dev_info(&pdev->dev, "otg transceiver registered\n"); return 0; From 026ae1d92d6506592deff826f81d5cbf8ba98d86 Mon Sep 17 00:00:00 2001 From: pomelo_hsieh Date: Mon, 17 Dec 2012 15:46:37 +0800 Subject: [PATCH 495/678] Revert "This is to workaround that the bits USB_VBUS_INT_EN, USB_VBUS_WAKEUP_EN, USB_ID_INT_EN" This reverts commit 714b6a0dbdacfdc54dd607864c23ca4aaf1aed1c. --- drivers/usb/otg/tegra-otg.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index 14bdaa1bee6..9d7b09d0422 100755 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -265,17 +265,6 @@ static irqreturn_t tegra_otg_irq(int irq, void *data) if (val & (USB_VBUS_INT_EN | USB_ID_INT_EN)) { otg_writel(tegra, val, USB_PHY_WAKEUP); if ((val & USB_ID_INT_STATUS) || (val & USB_VBUS_INT_STATUS)) { - tegra->int_status = val; - tegra->detect_vbus = false; - schedule_work(&tegra->work); - } - } else { - if ((val & USB_ID_INT_STATUS) || (val & USB_VBUS_INT_STATUS)) { - printk(KERN_INFO "%s(): WRONG! val = %#X\n", __func__, val); - val |= (USB_VBUS_INT_EN | USB_VBUS_WAKEUP_EN); - val |= (USB_ID_INT_EN | USB_ID_PIN_WAKEUP_EN); - otg_writel(tegra, val, USB_PHY_WAKEUP); - tegra->int_status = val; tegra->detect_vbus = false; schedule_work(&tegra->work); @@ -478,7 +467,7 @@ static int tegra_otg_suspend(struct device *dev) val = tegra_otg->intr_reg_data & ~(USB_ID_INT_EN | USB_VBUS_INT_EN); writel(val, (tegra_otg->regs + USB_PHY_WAKEUP)); clk_disable(tegra_otg->clk); - printk(KERN_INFO "%s(): tegra_otg->intr_reg_data = %#X\n", __func__, tegra_otg->intr_reg_data); + if (from == OTG_STATE_B_PERIPHERAL && otg->gadget) { usb_gadget_vbus_disconnect(otg->gadget); otg->state = OTG_STATE_A_SUSPEND; @@ -503,13 +492,6 @@ static void tegra_otg_resume(struct device *dev) msleep(1); /* restore the interupt enable for cable ID and VBUS */ clk_enable(tegra_otg->clk); - if (!(tegra_otg->intr_reg_data & USB_VBUS_INT_EN) || !(tegra_otg->intr_reg_data & USB_VBUS_WAKEUP_EN) || - !(tegra_otg->intr_reg_data & USB_ID_INT_EN) || !(tegra_otg->intr_reg_data & USB_ID_PIN_WAKEUP_EN)) { - printk(KERN_INFO "%s(): WRONG! 
tegra_otg->intr_reg_data = %#X\n", __func__, tegra_otg->intr_reg_data); - tegra_otg->intr_reg_data |= (USB_VBUS_INT_EN | USB_VBUS_WAKEUP_EN); - tegra_otg->intr_reg_data |= (USB_ID_INT_EN | USB_ID_PIN_WAKEUP_EN); - } - printk(KERN_INFO "%s(): tegra_otg->intr_reg_data = %#X\n", __func__, tegra_otg->intr_reg_data); writel(tegra_otg->intr_reg_data, (tegra_otg->regs + USB_PHY_WAKEUP)); val = readl(tegra_otg->regs + USB_PHY_WAKEUP); clk_disable(tegra_otg->clk); From 2ee0c7dce61258f5bbf1f8457ea8b236fbfafd26 Mon Sep 17 00:00:00 2001 From: Ken Chang Date: Wed, 26 Sep 2012 12:07:04 +0800 Subject: [PATCH 496/678] arm: tegra: usb_phy: disable PMC mode for USB1 Both the ehci host and the udc device are registered on USB1, so we have both a device-mode and a host-mode phy for USB1. In this case, when we come back from LP0, utmi_phy_power_on() is called twice, once for device mode and once for host mode. For device mode it is called by fsl_udc_resume(); the phy is then turned off right after the udc driver detects that the id pin is low (we are working in host mode). However, PMC is now enabled for usb1 by the call to utmip_powerdown_pmc_wake_detect(), which is called by utmi_phy_power_off(). PMC mode needs to be disabled to switch the pin control back to the host controller. bug 984119 Change-Id: I7bcce88cf5f42e0cbf3ee2d0cc24bbcadf3d51e9 Signed-off-by: Ken Chang --- arch/arm/mach-tegra/usb_phy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-tegra/usb_phy.c b/arch/arm/mach-tegra/usb_phy.c index 6e84e3d8279..71c38adf1f2 100755 --- a/arch/arm/mach-tegra/usb_phy.c +++ b/arch/arm/mach-tegra/usb_phy.c @@ -1134,6 +1134,7 @@ static unsigned int tegra_phy_xcvr_setup_value(struct tegra_utmip_config *cfg) return (unsigned int)val; } +static void utmip_phy_disable_pmc_bus_ctrl(struct tegra_usb_phy *phy); static int utmi_phy_power_on(struct tegra_usb_phy *phy, bool is_dpd) { unsigned long val; @@ -1302,6 +1303,8 @@ static int utmi_phy_power_on(struct tegra_usb_phy *phy, bool is_dpd) if (phy->mode == TEGRA_USB_PHY_MODE_DEVICE) utmip_powerup_pmc_wake_detect(phy); + else + utmip_phy_disable_pmc_bus_ctrl(phy); #endif return 0; From 736a626935c3e3034a049038c203253ad5044961 Mon Sep 17 00:00:00 2001 From: yi-hsin_hung Date: Fri, 7 Dec 2012 18:00:39 +0800 Subject: [PATCH 497/678] arch: arm: xmm: Update the baseband xmm power state for the race condition issue. Avoid entering L3 and then getting the modem interrupt, which generates a CP L2 -> L0 issue because of when the baseband xmm power state was updated.
Change-Id: Ic4a2147102b80635dadc75a7ea1abb39baf53e44 --- arch/arm/mach-tegra/baseband-xmm-power.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/baseband-xmm-power.c b/arch/arm/mach-tegra/baseband-xmm-power.c index 40ef4e1d2da..0daaf1e7d89 100755 --- a/arch/arm/mach-tegra/baseband-xmm-power.c +++ b/arch/arm/mach-tegra/baseband-xmm-power.c @@ -562,6 +562,7 @@ void baseband_xmm_set_power_status(unsigned int status) baseband_xmm_power_driver_handle_resume(data); }*/ pr_info("L0\n"); + baseband_xmm_powerstate = status; value = gpio_get_value(data->modem.xmm.ipc_hsic_active); pr_debug("before L0 ipc_hsic_active=%d\n", value); if (!value) { @@ -580,6 +581,7 @@ void baseband_xmm_set_power_status(unsigned int status) break; case BBXMM_PS_L2: pr_info("L2\n"); + baseband_xmm_powerstate = status; wake_unlock(&wakelock); modem_sleep_flag = true; break; @@ -594,6 +596,7 @@ void baseband_xmm_set_power_status(unsigned int status) } } pr_info("L3\n"); + baseband_xmm_powerstate = status; if (wake_lock_active(&wakelock)) { pr_info("%s: releasing wakelock before L3\n", __func__); @@ -613,9 +616,9 @@ void baseband_xmm_set_power_status(unsigned int status) } else goto exit_without_state_change; default: + baseband_xmm_powerstate = status; break; } - baseband_xmm_powerstate = status; pr_debug("BB XMM POWER STATE = %d\n", status); return; From 7347b51ae5bd54b151eb60e4839e2dfb26e1cfc6 Mon Sep 17 00:00:00 2001 From: yi-hsin_hung Date: Tue, 18 Dec 2012 21:20:01 +0800 Subject: [PATCH 498/678] driver: ril: check the modem hang pin when the system resume. Change-Id: I61e17df16260cc9434d60cf84e6d4ac3ba3addd7 --- arch/arm/mach-tegra/baseband-xmm-power.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-tegra/baseband-xmm-power.c b/arch/arm/mach-tegra/baseband-xmm-power.c index 0daaf1e7d89..12f568d1f9f 100755 --- a/arch/arm/mach-tegra/baseband-xmm-power.c +++ b/arch/arm/mach-tegra/baseband-xmm-power.c @@ -709,6 +709,7 @@ irqreturn_t baseband_xmm_power_ipc_ap_wake_irq(int irq, void *dev_id) wakeup_pending = true; spin_unlock(&xmm_lock); pr_info("CP L3 -> L0\n"); + ril_change_modem_crash_mode(); } } /* save gpio state */ From 7c0c7a469164dd04c80a5e18651fa335ae3ccfaa Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 29 Jan 2013 14:41:02 -0800 Subject: [PATCH 499/678] Revert "Revert "net: wireless: bcmdhd: Fix WD wakelock behavior"" This reverts commit 22b4fcde206e96f57bf0a111403fc3d75532918a. 
--- drivers/net/wireless/bcmdhd/dhd.h | 8 +++- drivers/net/wireless/bcmdhd/dhd_linux.c | 64 ++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/dhd.h b/drivers/net/wireless/bcmdhd/dhd.h index 8426949a640..b7ab35c71b3 100755 --- a/drivers/net/wireless/bcmdhd/dhd.h +++ b/drivers/net/wireless/bcmdhd/dhd.h @@ -308,6 +308,8 @@ extern int dhd_os_wake_unlock(dhd_pub_t *pub); extern int dhd_os_wake_lock_timeout(dhd_pub_t *pub); extern int dhd_os_wake_lock_rx_timeout_enable(dhd_pub_t *pub, int val); extern int dhd_os_wake_lock_ctrl_timeout_enable(dhd_pub_t *pub, int val); +extern int dhd_os_wd_wake_lock(dhd_pub_t *pub); +extern int dhd_os_wd_wake_unlock(dhd_pub_t *pub); inline static void MUTEX_LOCK_SOFTAP_SET_INIT(dhd_pub_t * dhdp) { @@ -330,8 +332,10 @@ inline static void MUTEX_UNLOCK_SOFTAP_SET(dhd_pub_t * dhdp) #endif /* (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)) */ } -#define DHD_OS_WAKE_LOCK(pub) dhd_os_wake_lock(pub) -#define DHD_OS_WAKE_UNLOCK(pub) dhd_os_wake_unlock(pub) +#define DHD_OS_WAKE_LOCK(pub) dhd_os_wake_lock(pub) +#define DHD_OS_WAKE_UNLOCK(pub) dhd_os_wake_unlock(pub) +#define DHD_OS_WD_WAKE_LOCK(pub) dhd_os_wd_wake_lock(pub) +#define DHD_OS_WD_WAKE_UNLOCK(pub) dhd_os_wd_wake_unlock(pub) #define DHD_OS_WAKE_LOCK_TIMEOUT(pub) dhd_os_wake_lock_timeout(pub) #define DHD_OS_WAKE_LOCK_RX_TIMEOUT_ENABLE(pub, val) dhd_os_wake_lock_rx_timeout_enable(pub, val) #define DHD_OS_WAKE_LOCK_CTRL_TIMEOUT_ENABLE(pub, val) dhd_os_wake_lock_ctrl_timeout_enable(pub, val) diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index a871bdba807..45e99e68677 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -261,6 +261,7 @@ typedef struct dhd_info { struct wake_lock wl_wifi; /* Wifi wakelock */ struct wake_lock wl_rxwake; /* Wifi rx wakelock */ struct wake_lock wl_ctrlwake; /* Wifi ctrl wakelock */ + struct wake_lock wl_wdwake; /* Wifi wd wakelock */ #endif #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)) @@ -272,6 +273,7 @@ typedef struct dhd_info { #endif spinlock_t wakelock_spinlock; int wakelock_counter; + int wakelock_wd_counter; int wakelock_rx_timeout_enable; int wakelock_ctrl_timeout_enable; @@ -1820,7 +1822,6 @@ dhd_watchdog_thread(void *data) dhd_os_spin_unlock(&dhd->pub, flags); } dhd_os_sdunlock(&dhd->pub); - DHD_OS_WAKE_UNLOCK(&dhd->pub); } else { break; } @@ -1834,9 +1835,7 @@ static void dhd_watchdog(ulong data) dhd_info_t *dhd = (dhd_info_t *)data; unsigned long flags; - DHD_OS_WAKE_LOCK(&dhd->pub); if (dhd->pub.dongle_reset) { - DHD_OS_WAKE_UNLOCK(&dhd->pub); return; } @@ -1860,7 +1859,6 @@ static void dhd_watchdog(ulong data) mod_timer(&dhd->timer, jiffies + msecs_to_jiffies(dhd_watchdog_ms)); dhd_os_spin_unlock(&dhd->pub, flags); dhd_os_sdunlock(&dhd->pub); - DHD_OS_WAKE_UNLOCK(&dhd->pub); } #ifdef DHDTHREAD @@ -2804,12 +2802,14 @@ dhd_attach(osl_t *osh, struct dhd_bus *bus, uint bus_hdrlen) /* Initialize Wakelock stuff */ spin_lock_init(&dhd->wakelock_spinlock); dhd->wakelock_counter = 0; + dhd->wakelock_wd_counter = 0; dhd->wakelock_rx_timeout_enable = 0; dhd->wakelock_ctrl_timeout_enable = 0; #ifdef CONFIG_HAS_WAKELOCK wake_lock_init(&dhd->wl_wifi, WAKE_LOCK_SUSPEND, "wlan_wake"); wake_lock_init(&dhd->wl_rxwake, WAKE_LOCK_SUSPEND, "wlan_rx_wake"); wake_lock_init(&dhd->wl_ctrlwake, WAKE_LOCK_SUSPEND, "wlan_ctrl_wake"); + wake_lock_init(&dhd->wl_wdwake, WAKE_LOCK_SUSPEND, "wlan_wd_wake"); #endif #if (LINUX_VERSION_CODE 
>= KERNEL_VERSION(2, 6, 25)) mutex_init(&dhd->dhd_net_if_mutex); @@ -3002,12 +3002,12 @@ dhd_bus_start(dhd_pub_t *dhdp) dhd->wd_timer_valid = FALSE; dhd_os_spin_unlock(&dhd->pub, flags); del_timer_sync(&dhd->timer); - DHD_ERROR(("%s Host failed to register for OOB\n", __FUNCTION__)); #ifdef DHDTHREAD if (dhd->threads_only) dhd_os_sdunlock(dhdp); #endif /* DHDTHREAD */ + DHD_OS_WD_WAKE_UNLOCK(&dhd->pub); return -ENODEV; } @@ -3026,6 +3026,7 @@ dhd_bus_start(dhd_pub_t *dhdp) if (dhd->threads_only) dhd_os_sdunlock(dhdp); #endif /* DHDTHREAD */ + DHD_OS_WD_WAKE_UNLOCK(&dhd->pub); return -ENODEV; } @@ -3923,10 +3924,15 @@ void dhd_detach(dhd_pub_t *dhdp) if (dhd->dhd_state & DHD_ATTACH_STATE_WAKELOCKS_INIT) { #ifdef CONFIG_HAS_WAKELOCK + dhd->wakelock_counter = 0; + dhd->wakelock_wd_counter = 0; + dhd->wakelock_rx_timeout_enable = 0; + dhd->wakelock_ctrl_timeout_enable = 0; wake_lock_destroy(&dhd->wl_wifi); wake_lock_destroy(&dhd->wl_rxwake); wake_lock_destroy(&dhd->wl_ctrlwake); -#endif + wake_lock_destroy(&dhd->wl_wdwake); +#endif /* CONFIG_HAS_WAKELOCK */ } } @@ -4119,11 +4125,18 @@ dhd_os_wd_timer(void *bus, uint wdtick) DHD_TRACE(("%s: Enter\n", __FUNCTION__)); + if (!dhd) + return; + + if (wdtick) + DHD_OS_WD_WAKE_LOCK(pub); + flags = dhd_os_spin_lock(pub); /* don't start the wd until fw is loaded */ if (pub->busstate == DHD_BUS_DOWN) { dhd_os_spin_unlock(pub, flags); + DHD_OS_WD_WAKE_UNLOCK(pub); return; } @@ -4136,6 +4149,7 @@ dhd_os_wd_timer(void *bus, uint wdtick) #else del_timer(&dhd->timer); #endif /* DHDTHREAD */ + DHD_OS_WD_WAKE_UNLOCK(pub); return; } @@ -5013,6 +5027,44 @@ int net_os_wake_unlock(struct net_device *dev) return ret; } +int dhd_os_wd_wake_lock(dhd_pub_t *pub) +{ + dhd_info_t *dhd = (dhd_info_t *)(pub->info); + unsigned long flags; + int ret = 0; + + if (dhd) { + spin_lock_irqsave(&dhd->wakelock_spinlock, flags); +#ifdef CONFIG_HAS_WAKELOCK + if (!dhd->wakelock_wd_counter) + wake_lock(&dhd->wl_wdwake); +#endif + dhd->wakelock_wd_counter++; + ret = dhd->wakelock_wd_counter; + spin_unlock_irqrestore(&dhd->wakelock_spinlock, flags); + } + return ret; +} + +int dhd_os_wd_wake_unlock(dhd_pub_t *pub) +{ + dhd_info_t *dhd = (dhd_info_t *)(pub->info); + unsigned long flags; + int ret = 0; + + if (dhd) { + spin_lock_irqsave(&dhd->wakelock_spinlock, flags); + if (dhd->wakelock_wd_counter) { + dhd->wakelock_wd_counter = 0; +#ifdef CONFIG_HAS_WAKELOCK + wake_unlock(&dhd->wl_wdwake); +#endif + } + spin_unlock_irqrestore(&dhd->wakelock_spinlock, flags); + } + return ret; +} + int dhd_os_check_if_up(void *dhdp) { dhd_pub_t *pub = (dhd_pub_t *)dhdp; From a05876c81a573399fb104027c182f2be30758ee2 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Fri, 2 Nov 2012 09:38:42 -0700 Subject: [PATCH 500/678] net: wireless: bcmdhd: Avoid suspend on watchdog Change-Id: Ic41a8f369a2ee8b2a0084e6a1cbf6b454ff53353 Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/dhd_linux.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index 45e99e68677..334cea2beb2 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -5011,7 +5011,8 @@ int dhd_os_check_wakelock(void *dhdp) return 0; dhd = (dhd_info_t *)(pub->info); - if (dhd && wake_lock_active(&dhd->wl_wifi)) + if (dhd && (wake_lock_active(&dhd->wl_wifi) || + wake_lock_active(&dhd->wl_wdwake))) return 1; #endif return 0; From 56eabb0e0a44af7e0d0a6adf72407e368479c7c6 Mon Sep 17 00:00:00 2001 From: 
Dmitry Shmidt Date: Tue, 27 Nov 2012 12:57:32 -0800 Subject: [PATCH 501/678] net: wireless: bcmdhd: Increase PNO wakelock to 7 sec Change-Id: Ife7bac08d16e19b37d16f697e4ad9765ca6efbb7 Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/dhd_linux.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index 334cea2beb2..b04b9fcd467 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -1650,14 +1650,14 @@ dhd_rx_frame(dhd_pub_t *dhdp, int ifidx, void *pktbuf, int numpkt, uint8 chan) wl_event_to_host_order(&event); if (!tout_ctrl) tout_ctrl = DHD_PACKET_TIMEOUT_MS; - if (event.event_type == WLC_E_BTA_HCI_EVENT) { - dhd_bta_doevt(dhdp, data, event.datalen); - } #ifdef PNO_SUPPORT if (event.event_type == WLC_E_PFN_NET_FOUND) { - tout_ctrl *= 2; + tout_ctrl = 7 * DHD_PACKET_TIMEOUT_MS; } #endif /* PNO_SUPPORT */ + if (event.event_type == WLC_E_BTA_HCI_EVENT) { + dhd_bta_doevt(dhdp, data, event.datalen); + } } else { tout_rx = DHD_PACKET_TIMEOUT_MS; } From f96cc62b86f12377aaf43f6f76f0fc2d3637dd11 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 18 Dec 2012 14:43:34 -0800 Subject: [PATCH 502/678] net: wireless: bcmdhd: Postpone taking wd_wake lock Change-Id: I3926d7a1a357d173144f408996f35f0929db711e Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/dhd_linux.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index b04b9fcd467..071d7be520c 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -4128,15 +4128,13 @@ dhd_os_wd_timer(void *bus, uint wdtick) if (!dhd) return; - if (wdtick) - DHD_OS_WD_WAKE_LOCK(pub); - flags = dhd_os_spin_lock(pub); /* don't start the wd until fw is loaded */ if (pub->busstate == DHD_BUS_DOWN) { dhd_os_spin_unlock(pub, flags); - DHD_OS_WD_WAKE_UNLOCK(pub); + if (!wdtick) + DHD_OS_WD_WAKE_UNLOCK(pub); return; } @@ -4154,6 +4152,7 @@ dhd_os_wd_timer(void *bus, uint wdtick) } if (wdtick) { + DHD_OS_WD_WAKE_LOCK(pub); dhd_watchdog_ms = (uint)wdtick; /* Re arm the timer, at last watchdog period */ mod_timer(&dhd->timer, jiffies + msecs_to_jiffies(dhd_watchdog_ms)); From eb408eedec6fbc1077a79660dbf2f612caeb7a54 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 15 Jan 2013 15:16:31 -0800 Subject: [PATCH 503/678] net: wireless: bcmdhd: Fix PEAP with dynamic WEP Change-Id: I62dffdb3b759ea5ccdf9f7ea0f0e67f928ace92b Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/wl_cfg80211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index 27963527d41..eab414b46c6 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -2786,7 +2786,9 @@ wl_cfg80211_add_key(struct wiphy *wiphy, struct net_device *dev, bssidx = wl_cfgp2p_find_idx(wl, dev); - if (mac_addr) { + if (mac_addr && + ((params->cipher != WLAN_CIPHER_SUITE_WEP40) && + (params->cipher != WLAN_CIPHER_SUITE_WEP104))) { wl_add_keyext(wiphy, dev, key_idx, mac_addr, params); goto exit; } From d2693fa091048b0ac9fffd8bf26172128b4550c4 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 29 Jan 2013 13:57:51 -0800 Subject: [PATCH 504/678] net: wireless: bcmdhd: Update to version 5.90.195.114 - Get AP beacon and DTIM to set proper DTIM 
skipping Change-Id: I6bc23f050c144bf8361078ad587bcadbfe3a37fc Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/dhd.h | 9 +++- drivers/net/wireless/bcmdhd/dhd_common.c | 46 +++++++++++++------ drivers/net/wireless/bcmdhd/include/epivers.h | 8 ++-- 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/dhd.h b/drivers/net/wireless/bcmdhd/dhd.h index b7ab35c71b3..c50afa5a125 100755 --- a/drivers/net/wireless/bcmdhd/dhd.h +++ b/drivers/net/wireless/bcmdhd/dhd.h @@ -24,7 +24,7 @@ * software in any way with any other Broadcom software provided under a license * other than the GPL, without Broadcom's express prior written consent. * - * $Id: dhd.h 344123 2012-07-11 09:33:49Z $ + * $Id: dhd.h 357954 2012-09-20 18:22:31Z $ */ /**************** @@ -618,9 +618,14 @@ extern uint dhd_pktgen_len; #define MAX_PKTGEN_LEN 1800 #endif +/* hooks for custom glom setting option via Makefile */ +#define DEFAULT_GLOM_VALUE -1 +#ifndef CUSTOM_GLOM_SETTING +#define CUSTOM_GLOM_SETTING DEFAULT_GLOM_VALUE +#endif /* hooks for custom Roaming Trigger setting via Makefile */ -#define DEFAULT_ROAM_TRIGGER_VALUE -75 /* dBm default roam trigger all band */ +#define DEFAULT_ROAM_TRIGGER_VALUE -65 /* dBm default roam trigger all band */ #define DEFAULT_ROAM_TRIGGER_SETTING -1 #ifndef CUSTOM_ROAM_TRIGGER_SETTING #define CUSTOM_ROAM_TRIGGER_SETTING DEFAULT_ROAM_TRIGGER_VALUE diff --git a/drivers/net/wireless/bcmdhd/dhd_common.c b/drivers/net/wireless/bcmdhd/dhd_common.c index d5af27f40b7..d46864c3a2a 100644 --- a/drivers/net/wireless/bcmdhd/dhd_common.c +++ b/drivers/net/wireless/bcmdhd/dhd_common.c @@ -21,7 +21,7 @@ * software in any way with any other Broadcom software provided under a license * other than the GPL, without Broadcom's express prior written consent. 
* - * $Id: dhd_common.c 331276 2012-05-04 08:05:57Z $ + * $Id: dhd_common.c 380760 2013-01-23 21:59:27Z $ */ #include #include @@ -1767,14 +1767,11 @@ bool dhd_is_associated(dhd_pub_t *dhd, void *bss_buf, int *retval) int dhd_get_dtim_skip(dhd_pub_t *dhd) { - int bcn_li_dtim; + int bcn_li_dtim = 1; + char buf[128]; int ret = -1; int dtim_assoc = 0; - - if ((dhd->dtim_skip == 0) || (dhd->dtim_skip == 1)) - bcn_li_dtim = 3; - else - bcn_li_dtim = dhd->dtim_skip; + int ap_beacon = 0; /* Check if associated */ if (dhd_is_associated(dhd, NULL, NULL) == FALSE) { @@ -1782,15 +1779,34 @@ dhd_get_dtim_skip(dhd_pub_t *dhd) goto exit; } - /* if assoc grab ap's dtim value */ - if ((ret = dhd_wl_ioctl_cmd(dhd, WLC_GET_DTIMPRD, - &dtim_assoc, sizeof(dtim_assoc), FALSE, 0)) < 0) { + /* read AP beacon if do nother if APs Beacon more that 100msec */ + bcm_mkiovar("bi_assoc", 0, 0, buf, sizeof(buf)); + if ((ret = dhd_wl_ioctl_cmd(dhd, WLC_GET_VAR, buf, sizeof(buf), FALSE, 0)) < 0) { + DHD_ERROR(("%s failed code %d\n", __FUNCTION__, ret)); + goto exit; + } + + ap_beacon = dtoh32(*(int *)buf); + + /* if APs Beacon more that 100msec do no dtim skip */ + if (ap_beacon > 100) { + DHD_ERROR(("%s no dtim skip for AP with %d beacon\n", __FUNCTION__, ap_beacon)); + goto exit; + } + + + /* Read DTIM value if associated */ + memset(buf, 0, sizeof(buf)); + bcm_mkiovar("dtim_assoc", 0, 0, buf, sizeof(buf)); + if ((ret = dhd_wl_ioctl_cmd(dhd, WLC_GET_VAR, buf, sizeof(buf), FALSE, 0)) < 0) { DHD_ERROR(("%s failed code %d\n", __FUNCTION__, ret)); goto exit; } - DHD_ERROR(("%s bcn_li_dtim=%d DTIM=%d Listen=%d\n", - __FUNCTION__, bcn_li_dtim, dtim_assoc, LISTEN_INTERVAL)); + dtim_assoc = dtoh32(*(int *)buf); + + DHD_ERROR(("%s beacom=%d msec bcn_li_dtim=%d DTIM=%d Listen=%d\n", + __FUNCTION__, ap_beacon, bcn_li_dtim, dtim_assoc, LISTEN_INTERVAL)); /* if not assocated just eixt */ if (dtim_assoc == 0) { @@ -1800,12 +1816,16 @@ dhd_get_dtim_skip(dhd_pub_t *dhd) /* check if sta listen interval fits into AP dtim */ if (dtim_assoc > LISTEN_INTERVAL) { /* AP DTIM to big for our Listen Interval : no dtim skiping */ - bcn_li_dtim = 1; DHD_ERROR(("%s DTIM=%d > Listen=%d : too big ...\n", __FUNCTION__, dtim_assoc, LISTEN_INTERVAL)); goto exit; } + if ((dhd->dtim_skip == 0) || (dhd->dtim_skip == 1)) + bcn_li_dtim = 3; + else + bcn_li_dtim = dhd->dtim_skip; + if ((bcn_li_dtim * dtim_assoc) > LISTEN_INTERVAL) { /* Round up dtim_skip to fit into STAs Listen Interval */ bcn_li_dtim = (int)(LISTEN_INTERVAL / dtim_assoc); diff --git a/drivers/net/wireless/bcmdhd/include/epivers.h b/drivers/net/wireless/bcmdhd/include/epivers.h index 37c07e6ec37..fac87f500d1 100644 --- a/drivers/net/wireless/bcmdhd/include/epivers.h +++ b/drivers/net/wireless/bcmdhd/include/epivers.h @@ -33,17 +33,17 @@ #define EPI_RC_NUMBER 195 -#define EPI_INCREMENTAL_NUMBER 104 +#define EPI_INCREMENTAL_NUMBER 114 #define EPI_BUILD_NUMBER 0 -#define EPI_VERSION 5, 90, 195, 104 +#define EPI_VERSION 5, 90, 195, 114 -#define EPI_VERSION_NUM 0x055ac368 +#define EPI_VERSION_NUM 0x055ac372 #define EPI_VERSION_DEV 5.90.195 -#define EPI_VERSION_STR "5.90.195.104" +#define EPI_VERSION_STR "5.90.195.114" #endif From d12de14624234caa9f7e5608ad1101939a56ef99 Mon Sep 17 00:00:00 2001 From: Ed Tam Date: Tue, 5 Feb 2013 16:06:35 -0800 Subject: [PATCH 505/678] tegra3_android_defconfig: Enable CONFIG_EXT4_FS_SECURITY option --- arch/arm/configs/tegra3_android_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/tegra3_android_defconfig 
b/arch/arm/configs/tegra3_android_defconfig index b744c47a923..1be3e6188ea 100644 --- a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -470,6 +470,7 @@ CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=y CONFIG_VFAT_FS=y From bc215faf0f91bb6e3efb3253732deb12699a3e4c Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Fri, 8 Feb 2013 12:08:48 -0800 Subject: [PATCH 506/678] net: wireless: bcmdhd: Add SUPPORT_PM2_ONLY option Change-Id: Ieb2569cb7fb2bbc56ff9abbc8728a7741fda0027 Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/dhd_linux.c | 6 ++++++ drivers/net/wireless/bcmdhd/wl_cfg80211.c | 7 ++++++- drivers/net/wireless/bcmdhd/wl_cfgp2p.c | 4 ++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index 071d7be520c..44532405bbb 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -641,7 +641,9 @@ dhd_dynamic_dtim_skip_release(dhd_pub_t *dhdp) static int dhd_set_suspend(int value, dhd_pub_t *dhd) { +#if !defined(SUPPORT_PM2_ONLY) int power_mode = PM_MAX; +#endif /* wl_pkt_filter_enable_t enable_parm; */ char iovbuf[32]; int bcn_li_dtim = 3; @@ -662,8 +664,10 @@ static int dhd_set_suspend(int value, dhd_pub_t *dhd) /* Kernel suspended */ DHD_ERROR(("%s: force extra Suspend setting\n", __FUNCTION__)); +#if !defined(SUPPORT_PM2_ONLY) dhd_wl_ioctl_cmd(dhd, WLC_SET_PM, (char *)&power_mode, sizeof(power_mode), TRUE, 0); +#endif /* Enable packet filter, only allow unicast packet to send up */ dhd_set_packet_filter(1, dhd); @@ -690,9 +694,11 @@ static int dhd_set_suspend(int value, dhd_pub_t *dhd) /* Kernel resumed */ DHD_ERROR(("%s: Remove extra suspend setting\n", __FUNCTION__)); +#if !defined(SUPPORT_PM2_ONLY) power_mode = PM_FAST; dhd_wl_ioctl_cmd(dhd, WLC_SET_PM, (char *)&power_mode, sizeof(power_mode), TRUE, 0); +#endif /* disable pkt filter */ dhd_set_packet_filter(0, dhd); diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index eab414b46c6..99223be786a 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -3155,8 +3155,9 @@ wl_cfg80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, s32 pm; s32 err = 0; struct wl_priv *wl = wiphy_priv(wiphy); +#if !defined(SUPPORT_PM2_ONLY) dhd_pub_t *dhd = (dhd_pub_t *)(wl->pub); - +#endif CHECK_SYS_UP(wl); WL_DBG(("Enter : power save %s\n", (enabled ? "enable" : "disable"))); @@ -3164,7 +3165,11 @@ wl_cfg80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, return err; } +#if !defined(SUPPORT_PM2_ONLY) pm = enabled ? ((dhd->in_suspend) ? PM_MAX : PM_FAST) : PM_OFF; +#else + pm = enabled ? PM_FAST : PM_OFF; +#endif pm = htod32(pm); err = wldev_ioctl(dev, WLC_SET_PM, &pm, sizeof(pm), true); if (unlikely(err)) { diff --git a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c index aedf9705b44..38c81cf94f4 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c +++ b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c @@ -1758,6 +1758,10 @@ wl_cfgp2p_set_p2p_ps(struct wl_priv *wl, struct net_device *ndev, char* buf, int if (legacy_ps != -1) { s32 pm = legacy_ps ? 
PM_MAX : PM_OFF; +#if defined(SUPPORT_PM2_ONLY) + if (pm == PM_MAX) + pm = PM_FAST; +#endif /* SUPPORT_PM2_ONLY */ ret = wldev_ioctl(wl_to_p2p_bss_ndev(wl, P2PAPI_BSSCFG_CONNECTION), WLC_SET_PM, &pm, sizeof(pm), true); if (unlikely(ret)) { From bcad7194eb7bb4f44b1038f255289db9880c9adb Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Fri, 8 Feb 2013 13:43:47 -0800 Subject: [PATCH 507/678] net: wireless: bcmdhd: Enable SUPPORT_PM2_ONLY mode Change-Id: I4f8132191454f0a12f7613388229fed9be5216c9 Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/bcmdhd/Makefile b/drivers/net/wireless/bcmdhd/Makefile index 44aaa65bc27..40816c4ac57 100644 --- a/drivers/net/wireless/bcmdhd/Makefile +++ b/drivers/net/wireless/bcmdhd/Makefile @@ -8,7 +8,7 @@ DHDCFLAGS = -Wall -Wstrict-prototypes -Dlinux -DBCMDRIVER \ -DNEW_COMPAT_WIRELESS -DWIFI_ACT_FRAME -DARP_OFFLOAD_SUPPORT \ -DKEEP_ALIVE -DCSCAN -DGET_CUSTOM_MAC_ENABLE -DPKT_FILTER_SUPPORT \ -DEMBEDDED_PLATFORM -DENABLE_INSMOD_NO_FW_LOAD -DPNO_SUPPORT \ - -DSET_RANDOM_MAC_SOFTAP -DWL_CFG80211_STA_EVENT \ + -DSET_RANDOM_MAC_SOFTAP -DWL_CFG80211_STA_EVENT -DSUPPORT_PM2_ONLY \ -Idrivers/net/wireless/bcmdhd -Idrivers/net/wireless/bcmdhd/include DHDOFILES = aiutils.o bcmsdh_sdmmc_linux.o dhd_linux.o siutils.o bcmutils.o \ From f9a808f11e2fe7306efeb3be19d364f9504f4c84 Mon Sep 17 00:00:00 2001 From: jim1_lin Date: Wed, 26 Dec 2012 11:29:53 +0800 Subject: [PATCH 508/678] ARM: tegra: grouper: Enable 802.11n for Russia. Replace RU to XY. Change-Id: I3ecb1f247f7419e511031e13cb85c23693b668ff --- arch/arm/mach-tegra/board-grouper-sdhci.c | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/arch/arm/mach-tegra/board-grouper-sdhci.c b/arch/arm/mach-tegra/board-grouper-sdhci.c index 282a19db292..7428c18a3b9 100644 --- a/arch/arm/mach-tegra/board-grouper-sdhci.c +++ b/arch/arm/mach-tegra/board-grouper-sdhci.c @@ -46,10 +46,38 @@ static int grouper_wifi_reset(int on); static int grouper_wifi_power(int on); static int grouper_wifi_set_carddetect(int val); +/* Customized Locale table : OPTIONAL feature */ +#define WLC_CNTRY_BUF_SZ 4 +typedef struct cntry_locales_custom { + char iso_abbrev[WLC_CNTRY_BUF_SZ]; + char custom_locale[WLC_CNTRY_BUF_SZ]; + int custom_locale_rev; +} cntry_locales_custom_t; + +static cntry_locales_custom_t grouper_wifi_translate_custom_table[] = { +/* Table should be filled out based on custom platform regulatory requirement */ + {"RU", "XY", 4} +}; + +static void *grouper_wifi_get_country_code(char *ccode) +{ + int size = ARRAY_SIZE(grouper_wifi_translate_custom_table); + int i; + + if (!ccode) + return NULL; + + for (i = 0; i < size; i++) + if (strcmp(ccode, grouper_wifi_translate_custom_table[i].iso_abbrev) == 0) + return &grouper_wifi_translate_custom_table[i]; + return NULL; +} + static struct wifi_platform_data grouper_wifi_control = { .set_power = grouper_wifi_power, .set_reset = grouper_wifi_reset, .set_carddetect = grouper_wifi_set_carddetect, + .get_country_code = grouper_wifi_get_country_code, }; static struct resource wifi_resource[] = { From c62cbbbefc1e0993a90c4b0cc085306152eea142 Mon Sep 17 00:00:00 2001 From: pomelo_hsieh Date: Thu, 7 Feb 2013 11:20:39 +0800 Subject: [PATCH 509/678] To make charger-ic to be able to detect charger type when battery out of power. 
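In short, charger-type classification is deferred until the device controller's Run/Stop bit is set, because the PORTSC1 line-status bits are meaningless while the controller is stopped (as happens when booting with a drained battery). A condensed sketch of the added check, reusing the register defines already present in the driver but with an otherwise hypothetical helper:

/* Illustration only; the real hunk below operates on dr_regs and
 * s_cable_info inside cable_detection_work_handler(). */
static int classify_cable(u32 usbcmd, u32 portsc1)
{
        if (!(usbcmd & USB_CMD_RUN_STOP))
                return -EAGAIN;         /* controller not running yet: retry later */

        if ((portsc1 & PORTSCX_LINE_STATUS_BITS) == PORTSCX_LINE_STATUS_UNDEF)
                return 1;               /* undefined D+/D- state: AC charger        */

        return 0;                       /* SE0/J/K state: standard USB host port    */
}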
Change-Id: I29acee37aed5594e218ba24c8bd4840bd95bf476 --- drivers/usb/gadget/fsl_udc_core.c | 35 +++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c index 0a02d200104..7a4d4271982 100755 --- a/drivers/usb/gadget/fsl_udc_core.c +++ b/drivers/usb/gadget/fsl_udc_core.c @@ -136,6 +136,7 @@ static struct delayed_work smb347_hc_mode_work; extern int smb347_hc_mode_callback(bool enable, int cur); extern void fsl_wake_lock_timeout(void); +extern void usb_det_cable_callback(unsigned cable_type); /* Export the function "unsigned int get_usb_cable_status(void)" for others to query the USB cable status. */ unsigned int get_usb_cable_status(void) @@ -274,6 +275,7 @@ static void cable_detection_work_handler(struct work_struct *w) { mutex_lock(&s_cable_info.cable_info_mutex); s_cable_info.cable_status = 0x00; //0000 + u32 val; printk(KERN_INFO "%s(): vbus_active = %d and is_active = %d\n", __func__, s_cable_info.udc_vbus_active, s_cable_info.is_active); @@ -285,6 +287,8 @@ static void cable_detection_work_handler(struct work_struct *w) s_cable_info.ac_connected = 0; + usb_det_cable_callback(s_cable_info.cable_status); + if ((pcb_id_version <= 0x2) && (project_id == GROUPER_PROJECT_NAKASI)) { #if BATTERY_CALLBACK_ENABLED battery_callback(s_cable_info.cable_status); @@ -294,17 +298,24 @@ static void cable_detection_work_handler(struct work_struct *w) touch_callback(s_cable_info.cable_status); #endif } else if (!s_cable_info.udc_vbus_active && s_cable_info.is_active) { - switch (fsl_readl(&dr_regs->portsc1) & PORTSCX_LINE_STATUS_BITS) { - case PORTSCX_LINE_STATUS_SE0: - s_cable_info.ac_connected = 0; break; - case PORTSCX_LINE_STATUS_JSTATE: - s_cable_info.ac_connected = 0; break; - case PORTSCX_LINE_STATUS_KSTATE: - s_cable_info.ac_connected = 0; break; - case PORTSCX_LINE_STATUS_UNDEF: - s_cable_info.ac_connected = 1; break; - default: - s_cable_info.ac_connected = 0; break; + val = fsl_readl(&dr_regs->usbcmd); + if (val & USB_CMD_RUN_STOP) { + switch (fsl_readl(&dr_regs->portsc1) & PORTSCX_LINE_STATUS_BITS) { + case PORTSCX_LINE_STATUS_SE0: + s_cable_info.ac_connected = 0; break; + case PORTSCX_LINE_STATUS_JSTATE: + s_cable_info.ac_connected = 0; break; + case PORTSCX_LINE_STATUS_KSTATE: + s_cable_info.ac_connected = 0; break; + case PORTSCX_LINE_STATUS_UNDEF: + s_cable_info.ac_connected = 1; break; + default: + s_cable_info.ac_connected = 0; break; + } + } else { + printk(KERN_INFO "USB device controller was not ready\n"); + mutex_unlock(&s_cable_info.cable_info_mutex); + return; } if(!s_cable_info.ac_connected) { @@ -316,6 +327,8 @@ static void cable_detection_work_handler(struct work_struct *w) s_cable_info.cable_status = 0x03; //0011 } + usb_det_cable_callback(s_cable_info.cable_status); + if ((pcb_id_version <= 0x2) && (project_id == GROUPER_PROJECT_NAKASI)) { fsl_smb347_hc_mode_callback_work(1,1); #if BATTERY_CALLBACK_ENABLED From f78b3a6ca55c3b7a2e7b1056caaaa7de3e6e02ff Mon Sep 17 00:00:00 2001 From: jerryyc_hu Date: Tue, 22 Jan 2013 23:13:18 +0800 Subject: [PATCH 510/678] 8047490 Change battery-driver for IEEE 1725 certificate. Stop charging if i2c communication failure, battery over temperature or using non-original battery. 
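A condensed sketch of the pack-authentication part of this change (simplified from the hunks below; the full patch also counts consecutive i2c failures and honours the charge-inhibit status bit before disabling the charger):

/* Illustration only: the original pack identifies itself with an "ME370"
 * device-name block at register 0x63; anything else is treated as a
 * non-original pack and the caller disables charging via
 * smb347_charger_enable(0). */
static int is_legal_pack(struct i2c_client *client)
{
        char name[7];
        int retry = 3;

        while (--retry > 0) {
                if (i2c_smbus_read_i2c_block_data(client, 0x63, 7, name) >= 0 &&
                    !strncmp(name, "ME370", 5))
                        return 1;       /* original battery pack found */
        }

        return 0;                       /* unknown pack: stop charging */
}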
Change-Id: I866c1f1044d21ec177d5daca3f286a32326a08f8 Conflicts: drivers/power/smb347-charger.c --- drivers/power/bq27541_battery.c | 66 ++++++++++++++++++++++++++++++++- drivers/power/smb347-charger.c | 4 +- include/linux/smb347-charger.h | 1 + 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/drivers/power/bq27541_battery.c b/drivers/power/bq27541_battery.c index cbb1bed6d32..6816b7eecb7 100755 --- a/drivers/power/bq27541_battery.c +++ b/drivers/power/bq27541_battery.c @@ -61,6 +61,7 @@ /* Battery flags bit definitions */ #define BATT_STS_DSG 0x0001 #define BATT_STS_FC 0x0200 +#define BATT_STS_CHG_INH 0x0800 /* Debug Message */ #define BAT_NOTICE(format, arg...) \ @@ -74,6 +75,8 @@ unsigned battery_cable_status = 0; unsigned battery_driver_ready = 0; static int ac_on ; static int usb_on ; +unsigned int bq27541_i2c_error; +static unsigned int ota_flag = 0; static unsigned int battery_current; static unsigned int battery_remaining_capacity; static atomic_t device_count; @@ -85,6 +88,7 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp,union static int bq27541_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val); extern unsigned get_usb_cable_status(void); +extern int smb347_charger_enable(bool enable); module_param(battery_current, uint, 0644); module_param(battery_remaining_capacity, uint, 0644); @@ -235,6 +239,7 @@ static struct bq27541_device_info { struct miscdevice battery_misc; struct wake_lock low_battery_wake_lock; struct wake_lock cable_wake_lock; + char device_name[5]; int smbus_status; int battery_present; int low_battery_present; @@ -487,7 +492,23 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, if ((bq27541_device->smbus_status < 0) && (psp != POWER_SUPPLY_PROP_TEMP)) { dev_err(&bq27541_device->client->dev, "%s: i2c read for %d failed\n", __func__, reg_offset); + + if (bq27541_i2c_error < 3) { + bq27541_i2c_error++; + if (battery_driver_ready) { + cancel_delayed_work(&bq27541_device->status_poll_work); + queue_delayed_work(battery_work_queue,&bq27541_device->status_poll_work, 1*HZ); + } + if(bq27541_i2c_error == 3) { + BAT_NOTICE("charger disable !!\n"); + smb347_charger_enable(0); + } + BAT_NOTICE("bq27541_i2c_error=%d\n", bq27541_i2c_error); + } return -EINVAL; + } else if (bq27541_device->smbus_status >= 0) { + if (bq27541_i2c_error) + bq27541_i2c_error--; } if (psp == POWER_SUPPLY_PROP_VOLTAGE_NOW) { @@ -495,6 +516,7 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, rt_value <= bq27541_data[REG_VOLTAGE].max_value) { if (rt_value > BATTERY_PROTECTED_VOLT) { val->intval = bq27541_device->bat_vol = rt_value*1000; + bq27541_i2c_error = 0; } else { val->intval = bq27541_device->bat_vol; } @@ -522,8 +544,23 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, if (ac_on || usb_on) { /* Charging detected */ if (bq27541_device->old_capacity == 100) val->intval = POWER_SUPPLY_STATUS_FULL; - else + else { val->intval = POWER_SUPPLY_STATUS_CHARGING; + if (ret & BATT_STS_CHG_INH) { + if (ota_flag == 0) { + BAT_NOTICE("charger disable !!\n"); + smb347_charger_enable(0); + ota_flag = 1; + } + val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING; + } else { + if (ota_flag) { + BAT_NOTICE("charger enable !!\n"); + smb347_charger_enable(1); + ota_flag = 0; + } + } + } } else if (ret & BATT_STS_FC) { /* Full-charged condition reached */ if (!ac_on) val->intval = POWER_SUPPLY_STATUS_DISCHARGING; @@ -535,7 +572,7 @@ static int 
bq27541_get_psp(int reg_offset, enum power_supply_property psp, val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING; } BAT_NOTICE("status: %s ret= 0x%04x\n", status_text[val->intval], ret); - + bq27541_i2c_error = 0; } else if (psp == POWER_SUPPLY_PROP_TEMP) { ret = bq27541_device->bat_temp = rt_value; @@ -726,6 +763,26 @@ static int bq27541_get_property(struct power_supply *psy, return -EINVAL; } +static int is_legal_pack(void) +{ + char data[7]; + int ret, retry = 3; + + while(--retry > 0) + { + ret = i2c_smbus_read_i2c_block_data(bq27541_device->client, 0x63, 7, data); + if (ret >= 0) { + if(!strncmp(data, "ME370", 5)) { + strncpy(bq27541_device->device_name, data, 5); + BAT_NOTICE("device name: %s\n", bq27541_device->device_name); + return 1; + } + } + } + BAT_NOTICE("device name: not found\n"); + return 0; +} + #include "stress_test.c" static int bq27541_probe(struct i2c_client *client, const struct i2c_device_id *id) @@ -750,6 +807,11 @@ static int bq27541_probe(struct i2c_client *client, bq27541_device->shutdown_disable = 1; bq27541_device->cap_zero_count = 0; + if(!is_legal_pack()) { + BAT_NOTICE("charger disable !!\n"); + smb347_charger_enable(0); + } + for (i = 0; i < ARRAY_SIZE(bq27541_supply); i++) { ret = power_supply_register(&client->dev, &bq27541_supply[i]); if (ret) { diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index e466bd548c5..2534b0686e2 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -952,7 +952,7 @@ static int cable_type_detect(void) // tmtmtm end } else { charger->cur_cable_type = unknow_cable; - printk(KERN_INFO "Unkown Plug In Cable type !! retval=%d\n",retval); + printk(KERN_INFO "Unkown Plug In Cable type !\n"); if (gpio_get_value(dock_in)) { charger->cur_cable_type = usb_cable; success = battery_callback(usb_cable); @@ -1025,7 +1025,7 @@ static void dockin_isr_work_function(struct work_struct *dat) static ssize_t smb347_reg_show(struct device *dev, struct device_attribute *attr, char *buf) { struct i2c_client *client = charger->client; - uint8_t config_reg[14], cmd_reg[1], status_reg[11]; + uint8_t config_reg[14], cmd_reg[1], status_reg[10]; int i, ret = 0; ret += i2c_smbus_read_i2c_block_data(client, smb347_CHARGE, 15, config_reg) diff --git a/include/linux/smb347-charger.h b/include/linux/smb347-charger.h index ac7ccbdc087..83c97c96e48 100644 --- a/include/linux/smb347-charger.h +++ b/include/linux/smb347-charger.h @@ -74,6 +74,7 @@ struct smb347_charger { struct wake_lock wake_lock_dockin; struct mutex cable_lock; struct mutex dockin_lock; + struct mutex pinctrl_lock; void *charger_cb_data; enum charging_states state; enum charger_type chrg_type; From 62e3db4ec0f97f23e818d03ebadea18d62b578e6 Mon Sep 17 00:00:00 2001 From: Joseph Wu Date: Mon, 25 Feb 2013 19:03:01 +0800 Subject: [PATCH 511/678] Sensors: Invensense v5.1.5 IIO driver release. - New driver with mpl v5.1.5 is released. - Provided by Invensense. 
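The README added by this patch describes streaming sensor data by enabling the IIO buffer through sysfs and then reading /dev/iio:device0. A minimal userspace sketch of that read path (illustration only; the channel layout depends on which sensors are enabled, so see mpu_iio.c in the Invensense test applications for full parsing):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open("/dev/iio:device0", O_RDONLY);

        if (fd < 0)
                return 1;
        n = read(fd, buf, sizeof(buf));         /* blocks until samples arrive */
        printf("read %zd bytes of raw sensor samples\n", n);
        close(fd);
        return 0;
}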
Change-Id: I6404c04fd79d1d88413675af6d25d21ddec57fc8 Signed-off-by: Joseph Wu --- arch/arm/configs/tegra3_android_defconfig | 2 +- drivers/staging/iio/imu/mpu/Kconfig | 28 +- drivers/staging/iio/imu/mpu/Makefile | 10 + drivers/staging/iio/imu/mpu/README | 644 +++- .../staging/iio/imu/mpu/dmpDefaultMPU6050.c | 199 +- drivers/staging/iio/imu/mpu/dmpKey.h | 120 +- drivers/staging/iio/imu/mpu/dmpmap.h | 25 +- drivers/staging/iio/imu/mpu/inv_mpu3050_iio.c | 121 +- drivers/staging/iio/imu/mpu/inv_mpu_core.c | 3211 ++++++++--------- drivers/staging/iio/imu/mpu/inv_mpu_iio.h | 974 +++-- drivers/staging/iio/imu/mpu/inv_mpu_misc.c | 1372 ++++--- drivers/staging/iio/imu/mpu/inv_mpu_ring.c | 890 +++-- drivers/staging/iio/imu/mpu/inv_mpu_trigger.c | 31 +- .../staging/iio/imu/mpu/inv_slave_bma250.c | 217 +- drivers/staging/iio/inv_test/Kconfig | 11 + drivers/staging/iio/inv_test/Makefile | 6 + drivers/staging/iio/inv_test/inv_counters.c | 154 + drivers/staging/iio/inv_test/inv_counters.h | 72 + drivers/staging/iio/magnetometer/Kconfig | 11 +- drivers/staging/iio/magnetometer/Makefile | 6 +- .../iio/magnetometer/inv_compass/Kconfig | 25 + .../iio/magnetometer/inv_compass/Makefile | 25 + .../iio/magnetometer/inv_compass/README | 176 + .../{ => inv_compass}/inv_ami306_core.c | 151 +- .../{ => inv_compass}/inv_ami306_iio.h | 28 +- .../{ => inv_compass}/inv_ami306_ring.c | 66 +- .../{ => inv_compass}/inv_ami306_trigger.c | 10 +- .../inv_compass/inv_yas53x_core.c | 969 +++++ .../magnetometer/inv_compass/inv_yas53x_iio.h | 172 + .../inv_compass/inv_yas53x_ring.c | 165 + .../inv_compass/inv_yas53x_trigger.c | 91 + include/linux/mpu.h | 1 + 32 files changed, 6441 insertions(+), 3542 deletions(-) create mode 100644 drivers/staging/iio/inv_test/Kconfig create mode 100644 drivers/staging/iio/inv_test/Makefile create mode 100644 drivers/staging/iio/inv_test/inv_counters.c create mode 100644 drivers/staging/iio/inv_test/inv_counters.h create mode 100644 drivers/staging/iio/magnetometer/inv_compass/Kconfig create mode 100644 drivers/staging/iio/magnetometer/inv_compass/Makefile create mode 100644 drivers/staging/iio/magnetometer/inv_compass/README rename drivers/staging/iio/magnetometer/{ => inv_compass}/inv_ami306_core.c (85%) rename drivers/staging/iio/magnetometer/{ => inv_compass}/inv_ami306_iio.h (88%) rename drivers/staging/iio/magnetometer/{ => inv_compass}/inv_ami306_ring.c (82%) rename drivers/staging/iio/magnetometer/{ => inv_compass}/inv_ami306_trigger.c (91%) create mode 100644 drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_core.c create mode 100644 drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_iio.h create mode 100644 drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_ring.c create mode 100644 drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_trigger.c diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig index 1be3e6188ea..11eb7e67a29 100644 --- a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -458,7 +458,7 @@ CONFIG_IIO_KFIFO_BUF=y CONFIG_INV_MPU_IIO=y CONFIG_SENSORS_ISL29028=y CONFIG_SENSORS_LTR558=y -CONFIG_AMI306=y +CONFIG_INV_AMI306_IIO=y CONFIG_RIL=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y diff --git a/drivers/staging/iio/imu/mpu/Kconfig b/drivers/staging/iio/imu/mpu/Kconfig index a19f9809754..21cc237cc2a 100644 --- a/drivers/staging/iio/imu/mpu/Kconfig +++ b/drivers/staging/iio/imu/mpu/Kconfig @@ -1,12 +1,24 @@ # -# inv-mpu drivers for Invensense MPU devices and combos +# 
inv-mpu-iio driver for Invensense MPU devices and combos # config INV_MPU_IIO - tristate "Invensense MPU devices" - depends on I2C && SYSFS && IIO && IIO_KFIFO_BUF && IIO_TRIGGER && !INV_MPU - default n - help - This driver supports the Invensense MPU devices. - This driver can be built as a module. The module will be called - inv-mpu-iio. + tristate "Invensense MPU devices" + depends on I2C && SYSFS && IIO && IIO_KFIFO_BUF && IIO_TRIGGER && !INV_MPU + default n + help + This driver supports the Invensense MPU devices. + This includes MPU6050/MPU3050/MPU9150/ITG3500/MPU6500/MPU9250. + This driver can be built as a module. The module will be called + inv-mpu-iio. + +config INV_IIO_MPU3050_ACCEL_SLAVE_BMA250 + bool "Invensense MPU3050 slave accelerometer device for bma250" + depends on INV_MPU_IIO + default n + help + This is slave device enable MPU3050 accelerometer slave device. + Right now, it is only bma250. For other acceleromter device, + it can be added to this menu if the proper interface is filled. + There are some interface function to be defined. + diff --git a/drivers/staging/iio/imu/mpu/Makefile b/drivers/staging/iio/imu/mpu/Makefile index 0efcc6d7065..3bbb26997af 100644 --- a/drivers/staging/iio/imu/mpu/Makefile +++ b/drivers/staging/iio/imu/mpu/Makefile @@ -11,7 +11,17 @@ inv-mpu-iio-objs += inv_mpu_misc.o inv-mpu-iio-objs += inv_mpu3050_iio.o inv-mpu-iio-objs += dmpDefaultMPU6050.o +CFLAGS_inv_mpu_core.o += -Idrivers/staging/iio +CFLAGS_inv_mpu_ring.o += -Idrivers/staging/iio +CFLAGS_inv_mpu_trigger.o += -Idrivers/staging/iio +CFLAGS_inv_mpu_misc.o += -Idrivers/staging/iio +CFLAGS_inv_mpu3050_iio.o += -Idrivers/staging/iio +CFLAGS_dmpDefaultMPU6050.o += -Idrivers/staging/iio + # the Bosch BMA250 driver is added to the inv-mpu device driver because it # must be connected to an MPU3050 device on the secondary slave bus. +ifeq ($(CONFIG_INV_IIO_MPU3050_ACCEL_SLAVE_BMA250), y) inv-mpu-iio-objs += inv_slave_bma250.o +CFLAGS_inv_slave_bma250.o += -Idrivers/staging/iio +endif diff --git a/drivers/staging/iio/imu/mpu/README b/drivers/staging/iio/imu/mpu/README index a0cfb3f2b7b..a0a954852f5 100644 --- a/drivers/staging/iio/imu/mpu/README +++ b/drivers/staging/iio/imu/mpu/README @@ -1,56 +1,133 @@ Kernel driver inv-mpu-iio Author: Invensense +Table of Contents: +================== +- Description +- Integrating the Driver in the Linux Kernel +- Board and Platform Data + > Interrupt Pin + > Platform Data +- Board File Modifications for Secondary I2C Configuration + > MPU-6050 + AKM8963 on the secondary I2C interface + > MPU-6500 + AKM8963 on the secondary I2C interface + > MPU-9150 + > MPU-9250 + > MPU-3050 + BMA250 on the secondary I2C interface +- Board File Modifications for Invensense Devices + > MPU-3050 + > ITG-3500 + > MPU-6050 + > MPU-6500 + > MPU-6XXX + > MPU-9150 + > MPU-9250 +- IIO Subsystem + > Communicating with the Driver in Userspace + > ITG-3500 + > MPU-6050 and MPU-6500 + > MPU-9150 + > MPU-9250 + > MPU-3050 + BMA250 on the secondary I2C interface +- Suspend and Resume +- DMP Event +- Motion Event +- Streaming Data to an Userspace Application +- Recommended Sysfs Entry Setup Sequence + > With DMP Firmware + > Without DMP Firmware +- Test Applications + > Running Test Applications with MPU-9150/MPU-6050/MPU-6500/MPU-9250 + > Running Test Applications with MPU-3050/ITG-3500 + + Description ------------ +=========== This document describes how to install the Invensense device driver into a -Linux kernel. 
At the moment, this driver supports the ITG3500/MPU6050/MPU9150/MPU3050. The slave -address of these four chips are 0x68. However, the actual slave address depends on the board -configuration. The driver does not assume anything about it. - -Files included in this package: -Kconfig -Makefile -inv_mpu_core.c -inv_mpu_misc.c -inv_mpu_trigger.c -inv_mpu3050_iio.c -inv_mpu_iio.h -inv_mpu_ring.c -inv_slave_bma250.c -dmpDefaultMPU6050.c -dmpkey.h -dmpmap.h -mpu.h -Including the driver in the Linux kernel ----------------------------------------- -mpu.h should be added to "kernel/include/linux". -Other files listed should be added to the drivers/staging/iio/imu/mpu directory (or another -directory of your choosing). When building the kernel, the driver will not -appear in menuconfig without modifications similar to those below: - -modify "drivers/staging/iio/imu/Kconfig" like -source "drivers/staging/iio/imu/mpu/Kconfig" - -modify "drivers/staging/iio/imu/Makefile" -obj-y += mpu/ +Linux kernel. The Invensense driver currently supports the following sensors: +- ITG-3500 +- MPU-6050 +- MPU-9150 +- MPU-6500 +- MPU-9250 +- MPU-3050 +- MPU-6XXX(either MPU6050 or MPU6500, driver to do auto detection) + +The slave address of each device is either 0x68 or 0x69, depending on the AD0 +pin value of the device. Please refer to the appropriate product specification +document for further information regarding the AD0 pin. The driver supports both +addresses. + +The following files are included in this package: +- Kconfig +- Makefile +- inv_mpu_core.c +- inv_mpu_misc.c +- inv_mpu_trigger.c +- inv_mpu3050_iio.c +- inv_mpu_iio.h +- inv_mpu_ring.c +- inv_slave_bma250.c +- dmpDefaultMPU6050.c +- dmpkey.h +- dmpmap.h +- mpu.h + + +Integrating the Driver in the Linux Kernel +========================================== +Please add the files as follows: +- Add mpu.h to "kernel/include/linux". +- Add all other files to drivers/staging/iio/imu/inv_mpu +(another directory is acceptable, but this is the recommended destination) + +In order to see the driver in menuconfig when building the kernel, please +make modifications as shown below: + + modify "drivers/staging/iio/imu/Kconfig" with: + >> source "drivers/staging/iio/imu/inv_mpu/Kconfig" + + modify "drivers/staging/iio/imu/Makefile" with: + >> obj-y += inv_mpu/ + Board and Platform Data ------------------------ -The board file needs to be modified to register the device on an I2C bus. An -i2c_board_info instance must be defined as seen below. The hardcoded value of -140 corresponds to the GPIO input pin wired to the device's interrupt pin. -This pin will most likely be different for your platform. -platform data is for orientation matrix, and secondary bus situations. -For MPU9150, it regarded as a MPU9150 and AKM8975 in the secondary. -So the secondary i2c address must be filled. ------------------------------------------------------------------ -The board file is arch/arm/mach-omap2/board-omap4panda.c or -modify the board file in your system as below: --------------------------------------------------------- -For AKM8963 in the secondary i2c bus of MPU6050, +======================= +In order to recognize the Invensense device on the I2C bus, the board file must +be modified. +The i2c_board_info instance must be defined as shown below. + +Interrupt Pin +------------- +The hardcoded value of 140 corresponds to the GPIO input pin connected to the +Invensense device's interrupt pin. 
+This pin will most likely be different for your platform, and the value should +be changed accordingly. + +Platform Data +------------- +The platform data (orientation matrix and secondary bus configurations) must be +modified as show below, according to your particular platform configuration. + +Please note that the MPU-9150 it is treated as a MPU-6050 with AKM8975 on the +device's secondary I2C interface. Thus the secondary I2C address must be +provided. + +Please note that the MPU-9250 it is treated as a MPU-6500 with AKM8963 on the +device's secondary I2C interface. Thus the secondary I2C address must be +provided. + +Board File Modifications for Secondary I2C Configuration +======================================================== +For the Panda Board, the board file can be found at +arch/arm/mach-omap2/board-omap4panda.c. +Please modify the pertinent baord file in your system according to the examples +shown below: + +MPU-6050 + AKM8963 on the secondary I2C interface +------------------------------------------------- static struct mpu_platform_data gyro_platform_data = { - .int_config = 0x00, + .int_config = 0x10, .level_shifter = 0, .orientation = { -1, 0, 0, 0, 1, 0, @@ -59,10 +136,26 @@ static struct mpu_platform_data gyro_platform_data = { .sec_slave_id = COMPASS_ID_AK8963, .secondary_i2c_addr = 0x0E }; ------------------------------------------------------------ -For MPU9150, the secondary i2c bus address must be filled as below. + +MPU-6500 + AKM8963 on the secondary I2C interface +------------------------------------------------- static struct mpu_platform_data gyro_platform_data = { - .int_config = 0x00, + .int_config = 0x10, + .level_shifter = 0, + .orientation = { -1, 0, 0, + 0, 1, 0, + 0, 0, -1 }, + .sec_slave_type = SECONDARY_SLAVE_TYPE_COMPASS, + .sec_slave_id = COMPASS_ID_AK8963, + .secondary_i2c_addr = 0x0E +}; + +MPU-9150 +-------- +For MPU-9150, please provide the following secondary I2C bus information. + +static struct mpu_platform_data gyro_platform_data = { + .int_config = 0x10, .level_shifter = 0, .orientation = { -1, 0, 0, 0, 1, 0, @@ -71,10 +164,28 @@ static struct mpu_platform_data gyro_platform_data = { .sec_slave_id = COMPASS_ID_AK8975, .secondary_i2c_addr = 0x0E }; ------------------------------------------------------------ -for BMA250 in the secondary, please use the platform data as: + +MPU-9250 +-------- +For MPU-9250, please provide the following secondary I2C bus information. + +static struct mpu_platform_data gyro_platform_data = { + .int_config = 0x10, + .level_shifter = 0, + .orientation = { -1, 0, 0, + 0, 1, 0, + 0, 0, -1 }, + .sec_slave_type = SECONDARY_SLAVE_TYPE_COMPASS, + .sec_slave_id = COMPASS_ID_AK8963, + .secondary_i2c_addr = 0x0C +}; + +MPU-3050 + BMA250 on the secondary I2C interface +------------------------------------------------ +For BMA250 on the secondary I2C bus, please provide the following information. 
+ static struct mpu_platform_data gyro_platform_data = { - .int_config = 0x00, + .int_config = 0x10, .level_shifter = 0, .orientation = { -1, 0, 0, 0, 1, 0, @@ -83,10 +194,23 @@ static struct mpu_platform_data gyro_platform_data = { .sec_slave_id = ACCEL_ID_BMA250, .secondary_i2c_addr = 0x18, }; ---------------------------------------------------------------- -the i2c init data is: ----------------------------------------------------------------- -For MPU3050, + + +Board File Modifications for Invensense Devices +=============================================== +For Invensense devices, please provide the i2c init data as shown in the +examples below. + +In the _i2c_init function, the device is registered in the following manner: + + // arch/arm/mach-omap2/board-omap4panda.c + // in static int __init omap4_panda_i2c_init(void) + omap_register_i2c_bus(4, 400, + single_chip_board_info, + ARRAY_SIZE(single_chip_board_info)); + +MPU-3050 +-------- static struct i2c_board_info __initdata single_chip_board_info[] = { { I2C_BOARD_INFO("mpu3050", 0x68), @@ -94,8 +218,9 @@ static struct i2c_board_info __initdata single_chip_board_info[] = { .platform_data = &gyro_platform_data, }, }; ----------------------------------------------------------------- -for ITG3500: + +ITG-3050 +-------- static struct i2c_board_info __initdata single_chip_board_info[] = { { I2C_BOARD_INFO("itg3500", 0x68), @@ -103,7 +228,9 @@ static struct i2c_board_info __initdata single_chip_board_info[] = { .platform_data = &gyro_platform_data, }, }; -for MPU6050 + +MPU6050 +------- static struct i2c_board_info __initdata single_chip_board_info[] = { { I2C_BOARD_INFO("mpu6050", 0x68), @@ -111,7 +238,29 @@ static struct i2c_board_info __initdata single_chip_board_info[] = { .platform_data = &gyro_platform_data, }, }; -for MPU9150 + +MPU6500 +------- +static struct i2c_board_info __initdata single_chip_board_info[] = { + { + I2C_BOARD_INFO("mpu6500", 0x68), + .irq = (IH_GPIO_BASE + MPUIRQ_GPIO), + .platform_data = &gyro_platform_data, + }, +}; + +MPU6XXX +------- +static struct i2c_board_info __initdata single_chip_board_info[] = { + { + I2C_BOARD_INFO("mpu6xxx", 0x68), + .irq = (IH_GPIO_BASE + MPUIRQ_GPIO), + .platform_data = &gyro_platform_data, + }, +}; + +MPU9150 +------- arch/arm/mach-omap2/board-omap4panda.c static struct i2c_board_info __initdata single_chip_board_info[] = { { @@ -121,204 +270,319 @@ static struct i2c_board_info __initdata single_chip_board_info[] = { }, }; -In the _i2c_init function, the device is registered in the following manner: - +MPU9250 +------- arch/arm/mach-omap2/board-omap4panda.c - in static int __init omap4_panda_i2c_init(void) -omap_register_i2c_bus(4, 400, single_chip_board_info, ARRAY_SIZE(single_chip_board_info)); +static struct i2c_board_info __initdata single_chip_board_info[] = { + { + I2C_BOARD_INFO("mpu9250", 0x68), + .irq = (IH_GPIO_BASE + MPUIRQ_GPIO), + .platform_data = &gyro_platform_data, + }, +}; IIO subsystem ----------------------------------------------- -successful installation will create two directories under /sys/bus/iio/devices -iio:device0 -trigger0 -Under /dev/ diretory, a file "iio:device0" will also be created(or iio:deviceX, if -you have more than one iio devices). -Communicating with the driver in userspace +============= +A successful installation will create the following two new directories under +/sys/bus/iio/devices: + - iio:device0 + - trigger0 + +Also, a new file, "iio:device0", will be created in the /dev/ diretory. 
+(if you have more than one IIO device, the file will be named "iio:deviceX", +where X is a number) + + +Communicating with the Driver in Userspace ------------------------------------------ -Upon installation, the driver generates several files in sysfs. If your -platform is configured as detailed above, navigate to the following path to -find these files: -/sys/bus/iio/devices/iio:device0 - -The list below provides a brief description for each file. --------------------------------------- -For ITG3500: +The driver generates several files in sysfs upon installation. +These files are used to communicate with the driver. The files can be found +at /sys/bus/iio/devices/iio:device0 (or ../iio:deviceX as shown above). + +A brief description of the pertinent files for each Invensense device is shown +below: + +ITG-3500 +-------- temperature (Read-only) -Read temperature data directly from the temperature register. +--Read temperature data directly from the temperature register. sampling_frequency (Read/write) -Configure the ADC sampling rate and FIFO output rate. +--Configure the ADC sampling rate and FIFO output rate. sampling_frequency_available(read-only) -show commonly used frequency +--show commonly used frequency clock_source (Read-only) -Check which clock-source is used by the chip. +--Check which clock-source is used by the chip. power_state (Read/write) -turn on/off the power supply +--turn on/off the power supply self_test (read-only) -read this entry trigger self test. The return value is D. +--read this entry trigger self test. The return value is D. D is the success/fail. For different chip, the result is different for success/fail. 1 means success 0 means fail. The LSB of D is for gyro; the bit next to LSB of D is for accel. The bit 2 of D is for compass result. key (read-only) -show the key value of this driver. Used by MPL. +--show the key value of this driver. Used by MPL. gyro_matrix (read-only) -show the orient matrix obtained from board file. +--show the orientation matrix obtained from the board file. -------------------------------------------------------------- -For MPU6050: -MPU6050 has all the sysfs files that ITG3500 has. It has additional files list below: +MPU-6050 and MPU-6500 +--------------------- +MPU-6050 and MPU-6500 have all sysfs files belonging to ITG-3500 (shown above). +In addition, it has the files below: gyro_enable (read/write) -enable/disable gyro functionality. affect raw_gyro. turn off this will shut down gyro and save power. +--enable/disable gyro functionality. Affects raw_gyro. Turning this off this + will shut down gyro and save power. accl_enable (read/write) -enable/disable accelerometer functionality. affect raw_accl. turn off this will shut down accel and save power. +--enable/disable accelerometer functionality. Affects raw_accl. +Turning this off this will shut down accel and save power. firmware_loaded (read/write) -Flag indicate the whether firmware is loaded or not in the DMP engine. 0 means no firmware loaded. -1 means firmware is already loaded . This flag can only be written as 0. 1 is updated -internally. +--Flag indicating the whether firmware is loaded or not in the DMP engine. +0 means no firmware loaded. 1 means firmware is already loaded . This +flag can only be written as 0. It internally updates to 1. dmp_on(read/write) -This entry controls whether to run DMP or not. To enable DMP , firmware_loaded must be 1. write 1 to enable -DMP and write 0 to disable dmp. 
- -dmp_in_on(read/write) -This entry controls whether dmp interrupt is on/off. firmware_loaded must be 1. sometimes, it is desirable -that interrupt is off while DMP is running. +--This entry controls whether to run DMP or not. +Write 1 to enable DMP and write 0 to disable dmp. +Please note that firmware_loaded must be 1 in order to enable DMP. + +dmp_int_on(read/write) +--This entry controls whether dmp interrupt is on/off. +Please note that firmware_loaded must be 1. +Also, we'd like to remind you that it is sometimes advantageous to +turn interrupts off while the DMP is running. + +dmp_output_rate +--control dmp output rate when dmp is on. + +dmp_event_int_on(read/write) +--This entry controls whether dmp event interrupt is on/off. +Please note that turning this on will turn off the data interrupt. +Interrupts will be generated only when events occur. +This is useful for saving power when the system is waiting for a special event +to wake up. dmp_firmware (write only binary file) -This is the entry that firmware code is loaded into. If the action is succeful, firmware_loaded will -be updated as 1. In order to load new firmware, firmware_loaded flag should be set 0. - -lpa_mode(read-write) -Low power accelerometer mode -lpa_freq(read-write) -low power acceleromter frequency. +--DMP firmware code is loaded into this entry. +If loading is successful, the firmware_loaded flag will be updated to 1. +In order to load new firmware, the firmware_loaded flag must be first set to 0. accel_matrix -orient matrix for accel +--orientation matrix for accelerometer. -flick_lower, -flick_upper, -flick_counter, -flick_message_on, -flick_int_on, -flick_axis, -Flick related entry +quaternion_on +--Turn on/off quaterniion data output. DMP is required for this feature. pedometer_time pedometer_steps, -Pedometer related entry +--Pedometer related entries -event_flick event_tap -event_orientation event_display_orientation -event related entry +event_accel_motion +event_smd +--Event related entries. +Please poll these entries to read their values. Direct reads will yield +meaningless results. +Further details are provided in the DMP Events section of this README. tap_on -control tap function of DMP +--Controls tap function of DMP tap_time tap_min_count tap_threshold -tap related entries. control various parameters of tap function. - -orientation_on -turn on/off orientation function of DMP. +--Tap related entries. Controls various parameters of tap function. display_orientation_on -turn on/off display orientation function of DMP. +--Turn on/off display orientation function of DMP. + +smd_enable +enable SMD(Significant Motion Detection) detection. + +smd_threshold +This set the threshold of the motion when SMD start to be triggered. The +value is in acclerometer counts. + +smd_delay_threshold +This sets the threshold of time after which SMD can be triggered. +The value is in seconds. + +smd_delay_threshold2 +This sets the threshold of time during which SMD can be triggered (after the +smd_delay_threshold timer has expired). +The value is in seconds. quaternion_on -turn on/off quaterniion data output. must use DMP. -------------------------------------------------------------------- -for MPU9150 and secondary compass -MPU9150 has every entry MPU6050 has. It has additional entries: +--Turn on/off quaterniion data output. DMP is required for this feature. + +Low power accel motion interrupt related settings. +if motion_lpa_on is set, this will disable all engines except accel. 
+Accel will enter low power mode and the whole chip will be turned on/off at a
+specific frequency.
+-----------------------------------------------------------------------------
+motion_lpa_duration
+--Set the motion duration, in ms. All motion interrupts are filtered out
+  during this period.
+
+motion_lpa_threshold
+--Set the motion threshold, in mg. The maximum is 1020mg and the resolution is 32mg.
+
+motion_lpa_on
+--Turn on/off the motion function.
+
+motion_lpa_freq
+--Motion LPA frequency, which determines the power on/off frequency.
+------------------------------------------------------------------------------
+MPU-9150
+--------
+MPU-9150 has all of MPU-6050's entries. It also has two additional entries,
+described below.
compass_enable (read/write)
-enable this will enable compass function.
+--Enables the compass function.
compass_matrix (read-only)
-compass orient matrix
----------------------
-for MPU3050 and secondary accelerometer(only BMA250 is supported right now)
-It has every entry ITG3500 has and has two addiontal entries.
+--Compass orientation matrix
+
+MPU-3050 with BMA250 on secondary I2C interface
+-----------------------------------------------
+MPU-3050 with BMA250 on the secondary I2C interface has every ITG-3500 entry.
+It also has two additional entries, shown below:
+
accl_matrix
+
accl_enable
-----------------------------------------------------------------------------------
-low power accelerometer mode
-Lower power accelerometer mode is a special mode. It works only for accelerometer.
-It has two entries, lpa_mode and lpa_freq. Only MPU6050 and MPU9150 has this mode.
-To run low power accel mode, set lpa_mode to 1, set lpa_freq to 0~3, which corresponds
-to 1.25Hz, 5Hz, 20Hz, 40Hz. "gyro_enable" and "compass_enable" must be zero. "dmp_on"
-must be zero.
-----------------------------------------------------------------------------------
-dmp event.
-dmp event is event out by the DMP unit inside MPU. Only MPU6050 and MPU9150 supports this.
-There are four sysfs entreis, event_flick, event_tap and event_orientation and
-event_display_orientation. These four events must
-be polled before read. The proper method to poll sysfs is:
+
+Suspend and Resume
+==================
+The suspend and resume functions are callbacks registered with the system
+and executed when the system suspends and resumes.
+They are enabled when CONFIG_PM is defined.
+The current behavior is simple:
+- suspend will turn off the chip
+- resume will turn on the chip
+
+However, it is possible for the driver to do more complex things;
+for example, leaving the pedometer running while the system is suspended. This
+saves overall system power while keeping the pedometer working. Other behaviors
+are possible too.
+
+DMP Event
+=========
+A DMP Event is an event that is output by the DMP unit within the Invensense
+device (MPU).
+Only the MPU-6050, MPU-6500, MPU-9150, and MPU-9250 feature the DMP.
+
+There are four sysfs entries for DMP events:
+- event_tap
+- event_display_orientation
+- event_accel_motion
+- event_smd
+
+These events must be polled before reading.
+
+The proper method to poll sysfs is as follows:
1. open file.
2. dummy read.
3. poll.
4. once the poll passed, use fopen and fread to read the sysfs entry.
5. interpret the data.
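+
+A minimal userspace sketch in C of the polling sequence above is shown below.
+It is for illustration only: error handling is omitted, the entry name
+"event_tap" is just one of the event entries listed above, and POLLPRI|POLLERR
+is the usual way to wait on a sysfs attribute.
+
+    #include <fcntl.h>
+    #include <poll.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+        const char *path = "/sys/bus/iio/devices/iio:device0/event_tap";
+        char buf[32];
+        /* 1. open file */
+        int fd = open(path, O_RDONLY);
+        /* 2. dummy read */
+        (void)read(fd, buf, sizeof(buf));
+        for (;;) {
+            struct pollfd pfd = { .fd = fd, .events = POLLPRI | POLLERR };
+            /* 3. poll until the driver signals new event data */
+            poll(&pfd, 1, -1);
+            /* 4. once the poll passed, use fopen and fread to read the entry */
+            FILE *fp = fopen(path, "r");
+            size_t n = fread(buf, 1, sizeof(buf) - 1, fp);
+            fclose(fp);
+            buf[n] = '\0';
+            /* 5. interpret the data */
+            printf("event data: %s\n", buf);
+        }
+        return 0;
+    }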
-------------------------------------------------------------------------------
-If streaming to a userspace application, the recommended way to access gyro/accel/compass
-data is via /dev/iio:device0. Follow these steps to get constant readings from
-the driver:
+
+Streaming Data to a Userspace Application
+=========================================
+When streaming data to a userspace application, we recommend that you access
+gyro/accel/compass data via /dev/iio:device0.
+
+Please follow the steps below to read data at a constant rate from the driver:
1. Write a 1 to power_state to turn on the chip. This is the default setting
   after installing the driver.
2. Write the desired output rate to fifo_rate.
-3. write 1 to enable to turn on the event.
+3. Write 1 to enable to turn on the event.
4. Read /dev/iio:device0 to get a string of gyro/accel/compass data.
5. Parse this string to obtain each gyro/accel/compass element.
-6. If dmp firmware code is loaded, using "dmp_on" to enable/disable dmp .
-7. If compass is enabled, output will have compass data.
-===========================================================================
- Recommended sysfs entry setup senquence
-1. without DMP firmware
-1.1 set "power_state" to 1,
-1.2 change scale and fifo rate value to your need.
-1.3 change gyro_enable and accle_enable and compass_enable to your needs. For example,
-if you want gyro only, set accl_enable to 0 or set accl_enable to zero and compass_enable to zero.
-If you want accel only, set gyro_enable to 0 or set gyro_enable to zero and compass_enable to zero.
-If you want compass only, disable gyro and accel.
-1.4 set "enable" to 1. you will get output you want.
-
-2. With DMP firmware
-2.1 set "power_state" to 1,
-2.2 write "0" to firmware_loaded if it is not zero already.
-2.3 load firmware into "dmp_firmware" as a whole. Don't split the DMP firmware image.
-2.4 make sure firmware_loaded is 1 after loading.
-2.5 make other configurations similar to the situation as without DMP firmware.
-2.6 set dmp_on to 1.
-2.7 set "enable" to 1.
-=======================================================
-The enable function is using enable entry under "/sys/bus/iio/devices/iio:device0/buffer"
-==========================================================
-test applications:
-Test application is under ARTHROPOD/trunk/software/simple_apps/mpu_iio
-------------------------------------------
-To run with MPU9150/MPU6050:
-using the following command:
-for orientation/tap/flick/display orientation event:
-mpu_iio -c 10 -l 3 -p
-for normal data print
-mpu_iio -c 10 -l 3
----------------------------------------
-To run with MPU3050/ITG3500:
-mpu_iio -c 10 -l 3 -r
-----------------------------------------
-Please use mpu_iio.c and iio_utils.h as the sample code for your development.
+6. If dmp firmware code is loaded, use "dmp_on" to enable/disable the DMP.
+7. If compass is enabled, the output will contain compass data.
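+
+A minimal sketch in C of steps 1 through 4 above (illustration only: error
+handling is omitted, 50 Hz is just an example rate, "buffer/enable" is the
+enable entry noted in the setup section below, and the record layout of the
+bytes read back depends on which channels are enabled, so the data is left
+unparsed here):
+
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    static void write_sysfs(const char *file, const char *val)
+    {
+        char path[128];
+        snprintf(path, sizeof(path),
+                 "/sys/bus/iio/devices/iio:device0/%s", file);
+        FILE *fp = fopen(path, "w");
+        fputs(val, fp);
+        fclose(fp);
+    }
+
+    int main(void)
+    {
+        char data[1024];
+        write_sysfs("power_state", "1");   /* step 1: turn on the chip */
+        write_sysfs("fifo_rate", "50");    /* step 2: desired output rate */
+        write_sysfs("buffer/enable", "1"); /* step 3: turn on the event */
+        int fd = open("/dev/iio:device0", O_RDONLY);
+        for (;;) {
+            /* step 4: read gyro/accel/compass records from the device node */
+            ssize_t n = read(fd, data, sizeof(data));
+            printf("read %zd bytes\n", n);
+        }
+        return 0;
+    }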
+
+
+Recommended Sysfs Entry Setup Sequence
+======================================
+
+Without DMP Firmware
+--------------------
+1. Set "power_state" to 1.
+2. Set the scale and fifo rate values according to your needs.
+3. Set gyro_enable, accel_enable, and compass_enable according to your needs.
+   For example:
+   - If you only want gyro data, set accel_enable to 0 (and compass_enable to
+     0, if applicable).
+   - If you only want accel data, set gyro_enable to 0 (and compass_enable to
+     0, if applicable).
+   - If you only want compass data, set gyro_enable to 0 and accel_enable to 0.
+4. Set "enable" to 1.
+5. You will now get the output that you want.
+
+With DMP Firmware
+-----------------
+1. Set "power_state" to 1.
+2. Write "0" to firmware_loaded if it is not zero already.
+3. Load firmware into "dmp_firmware" as a whole. Don't split the DMP firmware
+   image.
+4. Make sure firmware_loaded is 1 after loading the DMP image.
+5. Make appropriate configurations as shown above in the "without DMP firmware"
+   case.
+6. Set dmp_on to 1.
+7. Set "enable" to 1.
+
+Please note that the enable function uses the enable entry under
+"/sys/bus/iio/devices/iio:device0/buffer".
+
+Test Applications
+=================
+A test application is located under software/simple_apps/mpu_iio.
+This application is stand-alone in that it cannot be run concurrently with other
+entities trying to access the device node(s) or sysfs entries; in particular,
+the
+
+Running Test Applications with MPU-9150/MPU-6050/MPU-6500/MPU-9250
+-------------------------------------------------------------------
+To run test applications with MPU-9150, MPU-9250, MPU-6050, or MPU-6500 devices,
+please use the following commands:
+
+1. For tap/display orientation events:
+   mpu_iio -c 10 -l 3 -p
+
+2. In addition, to test the motion interrupt (and no_motion on MPU6050) use:
+   mpu_iio -c 10 -l 3 -p -m
+
+3. For printing data normally:
+   mpu_iio -c 10 -l 3 -r
+
+Running Test Applications with MPU-3050/ITG-3500
+------------------------------------------------
+To run test applications with MPU-3050 or ITG-3500 devices,
+please use the following command:
+
+1. For printing data normally:
+   mpu_iio -c 10 -l 3 -r
+
+Please use mpu_iio.c and iio_utils.h as example code for your development
+purposes.
+
+Stress Test Application
+=======================
+A stress test application is located under software/simple_apps/stress_iio.
+This application simulates HAL's usage calls to the driver. It creates three
+threads: one for data reads, one for event reads, and one for sysfs control.
+It can run without any parameters or run with some control parameters. Please
+see the README in the same directory for details.
+
diff --git a/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c b/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c
index ff327c79c52..22b7ee4922c 100644
--- a/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c
+++ b/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c
@@ -11,46 +11,79 @@
 * GNU General Public License for more details.
 *
 */
+/**
+ * @addtogroup DRIVERS
+ * @brief Hardware drivers.
+ * + * @{ + * @file dmpDefaultMPU6050.c + * @brief dmp Default data + * @details This file is part of invensense mpu driver code + * + */ #include "dmpKey.h" #include "dmpmap.h" -#define CFG_27 (2740) -#define CFG_20 (2078) -#define CFG_23 (2743) -#define CFG_FIFO_ON_EVENT (2689) -#define CFG_ORIENT_IRQ_1 (2533) -#define CGNOTICE_INTR (2636) -#define X_GRT_Y_TMP (1318) +#define CFG_LP_QUAT (2914) +#define END_ORIENT_TEMP (2068) +#define CFG_27 (2944) +#define CFG_20 (2426) +#define CFG_23 (2947) +#define CFG_DISPLAY_ORIENT_INT (2055) +#define CFG_FIFO_ON_EVENT (2892) +#define END_PREDICTION_UPDATE (1963) +#define CGNOTICE_INTR (2822) +#define X_GRT_Y_TMP (1560) #define CFG_DR_INT (1029) #define CFG_AUTH (1035) +#define SKIP_SWING_END_1 (1753) +#define SKIP_SWING_END_2 (1768) #define FCFG_1 (1062) -#define SKIP_X_GRT_Y_TMP (1319) -#define SKIP_END_COMPARE (1395) -#define FCFG_3 (1110) +#define SKIP_X_GRT_Y_TMP (1561) +#define SKIP_END_COMPARE (1637) +#define FCFG_3 (1088) #define FCFG_2 (1066) -#define END_COMPARE_Y_X_TMP2 (1415) -#define CFG_DISPLAY_ORIENT_INT (1706) -#define FCFG_7 (1076) -#define FCFG_6 (1128) -#define NO_ORIENT_INTERRUPT (1725) -#define CFG_8 (2718) -#define CFG_15 (2726) -#define CFG_16 (2744) -#define END_COMPARE_Y_X_TMP (1367) -#define CFG_6 (2751) -#define END_ORIENT_1 (1709) -#define END_COMPARE_Y_X (1444) -#define CFG_LP_QUAT (2712) -#define END_ORIENT (1738) -#define CFG_FLICK_IN (2589) -#define CFG_7 (1221) -#define CFG_MOTION_BIAS (1224) -#define X_GRT_Y (1368) -#define TEMPLABEL (2178) -#define NOT_TIME_MINUS_1 (1528) -#define END_COMPARE_Y_X_TMP3 (1394) -#define X_GRT_Y_TMP2 (1339) +#define STATE2_T (1348) +#define END_COMPARE_Y_X_TMP3 (1636) +#define FCFG_7 (1073) +#define FCFG_6 (1106) +#define FLAT_STATE_END (1915) +#define SWING_END_4 (1818) +#define EXIT_SIGMOTDET (1408) +#define SWING_END_2 (1767) +#define SWING_END_3 (1789) +#define SWING_END_1 (1752) +#define CFG_8 (2920) +#define CFG_15 (2929) +#define CFG_16 (2948) +#define UPDATE_PROP_ROT (2037) +#define CFG_EXT_GYRO_BIAS (1189) +#define END_COMPARE_Y_X_TMP (1609) +#define DO_NOT_UPDATE_PROP_ROT (2041) +#define CFG_7 (1408) +#define FLAT_STATE_END_TEMP (1885) +#define END_ORIENT (2086) +#define END_COMPARE_Y_X (1686) +#define END_COMPARE_Y_X_TMP2 (1657) +#define SMD_TP2 (1371) +#define CFG_FLICK_IN (2775) +#define SKIP_SWING_END_3 (1790) +#define SMD_TP1 (1346) +#define TILTG75_START (1874) +#define CFG_6 (2955) +#define TILTL75_END (1871) +#define END_SIGMOTDET (1401) +#define EXIT1 (1347) +#define EXIT0 (1330) +#define EXIT3 (1382) +#define EXIT2 (1372) +#define TILTL75_START (1845) +#define CFG_MOTION_BIAS (1410) +#define X_GRT_Y (1610) +#define TEMPLABEL (2526) +#define CFG_GYRO_RAW_DATA (2924) +#define X_GRT_Y_TMP2 (1581) #define D_0_22 (22+512) #define D_0_24 (24+512) @@ -122,6 +155,9 @@ #define CPASS_MTX_20 (37 * 16 + 8) #define CPASS_MTX_21 (37 * 16 + 12) #define CPASS_MTX_22 (43 * 16 + 12) +#define D_EXT_GYRO_BIAS_X (61 * 16) +#define D_EXT_GYRO_BIAS_Y (61 * 16 + 4) +#define D_EXT_GYRO_BIAS_Z (61 * 16 + 8) #define D_ACT0 (40 * 16) #define D_ACSX (40 * 16 + 4) #define D_ACSY (40 * 16 + 8) @@ -132,38 +168,56 @@ #define FLICK_LOWER (45 * 16 + 12) #define FLICK_UPPER (46 * 16 + 12) -#define D_AUTH_OUT (992) -#define D_AUTH_IN (996) -#define D_AUTH_A (1000) -#define D_AUTH_B (1004) +#define D_SMD_ENABLE (49 * 16) +#define D_SMD_ACCEL_THLD (53 * 16 + 8) +#define D_SMD_DELAY_THLD (54 * 16 + 4) +#define D_SMD_DELAY2_THLD (54 * 16 + 12) +#define D_SMD_EXE_STATE (55 * 16) +#define D_SMD_DELAY_CNTR 
(54 * 16) + +#define D_AUTH_OUT (992) +#define D_AUTH_IN (996) +#define D_AUTH_A (1000) +#define D_AUTH_B (1004) -#define D_PEDSTD_BP_B (768 + 0x1C) -#define D_PEDSTD_HP_A (768 + 0x78) -#define D_PEDSTD_HP_B (768 + 0x7C) -#define D_PEDSTD_BP_A4 (768 + 0x40) -#define D_PEDSTD_BP_A3 (768 + 0x44) -#define D_PEDSTD_BP_A2 (768 + 0x48) -#define D_PEDSTD_BP_A1 (768 + 0x4C) -#define D_PEDSTD_INT_THRSH (768 + 0x68) -#define D_PEDSTD_CLIP (768 + 0x6C) -#define D_PEDSTD_SB (768 + 0x28) -#define D_PEDSTD_SB_TIME (768 + 0x2C) -#define D_PEDSTD_PEAKTHRSH (768 + 0x98) -#define D_PEDSTD_TIML (768 + 0x2A) -#define D_PEDSTD_TIMH (768 + 0x2E) -#define D_PEDSTD_PEAK (768 + 0X94) -#define D_PEDSTD_STEPCTR (768 + 0x60) -#define D_PEDSTD_TIMECTR (964) -#define D_PEDSTD_DECI (768 + 0xA0) +#define D_PEDSTD_BP_B (768 + 0x1C) +#define D_PEDSTD_HP_A (768 + 0x78) +#define D_PEDSTD_HP_B (768 + 0x7C) +#define D_PEDSTD_BP_A4 (768 + 0x40) +#define D_PEDSTD_BP_A3 (768 + 0x44) +#define D_PEDSTD_BP_A2 (768 + 0x48) +#define D_PEDSTD_BP_A1 (768 + 0x4C) +#define D_PEDSTD_INT_THRSH (768 + 0x68) +#define D_PEDSTD_CLIP (768 + 0x6C) +#define D_PEDSTD_SB (768 + 0x28) +#define D_PEDSTD_SB_TIME (768 + 0x2C) +#define D_PEDSTD_PEAKTHRSH (768 + 0x98) +#define D_PEDSTD_TIML (768 + 0x2A) +#define D_PEDSTD_TIMH (768 + 0x2E) +#define D_PEDSTD_PEAK (768 + 0X94) +#define D_PEDSTD_STEPCTR (768 + 0x60) +#define D_PEDSTD_TIMECTR (964) +#define D_PEDSTD_DECI (768 + 0xA0) -#define D_HOST_NO_MOT (976) +#define D_HOST_NO_MOT (976) +#define D_ACCEL_BIAS (660) + +#define D_ORIENT_GAP (76) + +#define D_TILT0_H (48) +#define D_TILT0_L (50) +#define D_TILT1_H (52) +#define D_TILT1_L (54) +#define D_TILT2_H (56) +#define D_TILT2_L (58) +#define D_TILT3_H (60) +#define D_TILT3_L (62) static const struct tKeyLabel dmpTConfig[] = { {KEY_CFG_27, CFG_27}, {KEY_CFG_20, CFG_20}, {KEY_CFG_23, CFG_23}, {KEY_CFG_FIFO_ON_EVENT, CFG_FIFO_ON_EVENT}, - {KEY_CFG_ORIENT_IRQ_1, CFG_ORIENT_IRQ_1}, {KEY_CGNOTICE_INTR, CGNOTICE_INTR}, {KEY_X_GRT_Y_TMP, X_GRT_Y_TMP}, {KEY_CFG_DR_INT, CFG_DR_INT}, @@ -177,13 +231,12 @@ static const struct tKeyLabel dmpTConfig[] = { {KEY_CFG_DISPLAY_ORIENT_INT, CFG_DISPLAY_ORIENT_INT}, {KEY_FCFG_7, FCFG_7}, {KEY_FCFG_6, FCFG_6}, - {KEY_NO_ORIENT_INTERRUPT, NO_ORIENT_INTERRUPT}, {KEY_CFG_8, CFG_8}, {KEY_CFG_15, CFG_15}, {KEY_CFG_16, CFG_16}, + {KEY_CFG_EXT_GYRO_BIAS, CFG_EXT_GYRO_BIAS}, {KEY_END_COMPARE_Y_X_TMP, END_COMPARE_Y_X_TMP}, {KEY_CFG_6, CFG_6}, - {KEY_END_ORIENT_1, END_ORIENT_1}, {KEY_END_COMPARE_Y_X, END_COMPARE_Y_X}, {KEY_CFG_LP_QUAT, CFG_LP_QUAT}, {KEY_END_ORIENT, END_ORIENT}, @@ -192,8 +245,8 @@ static const struct tKeyLabel dmpTConfig[] = { {KEY_CFG_MOTION_BIAS, CFG_MOTION_BIAS}, {KEY_X_GRT_Y, X_GRT_Y}, {KEY_TEMPLABEL, TEMPLABEL}, - {KEY_NOT_TIME_MINUS_1, NOT_TIME_MINUS_1}, {KEY_END_COMPARE_Y_X_TMP3, END_COMPARE_Y_X_TMP3}, + {KEY_CFG_GYRO_RAW_DATA, CFG_GYRO_RAW_DATA}, {KEY_X_GRT_Y_TMP2, X_GRT_Y_TMP2}, {KEY_D_0_22, D_0_22}, {KEY_D_0_96, D_0_96}, @@ -210,6 +263,7 @@ static const struct tKeyLabel dmpTConfig[] = { {KEY_D_1_92, D_1_92}, {KEY_D_1_160, D_1_160}, {KEY_D_1_176, D_1_176}, + {KEY_D_1_178, D_1_178}, {KEY_D_1_218, D_1_218}, {KEY_D_1_232, D_1_232}, {KEY_D_1_250, D_1_250}, @@ -263,7 +317,28 @@ static const struct tKeyLabel dmpTConfig[] = { {KEY_D_PEDSTD_STEPCTR, D_PEDSTD_STEPCTR}, {KEY_D_PEDSTD_TIMECTR, D_PEDSTD_TIMECTR}, {KEY_D_PEDSTD_DECI, D_PEDSTD_DECI}, - {KEY_D_HOST_NO_MOT, D_HOST_NO_MOT} + {KEY_D_HOST_NO_MOT, D_HOST_NO_MOT}, + {KEY_D_ACCEL_BIAS, D_ACCEL_BIAS}, + {KEY_D_ORIENT_GAP, D_ORIENT_GAP}, + {KEY_D_TILT0_H, D_TILT0_H}, 
+ {KEY_D_TILT0_L, D_TILT0_L}, + {KEY_D_TILT1_H, D_TILT1_H}, + {KEY_D_TILT1_L, D_TILT1_L}, + {KEY_D_TILT2_H, D_TILT2_H}, + {KEY_D_TILT2_L, D_TILT2_L}, + {KEY_D_TILT3_H, D_TILT3_H}, + {KEY_D_TILT3_L, D_TILT3_L}, + {KEY_CFG_EXT_GYRO_BIAS_X, D_EXT_GYRO_BIAS_X}, + {KEY_CFG_EXT_GYRO_BIAS_Y, D_EXT_GYRO_BIAS_Y}, + {KEY_CFG_EXT_GYRO_BIAS_Z, D_EXT_GYRO_BIAS_Z}, + {KEY_SMD_ENABLE, D_SMD_ENABLE}, + {KEY_SMD_ACCEL_THLD, D_SMD_ACCEL_THLD}, + {KEY_SMD_DELAY_THLD, D_SMD_DELAY_THLD}, + {KEY_SMD_DELAY2_THLD, D_SMD_DELAY2_THLD}, + {KEY_SMD_ENABLE_TESTPT1, SMD_TP1}, + {KEY_SMD_ENABLE_TESTPT2, SMD_TP2}, + {KEY_SMD_EXE_STATE, D_SMD_EXE_STATE}, + {KEY_SMD_DELAY_CNTR, D_SMD_DELAY_CNTR} }; #define NUM_LOCAL_KEYS (sizeof(dmpTConfig)/sizeof(dmpTConfig[0])) @@ -272,6 +347,7 @@ static struct tKeyLabel keys[NUM_KEYS]; unsigned short inv_dmp_get_address(unsigned short key) { static int isSorted; + if (!isSorted) { int kk; for (kk = 0; kk < NUM_KEYS; ++kk) { @@ -286,6 +362,3 @@ unsigned short inv_dmp_get_address(unsigned short key) return 0xffff; return keys[key].addr; } -/** - * @} - */ diff --git a/drivers/staging/iio/imu/mpu/dmpKey.h b/drivers/staging/iio/imu/mpu/dmpKey.h index e8e19515172..f03d7da0060 100644 --- a/drivers/staging/iio/imu/mpu/dmpKey.h +++ b/drivers/staging/iio/imu/mpu/dmpKey.h @@ -11,6 +11,17 @@ * GNU General Public License for more details. * */ +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. + * + * @{ + * @file dmpKey.h + * @brief dmp Key definition + * @details This file is part of invensense mpu driver code + * + */ + #ifndef DMPKEY_H__ #define DMPKEY_H__ @@ -76,9 +87,33 @@ #define KEY_FCFG_MAG_VAL (KEY_CFG_ORIENT_IRQ_3 + 1) #define KEY_FCFG_MAG_MOV (KEY_FCFG_MAG_VAL + 1) #define KEY_CFG_LP_QUAT (KEY_FCFG_MAG_MOV + 1) +#define KEY_CFG_GYRO_RAW_DATA (KEY_CFG_LP_QUAT + 1) +#define KEY_CFG_EXT_GYRO_BIAS (KEY_CFG_GYRO_RAW_DATA + 1) +#define KEY_CFG_EXT_GYRO_BIAS_X (KEY_CFG_EXT_GYRO_BIAS + 1) +#define KEY_CFG_EXT_GYRO_BIAS_Y (KEY_CFG_EXT_GYRO_BIAS_X + 1) +#define KEY_CFG_EXT_GYRO_BIAS_Z (KEY_CFG_EXT_GYRO_BIAS_Y + 1) +#define KEY_bad_compass (KEY_CFG_EXT_GYRO_BIAS_Z + 1) +#define KEY_COMPASS_CHG_SENSITIVITY (KEY_bad_compass + 1) +#define KEY_CCS_HEADING_THLD (KEY_COMPASS_CHG_SENSITIVITY + 1) +#define KEY_CCS_TIME_THLD (KEY_CCS_HEADING_THLD + 1) +#define KEY_CCS_DOTP_THLD (KEY_CCS_TIME_THLD + 1) +#define KEY_CFG_NM_DET (KEY_CCS_DOTP_THLD + 1) +#define KEY_SMD_ENABLE (KEY_CFG_NM_DET + 1) +#define KEY_SMD_ACCEL_THLD (KEY_SMD_ENABLE + 1) +#define KEY_SMD_DELAY_THLD (KEY_SMD_ACCEL_THLD + 1) +#define KEY_SMD_DELAY2_THLD (KEY_SMD_DELAY_THLD + 1) +#define KEY_SMD_ENABLE_TESTPT1 (KEY_SMD_DELAY2_THLD + 1) +#define KEY_SMD_ENABLE_TESTPT2 (KEY_SMD_ENABLE_TESTPT1 + 1) +#define KEY_SMD_EXE_STATE (KEY_SMD_ENABLE_TESTPT2 + 1) +#define KEY_SMD_DELAY_CNTR (KEY_SMD_EXE_STATE + 1) + +#define KEY_BREAK (80) +#if KEY_SMD_DELAY_CNTR != KEY_BREAK +#error +#endif /* MPU6050 keys */ -#define KEY_CFG_ACCEL_FILTER (KEY_CFG_LP_QUAT + 1) +#define KEY_CFG_ACCEL_FILTER (KEY_BREAK + 1) #define KEY_CFG_MOTION_BIAS (KEY_CFG_ACCEL_FILTER + 1) #define KEY_TEMPLABEL (KEY_CFG_MOTION_BIAS + 1) @@ -152,16 +187,24 @@ #define KEY_D_GYRO_BIAS_X (KEY_D_2_252 + 1) #define KEY_D_GYRO_BIAS_Y (KEY_D_GYRO_BIAS_X + 1) #define KEY_D_GYRO_BIAS_Z (KEY_D_GYRO_BIAS_Y + 1) -#define KEY_D_GYRO_ENABLE (KEY_D_GYRO_BIAS_Z + 1) +#define KEY_D_ACC_BIAS_X (KEY_D_GYRO_BIAS_Z + 1) +#define KEY_D_ACC_BIAS_Y (KEY_D_ACC_BIAS_X + 1) +#define KEY_D_ACC_BIAS_Z (KEY_D_ACC_BIAS_Y + 1) +#define KEY_D_GYRO_ENABLE (KEY_D_ACC_BIAS_Z + 1) #define KEY_D_ACCEL_ENABLE 
(KEY_D_GYRO_ENABLE + 1) #define KEY_D_QUAT_ENABLE (KEY_D_ACCEL_ENABLE + 1) -#define KEY_D_CR_TIME_G (KEY_D_QUAT_ENABLE + 1) +#define KEY_D_OUTPUT_ENABLE (KEY_D_QUAT_ENABLE + 1) +#define KEY_D_ACCEL_CNTR (KEY_D_OUTPUT_ENABLE + 1) +#define KEY_D_GYRO_CNTR (KEY_D_ACCEL_CNTR + 1) +#define KEY_D_QUAT0_CNTR (KEY_D_GYRO_CNTR + 1) +#define KEY_D_QUAT1_CNTR (KEY_D_QUAT0_CNTR + 1) +#define KEY_D_QUAT2_CNTR (KEY_D_QUAT1_CNTR + 1) +#define KEY_D_CR_TIME_G (KEY_D_QUAT2_CNTR + 1) #define KEY_D_CR_TIME_A (KEY_D_CR_TIME_G + 1) #define KEY_D_CR_TIME_Q (KEY_D_CR_TIME_A + 1) #define KEY_D_CS_TAX (KEY_D_CR_TIME_Q + 1) #define KEY_D_CS_TAY (KEY_D_CS_TAX + 1) #define KEY_D_CS_TAZ (KEY_D_CS_TAY + 1) - #define KEY_D_CS_TGX (KEY_D_CS_TAZ + 1) #define KEY_D_CS_TGY (KEY_D_CS_TGX + 1) #define KEY_D_CS_TGZ (KEY_D_CS_TGY + 1) @@ -171,7 +214,8 @@ #define KEY_D_CS_TQ3 (KEY_D_CS_TQ2 + 1) /* Compass keys */ -#define KEY_CPASS_BIAS_X (KEY_D_CS_TQ3 + 1) +#define KEY_CPASS_GAIN (KEY_D_CS_TQ3 + 1) +#define KEY_CPASS_BIAS_X (KEY_CPASS_GAIN + 1) #define KEY_CPASS_BIAS_Y (KEY_CPASS_BIAS_X + 1) #define KEY_CPASS_BIAS_Z (KEY_CPASS_BIAS_Y + 1) #define KEY_CPASS_MTX_00 (KEY_CPASS_BIAS_Z + 1) @@ -213,8 +257,27 @@ #define KEY_END_COMPARE_Y_X_TMP3 (KEY_NOT_TIME_MINUS_1 + 1) #define KEY_X_GRT_Y_TMP2 (KEY_END_COMPARE_Y_X_TMP3 + 1) +/*Shake Keys */ +#define KEY_D_0_64 (KEY_X_GRT_Y_TMP2 + 1) +#define KEY_D_2_4 (KEY_D_0_64 + 1) +#define KEY_D_2_8 (KEY_D_2_4 + 1) +#define KEY_D_2_48 (KEY_D_2_8 + 1) +#define KEY_D_2_92 (KEY_D_2_48 + 1) +#define KEY_D_2_94 (KEY_D_2_92 + 1) +#define KEY_D_2_160 (KEY_D_2_94 + 1) +#define KEY_D_3_180 (KEY_D_2_160 + 1) +#define KEY_D_3_184 (KEY_D_3_180 + 1) +#define KEY_D_3_188 (KEY_D_3_184 + 1) +#define KEY_D_3_208 (KEY_D_3_188 + 1) +#define KEY_D_3_240 (KEY_D_3_208 + 1) +#define KEY_RETRACTION_1 (KEY_D_3_240 + 1) +#define KEY_RETRACTION_2 (KEY_RETRACTION_1 + 1) +#define KEY_RETRACTION_3 (KEY_RETRACTION_2 + 1) +#define KEY_RETRACTION_4 (KEY_RETRACTION_3 + 1) +#define KEY_CFG_SHAKE_INT (KEY_RETRACTION_4 + 1) + /* Authenticate Keys */ -#define KEY_D_AUTH_OUT (KEY_X_GRT_Y_TMP2 + 1) +#define KEY_D_AUTH_OUT (KEY_CFG_SHAKE_INT + 1) #define KEY_D_AUTH_IN (KEY_D_AUTH_OUT + 1) #define KEY_D_AUTH_A (KEY_D_AUTH_IN + 1) #define KEY_D_AUTH_B (KEY_D_AUTH_A + 1) @@ -243,39 +306,22 @@ /*Host Based No Motion*/ #define KEY_D_HOST_NO_MOT (KEY_D_PEDSTD_DECI + 1) -/* EIS keys */ -#define KEY_P_EIS_FIFO_FOOTER (KEY_D_HOST_NO_MOT + 1) -#define KEY_P_EIS_FIFO_YSHIFT (KEY_P_EIS_FIFO_FOOTER + 1) -#define KEY_P_EIS_DATA_RATE (KEY_P_EIS_FIFO_YSHIFT + 1) -#define KEY_P_EIS_FIFO_XSHIFT (KEY_P_EIS_DATA_RATE + 1) -#define KEY_P_EIS_FIFO_SYNC (KEY_P_EIS_FIFO_XSHIFT + 1) -#define KEY_P_EIS_FIFO_ZSHIFT (KEY_P_EIS_FIFO_SYNC + 1) -#define KEY_P_EIS_FIFO_READY (KEY_P_EIS_FIFO_ZSHIFT + 1) -#define KEY_DMP_FOOTER (KEY_P_EIS_FIFO_READY + 1) -#define KEY_DMP_INTX_HC (KEY_DMP_FOOTER + 1) -#define KEY_DMP_INTX_PH (KEY_DMP_INTX_HC + 1) -#define KEY_DMP_INTX_SH (KEY_DMP_INTX_PH + 1) -#define KEY_DMP_AINV_SH (KEY_DMP_INTX_SH + 1) -#define KEY_DMP_A_INV_XH (KEY_DMP_AINV_SH + 1) -#define KEY_DMP_AINV_PH (KEY_DMP_A_INV_XH + 1) -#define KEY_DMP_CTHX_H (KEY_DMP_AINV_PH + 1) -#define KEY_DMP_CTHY_H (KEY_DMP_CTHX_H + 1) -#define KEY_DMP_CTHZ_H (KEY_DMP_CTHY_H + 1) -#define KEY_DMP_NCTHX_H (KEY_DMP_CTHZ_H + 1) -#define KEY_DMP_NCTHY_H (KEY_DMP_NCTHX_H + 1) -#define KEY_DMP_NCTHZ_H (KEY_DMP_NCTHY_H + 1) -#define KEY_DMP_CTSQ_XH (KEY_DMP_NCTHZ_H + 1) -#define KEY_DMP_CTSQ_YH (KEY_DMP_CTSQ_XH + 1) -#define KEY_DMP_CTSQ_ZH (KEY_DMP_CTSQ_YH + 1) -#define KEY_DMP_INTX_H 
(KEY_DMP_CTSQ_ZH + 1) -#define KEY_DMP_INTY_H (KEY_DMP_INTX_H + 1) -#define KEY_DMP_INTZ_H (KEY_DMP_INTY_H + 1) -#define KEY_DMP_HPX_H (KEY_DMP_INTZ_H + 1) -#define KEY_DMP_HPY_H (KEY_DMP_HPX_H + 1) -#define KEY_DMP_HPZ_H (KEY_DMP_HPY_H + 1) +/*Host Based Accel Bias*/ +#define KEY_D_ACCEL_BIAS (KEY_D_HOST_NO_MOT + 1) + +/*Screen/Display Orientation Keys*/ +#define KEY_D_ORIENT_GAP (KEY_D_ACCEL_BIAS + 1) +#define KEY_D_TILT0_H (KEY_D_ORIENT_GAP + 1) +#define KEY_D_TILT0_L (KEY_D_TILT0_H + 1) +#define KEY_D_TILT1_H (KEY_D_TILT0_L + 1) +#define KEY_D_TILT1_L (KEY_D_TILT1_H + 1) +#define KEY_D_TILT2_H (KEY_D_TILT1_L + 1) +#define KEY_D_TILT2_L (KEY_D_TILT2_H + 1) +#define KEY_D_TILT3_H (KEY_D_TILT2_L + 1) +#define KEY_D_TILT3_L (KEY_D_TILT3_H + 1) /* Stream keys */ -#define KEY_STREAM_P_GYRO_Z (KEY_DMP_HPZ_H + 1) +#define KEY_STREAM_P_GYRO_Z (KEY_D_TILT3_L + 1) #define KEY_STREAM_P_GYRO_Y (KEY_STREAM_P_GYRO_Z + 1) #define KEY_STREAM_P_GYRO_X (KEY_STREAM_P_GYRO_Y + 1) #define KEY_STREAM_P_TEMP (KEY_STREAM_P_GYRO_X + 1) diff --git a/drivers/staging/iio/imu/mpu/dmpmap.h b/drivers/staging/iio/imu/mpu/dmpmap.h index 7dc354a33a2..28f59af0157 100644 --- a/drivers/staging/iio/imu/mpu/dmpmap.h +++ b/drivers/staging/iio/imu/mpu/dmpmap.h @@ -1,7 +1,26 @@ /* - $License: - Copyright (C) 2011 InvenSense Corporation, All Rights Reserved. - $ +* Copyright (C) 2012 Invensense, Inc. +* +* This software is licensed under the terms of the GNU General Public +* License version 2, as published by the Free Software Foundation, and +* may be copied, distributed, and modified under those terms. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +*/ + +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. + * + * @{ + * @file dmpmap.h + * @brief dmp map definition + * @details This file is part of invensense mpu driver code + * */ #ifndef DMPMAP_H #define DMPMAP_H diff --git a/drivers/staging/iio/imu/mpu/inv_mpu3050_iio.c b/drivers/staging/iio/imu/mpu/inv_mpu3050_iio.c index bac55f1f019..70b19018d6d 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu3050_iio.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu3050_iio.c @@ -17,9 +17,9 @@ * @brief Hardware drivers. 
* * @{ - * @file inv_mpu3050.c + * @file inv_mpu3050_iio.c * @brief A sysfs device driver for Invensense devices - * @details This file is part of inv_gyro driver code + * @details This file is part of invensense mpu driver code */ #include @@ -38,15 +38,21 @@ #include #include "inv_mpu_iio.h" -#define MPU3050_NACK_TIME (2*1000) -#define MPU3050_ONE_MPU_TIME (20) -#define MPU3050_BOGUS_ADDR (0x7F) +#define MPU3050_NACK_MIN_TIME (2 * 1000) +#define MPU3050_NACK_MAX_TIME (3 * 1000) -int set_3050_bypass(struct inv_gyro_state_s *st, int enable) +#define MPU3050_ONE_MPU_TIME 20 +#define MPU3050_BOGUS_ADDR 0x7F +int __attribute__((weak)) inv_register_mpu3050_slave(struct inv_mpu_iio_s *st) +{ + return 0; +} + +int set_3050_bypass(struct inv_mpu_iio_s *st, bool enable) { struct inv_reg_map_s *reg; int result; - unsigned char b; + u8 b; reg = &st->reg; result = inv_i2c_read(st, reg->user_ctrl, 1, &b); @@ -80,7 +86,7 @@ int set_3050_bypass(struct inv_gyro_state_s *st, int enable) * 2) wait enough time for a nack to occur, then go into * bypass mode: */ - usleep_range(MPU3050_NACK_TIME, MPU3050_NACK_TIME); + usleep_range(MPU3050_NACK_MIN_TIME, MPU3050_NACK_MAX_TIME); result = inv_i2c_single_write(st, reg->user_ctrl, b); if (result) return result; @@ -97,25 +103,65 @@ int set_3050_bypass(struct inv_gyro_state_s *st, int enable) result = inv_i2c_single_write(st, reg->user_ctrl, b); if (result) return result; - usleep_range(MPU3050_NACK_TIME, MPU3050_NACK_TIME); + usleep_range(MPU3050_NACK_MIN_TIME, MPU3050_NACK_MAX_TIME); } return 0; } void inv_setup_reg_mpu3050(struct inv_reg_map_s *reg) { - reg->fifo_en = 0x12; - reg->sample_rate_div = 0x15; - reg->lpf = 0x16; - reg->fifo_count_h = 0x3a; - reg->fifo_r_w = 0x3c; - reg->user_ctrl = 0x3d; - reg->pwr_mgmt_1 = 0x3e; - reg->raw_gyro = 0x1d; - reg->raw_accl = 0x23; - reg->temperature = 0x1b; - reg->int_enable = 0x17; - reg->int_status = 0x1a; + reg->fifo_en = REG_3050_FIFO_EN; + reg->sample_rate_div = REG_3050_SAMPLE_RATE_DIV; + reg->lpf = REG_3050_LPF; + reg->fifo_count_h = REG_3050_FIFO_COUNT_H; + reg->fifo_r_w = REG_3050_FIFO_R_W; + reg->user_ctrl = REG_3050_USER_CTRL; + reg->pwr_mgmt_1 = REG_3050_PWR_MGMT_1; + reg->raw_gyro = REG_3050_RAW_GYRO; + reg->raw_accl = REG_3050_AUX_XOUT_H; + reg->temperature = REG_3050_TEMPERATURE; + reg->int_enable = REG_3050_INT_ENABLE; + reg->int_status = REG_3050_INT_STATUS; +} + +int inv_switch_3050_gyro_engine(struct inv_mpu_iio_s *st, bool en) +{ + struct inv_reg_map_s *reg; + u8 data, p; + int result; + reg = &st->reg; + if (en) { + data = INV_CLK_PLL; + p = (BITS_3050_POWER1 | data); + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); + if (result) + return result; + p = (BITS_3050_POWER2 | data); + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); + if (result) + return result; + p = data; + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); + msleep(SENSOR_UP_TIME); + } else { + p = BITS_3050_GYRO_STANDBY; + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); + } + + return result; +} + +int inv_switch_3050_accl_engine(struct inv_mpu_iio_s *st, bool en) +{ + int result; + if (NULL == st->mpu_slave) + return -EPERM; + if (en) + result = st->mpu_slave->resume(st); + else + result = st->mpu_slave->suspend(st); + + return result; } /** @@ -131,8 +177,9 @@ int inv_init_config_mpu3050(struct iio_dev *indio_dev) { struct inv_reg_map_s *reg; int result; - unsigned char data; - struct inv_gyro_state_s *st = iio_priv(indio_dev); + u8 data; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + if 
(st->chip_config.is_asleep) return -EPERM; /*reading AUX VDDIO register */ @@ -147,9 +194,6 @@ int inv_init_config_mpu3050(struct iio_dev *indio_dev) return result; reg = &st->reg; - result = set_inv_enable(indio_dev, 0); - if (result) - return result; /*2000dps full scale range*/ result = inv_i2c_single_write(st, reg->lpf, (INV_FSR_2000DPS << GYRO_CONFIG_FSR_SHIFT) @@ -163,11 +207,12 @@ int inv_init_config_mpu3050(struct iio_dev *indio_dev) if (result) return result; st->chip_config.fifo_rate = INIT_FIFO_RATE; - st->irq_dur_us = INIT_DUR_TIME; + st->irq_dur_ns = INIT_DUR_TIME; st->chip_config.prog_start_addr = DMP_START_ADDR; st->chip_config.gyro_enable = 1; st->chip_config.gyro_fifo_enable = 1; - if (SECONDARY_SLAVE_TYPE_ACCEL == st->plat_data.sec_slave_type) { + if ((SECONDARY_SLAVE_TYPE_ACCEL == st->plat_data.sec_slave_type) && + st->mpu_slave) { result = st->mpu_slave->setup(st); if (result) return result; @@ -180,23 +225,24 @@ int inv_init_config_mpu3050(struct iio_dev *indio_dev) st->chip_config.accl_enable = 1; st->chip_config.accl_fifo_enable = 1; } + return 0; } + /** * set_power_mpu3050() - set power of mpu3050. * @st: Device driver instance. * @power_on: on/off */ -int set_power_mpu3050(struct inv_gyro_state_s *st, - unsigned char power_on) +int set_power_mpu3050(struct inv_mpu_iio_s *st, bool power_on) { struct inv_reg_map_s *reg; - unsigned char data, p; + u8 data, p; int result; reg = &st->reg; - if (power_on) + if (power_on) { data = 0; - else { + } else { if (st->mpu_slave) { result = st->mpu_slave->suspend(st); if (result) @@ -219,14 +265,11 @@ int set_power_mpu3050(struct inv_gyro_state_s *st, result = inv_i2c_single_write(st, reg->pwr_mgmt_1, data | p); if (result) return result; - - st->chip_config.clk_src = INV_CLK_PLL; } else { data |= (BITS_3050_GYRO_STANDBY | INV_CLK_INTERNAL); result = inv_i2c_single_write(st, reg->pwr_mgmt_1, data); if (result) return result; - st->chip_config.clk_src = INV_CLK_INTERNAL; } if (power_on) { msleep(POWER_UP_TIME); @@ -235,9 +278,9 @@ int set_power_mpu3050(struct inv_gyro_state_s *st, if (result) return result; } - st->chip_config.is_asleep = 0; - } else - st->chip_config.is_asleep = 1; + } + st->chip_config.is_asleep = !power_on; + return 0; } /** diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_core.c b/drivers/staging/iio/imu/mpu/inv_mpu_core.c index 94a5ccfc1f8..6368818f3da 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_core.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_core.c @@ -17,11 +17,12 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro.c + * @file inv_mpu_core.c * @brief A sysfs device driver for Invensense devices - * @details This driver currently works for the ITG3500, MPU6050, MPU9150 - * MPU3050 + * @details This driver currently works for the + * MPU3050/MPU6050/MPU9150/MPU6500/MPU9250 devices. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -37,57 +38,75 @@ #include #include #include + #include "inv_mpu_iio.h" #include "../../sysfs.h" -#define CHECK_DMP do \ - { \ - if ((st->chip_config.is_asleep) || \ - (0 == st->chip_config.firmware_loaded)) \ - return -EPERM; \ - result = kstrtoul(buf, 10, (long unsigned int *)&data); \ - if (result) \ - return result; \ - } while (0); -static void inv_setup_reg(struct inv_reg_map_s *reg) +#include "../../inv_test/inv_counters.h" + +s64 get_time_ns(void) { - reg->who_am_i = 0x75; - reg->sample_rate_div = 0x19; - reg->lpf = 0x1A; - reg->product_id = 0x0C; - reg->bank_sel = 0x6D; - reg->user_ctrl = 0x6A; - reg->fifo_en = 0x23; - reg->gyro_config = 0x1B; - reg->accl_config = 0x1C; - reg->fifo_count_h = 0x72; - reg->fifo_r_w = 0x74; - reg->raw_gyro = 0x43; - reg->raw_accl = 0x3B; - reg->temperature = 0x41; - reg->int_enable = 0x38; - reg->int_status = 0x3A; - reg->pwr_mgmt_1 = 0x6B; - reg->pwr_mgmt_2 = 0x6C; - reg->mem_start_addr = 0x6E; - reg->mem_r_w = 0x6F; - reg->prgm_strt_addrh = 0x70; -}; + struct timespec ts; + ktime_get_ts(&ts); + return timespec_to_ns(&ts); +} + +static const short AKM8975_ST_Lower[3] = {-100, -100, -1000}; +static const short AKM8975_ST_Upper[3] = {100, 100, -300}; + +static const short AKM8972_ST_Lower[3] = {-50, -50, -500}; +static const short AKM8972_ST_Upper[3] = {50, 50, -100}; + +static const short AKM8963_ST_Lower[3] = {-200, -200, -3200}; +static const short AKM8963_ST_Upper[3] = {200, 200, -800}; + +/* This is for compatibility for power state. Should remove once HAL + does not use power_state sysfs entry */ +static bool fake_asleep; + static const struct inv_hw_s hw_info[INV_NUM_PARTS] = { {119, "ITG3500"}, { 63, "MPU3050"}, {117, "MPU6050"}, - {118, "MPU9150"} + {118, "MPU9150"}, + {119, "MPU6500"}, + {118, "MPU9250"}, +}; + +static void inv_setup_reg(struct inv_reg_map_s *reg) +{ + reg->sample_rate_div = REG_SAMPLE_RATE_DIV; + reg->lpf = REG_CONFIG; + reg->bank_sel = REG_BANK_SEL; + reg->user_ctrl = REG_USER_CTRL; + reg->fifo_en = REG_FIFO_EN; + reg->gyro_config = REG_GYRO_CONFIG; + reg->accl_config = REG_ACCEL_CONFIG; + reg->fifo_count_h = REG_FIFO_COUNT_H; + reg->fifo_r_w = REG_FIFO_R_W; + reg->raw_gyro = REG_RAW_GYRO; + reg->raw_accl = REG_RAW_ACCEL; + reg->temperature = REG_TEMPERATURE; + reg->int_enable = REG_INT_ENABLE; + reg->int_status = REG_INT_STATUS; + reg->pwr_mgmt_1 = REG_PWR_MGMT_1; + reg->pwr_mgmt_2 = REG_PWR_MGMT_2; + reg->mem_start_addr = REG_MEM_START_ADDR; + reg->mem_r_w = REG_MEM_RW; + reg->prgm_strt_addrh = REG_PRGM_STRT_ADDRH; }; + /** * inv_i2c_read() - Read one or more bytes from the device registers. * @st: Device driver instance. * @reg: First device register to be read from. * @length: Number of bytes to read. * @data: Data read from device. - * NOTE: The slave register will not increment when reading from the FIFO. + * NOTE:This is not re-implementation of i2c_smbus_read because i2c + * address could be specified in this case. We could have two different + * i2c address due to secondary i2c interface. 
*/ -int inv_i2c_read_base(struct inv_gyro_state_s *st, unsigned short i2c_addr, - unsigned char reg, unsigned short length, unsigned char *data) +int inv_i2c_read_base(struct inv_mpu_iio_s *st, u16 i2c_addr, + u8 reg, u16 length, u8 *data) { struct i2c_msg msgs[2]; int res; @@ -106,12 +125,25 @@ int inv_i2c_read_base(struct inv_gyro_state_s *st, unsigned short i2c_addr, msgs[1].len = length; res = i2c_transfer(st->sl_handle, msgs, 2); + if (res < 2) { if (res >= 0) res = -EIO; - return res; } else - return 0; + res = 0; + + INV_I2C_INC_MPUWRITE(3); + INV_I2C_INC_MPUREAD(length); +#if CONFIG_DYNAMIC_DEBUG + { + char *read = 0; + pr_debug("%s RD%02X%02X%02X -> %s%s\n", st->hw->name, + i2c_addr, reg, length, + wr_pr_debug_begin(data, length, read), + wr_pr_debug_end(read)); + } +#endif + return res; } /** @@ -119,14 +151,16 @@ int inv_i2c_read_base(struct inv_gyro_state_s *st, unsigned short i2c_addr, * @st: Device driver instance. * @reg: Device register to be written to. * @data: Byte to write to device. + * NOTE:This is not re-implementation of i2c_smbus_write because i2c + * address could be specified in this case. We could have two different + * i2c address due to secondary i2c interface. */ -int inv_i2c_single_write_base(struct inv_gyro_state_s *st, - unsigned short i2c_addr, unsigned char reg, unsigned char data) +int inv_i2c_single_write_base(struct inv_mpu_iio_s *st, + u16 i2c_addr, u8 reg, u8 data) { - unsigned char tmp[2]; + u8 tmp[2]; struct i2c_msg msg; int res; - tmp[0] = reg; tmp[1] = data; @@ -135,7 +169,9 @@ int inv_i2c_single_write_base(struct inv_gyro_state_s *st, msg.buf = tmp; msg.len = 2; - /*printk(KERN_ERR "WS%02X%02X%02X\n", i2c_addr, reg, data);*/ + pr_debug("%s WR%02X%02X%02X\n", st->hw->name, i2c_addr, reg, data); + INV_I2C_INC_MPUWRITE(3); + res = i2c_transfer(st->sl_handle, &msg, 1); if (res < 1) { if (res == 0) @@ -144,64 +180,123 @@ int inv_i2c_single_write_base(struct inv_gyro_state_s *st, } else return 0; } -static int set_power_itg(struct inv_gyro_state_s *st, - unsigned char power_on) + +static int inv_switch_engine(struct inv_mpu_iio_s *st, bool en, u32 mask) { struct inv_reg_map_s *reg; - unsigned char data; + u8 data, mgmt_1; int result; - reg = &st->reg; - if (power_on) - data = 0; - else - data = BIT_SLEEP; - if (st->chip_config.lpa_mode) - data |= BIT_CYCLE; - if (st->chip_config.gyro_enable) { - result = inv_i2c_single_write(st, - reg->pwr_mgmt_1, data | INV_CLK_PLL); + /* switch clock needs to be careful. Only when gyro is on, can + clock source be switched to gyro. Otherwise, it must be set to + internal clock */ + if (BIT_PWR_GYRO_STBY == mask) { + result = inv_i2c_read(st, reg->pwr_mgmt_1, 1, &mgmt_1); if (result) return result; - st->chip_config.clk_src = INV_CLK_PLL; - } else { - result = inv_i2c_single_write(st, - reg->pwr_mgmt_1, data | INV_CLK_INTERNAL); + + mgmt_1 &= ~BIT_CLK_MASK; + } + + if ((BIT_PWR_GYRO_STBY == mask) && (!en)) { + /* turning off gyro requires switch to internal clock first. 
+ Then turn off gyro engine */ + mgmt_1 |= INV_CLK_INTERNAL; + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, + mgmt_1); if (result) return result; - st->chip_config.clk_src = INV_CLK_INTERNAL; } - if (power_on) { - msleep(POWER_UP_TIME); - data = 0; - if (0 == st->chip_config.accl_enable) - data |= BIT_PWR_ACCL_STBY; - if (0 == st->chip_config.gyro_enable) - data |= BIT_PWR_GYRO_STBY; - data |= (st->chip_config.lpa_freq << LPA_FREQ_SHIFT); + result = inv_i2c_read(st, reg->pwr_mgmt_2, 1, &data); + if (result) + return result; + if (en) + data &= (~mask); + else + data |= mask; + result = inv_i2c_single_write(st, reg->pwr_mgmt_2, data); + if (result) + return result; - result = inv_i2c_single_write(st, reg->pwr_mgmt_2, data); + if ((BIT_PWR_GYRO_STBY == mask) && en) { + /* only gyro on needs sensor up time */ + msleep(SENSOR_UP_TIME); + /* after gyro is on & stable, switch internal clock to PLL */ + mgmt_1 |= INV_CLK_PLL; + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, + mgmt_1); if (result) return result; - msleep(POWER_UP_TIME); - st->chip_config.is_asleep = 0; - } else - st->chip_config.is_asleep = 1; + } + if ((BIT_PWR_ACCL_STBY == mask) && en) + msleep(REG_UP_TIME); + return 0; } + /** - * inv_set_power_state() - Turn device on/off. - * @st: Device driver instance. - * @power_on: 1 to turn on, 0 to suspend. + * inv_lpa_freq() - store current low power frequency setting. */ -int inv_set_power_state(struct inv_gyro_state_s *st, - unsigned char power_on) +static int inv_lpa_freq(struct inv_mpu_iio_s *st, int lpa_freq) { - if (INV_MPU3050 == st->chip_type) - return set_power_mpu3050(st, power_on); + unsigned long result; + u8 d; + struct inv_reg_map_s *reg; + /* this mapping makes 6500 and 6050 setting close */ + /* 2, 4, 6, 7 corresponds to 0.98, 3.91, 15.63, 31.25 */ + const u8 mpu6500_lpa_mapping[] = {2, 4, 6, 7}; + + if (lpa_freq > MAX_LPA_FREQ_PARAM) + return -EINVAL; + + if (INV_MPU6500 == st->chip_type) { + d = mpu6500_lpa_mapping[lpa_freq]; + result = inv_i2c_single_write(st, REG_6500_LP_ACCEL_ODR, d); + if (result) + return result; + } else { + reg = &st->reg; + result = inv_i2c_read(st, reg->pwr_mgmt_2, 1, &d); + if (result) + return result; + d &= ~BIT_LPA_FREQ; + d |= (u8)(lpa_freq << LPA_FREQ_SHIFT); + result = inv_i2c_single_write(st, reg->pwr_mgmt_2, d); + if (result) + return result; + } + st->chip_config.lpa_freq = lpa_freq; + + return 0; +} + +static int set_power_itg(struct inv_mpu_iio_s *st, bool power_on) +{ + struct inv_reg_map_s *reg; + u8 data; + int result; + + if ((!power_on) == st->chip_config.is_asleep) + return 0; + reg = &st->reg; + if (power_on) + data = 0; else - return set_power_itg(st, power_on); + data = BIT_SLEEP; + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, data); + if (result) + return result; + + if (power_on) { + if (INV_MPU6500 == st->chip_type) + msleep(POWER_UP_TIME); + else + msleep(REG_UP_TIME); + } + + st->chip_config.is_asleep = !power_on; + return 0; } @@ -212,25 +307,21 @@ int inv_set_power_state(struct inv_gyro_state_s *st, * FSR: +/- 2000DPS * DLPF: 42Hz * FIFO rate: 50Hz - * Clock source: Gyro PLL */ static int inv_init_config(struct iio_dev *indio_dev) { struct inv_reg_map_s *reg; int result; - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + - if (st->chip_config.is_asleep) - return -EPERM; reg = &st->reg; - result = set_inv_enable(indio_dev, 0); - if (result) - return result; result = inv_i2c_single_write(st, reg->gyro_config, INV_FSR_2000DPS << 
GYRO_CONFIG_FSR_SHIFT); if (result) return result; + st->chip_config.fsr = INV_FSR_2000DPS; result = inv_i2c_single_write(st, reg->lpf, INV_FILTER_42HZ); @@ -239,21 +330,17 @@ static int inv_init_config(struct iio_dev *indio_dev) st->chip_config.lpf = INV_FILTER_42HZ; result = inv_i2c_single_write(st, reg->sample_rate_div, - ONE_K_HZ/INIT_FIFO_RATE - 1); - if (result) - return result; - result = inv_i2c_single_write(st, REG_INT_PIN_CFG, - st->plat_data.int_config & (~BIT_BYPASS_EN)); + ONE_K_HZ / INIT_FIFO_RATE - 1); if (result) return result; st->chip_config.fifo_rate = INIT_FIFO_RATE; - st->irq_dur_us = INIT_DUR_TIME; + st->chip_config.new_fifo_rate = INIT_FIFO_RATE; + st->irq_dur_ns = INIT_DUR_TIME; st->chip_config.prog_start_addr = DMP_START_ADDR; - st->chip_config.gyro_enable = 1; - st->chip_config.gyro_fifo_enable = 1; + st->chip_config.dmp_output_rate = INIT_DMP_OUTPUT_RATE; + st->self_test.samples = INIT_ST_SAMPLES; + st->self_test.threshold = INIT_ST_THRESHOLD; if (INV_ITG3500 != st->chip_type) { - st->chip_config.accl_enable = 1; - st->chip_config.accl_fifo_enable = 1; st->chip_config.accl_fs = INV_FS_02G; result = inv_i2c_single_write(st, reg->accl_config, (INV_FS_02G << ACCL_CONFIG_FSR_SHIFT)); @@ -262,13 +349,30 @@ static int inv_init_config(struct iio_dev *indio_dev) st->tap.time = INIT_TAP_TIME; st->tap.thresh = INIT_TAP_THRESHOLD; st->tap.min_count = INIT_TAP_MIN_COUNT; + st->smd.threshold = MPU_INIT_SMD_THLD; + st->smd.delay = MPU_INIT_SMD_DELAY_THLD; + st->smd.delay2 = MPU_INIT_SMD_DELAY2_THLD; + + result = inv_i2c_single_write(st, REG_ACCEL_MOT_DUR, + INIT_MOT_DUR); + if (result) + return result; + st->mot_int.mot_dur = INIT_MOT_DUR; + + result = inv_i2c_single_write(st, REG_ACCEL_MOT_THR, + INIT_MOT_THR); + if (result) + return result; + st->mot_int.mot_thr = INIT_MOT_THR; } + return 0; } + /** * inv_compass_scale_show() - show compass scale. */ -static int inv_compass_scale_show(struct inv_gyro_state_s *st, int *scale) +static int inv_compass_scale_show(struct inv_mpu_iio_s *st, int *scale) { if (COMPASS_ID_AK8975 == st->plat_data.sec_slave_id) *scale = DATA_AKM8975_SCALE; @@ -281,7 +385,26 @@ static int inv_compass_scale_show(struct inv_gyro_state_s *st, int *scale) *scale = DATA_AKM8963_SCALE0; else return -EINVAL; - *scale *= (1L << 15); + + return IIO_VAL_INT; +} + +/** + * inv_sensor_show() - Read gyro/accel data directly from registers. + */ +static int inv_sensor_show(struct inv_mpu_iio_s *st, int reg, int axis, + int *val) +{ + int ind, result; + u8 d[2]; + + ind = (axis - IIO_MOD_X) * 2; + result = i2c_smbus_read_i2c_block_data(st->client, + reg + ind, 2, d); + if (result != 2) + return -EINVAL; + *val = (short)be16_to_cpup((__be16 *)(d)); + return IIO_VAL_INT; } @@ -289,59 +412,109 @@ static int inv_compass_scale_show(struct inv_gyro_state_s *st, int *scale) * mpu_read_raw() - read raw method. 
*/ static int mpu_read_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int *val, - int *val2, - long mask) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + struct inv_mpu_iio_s *st = iio_priv(indio_dev); int result; - if (st->chip_config.is_asleep) - return -EINVAL; + switch (mask) { case 0: - if (chan->type == IIO_ANGL_VEL) { - *val = st->raw_gyro[chan->channel2 - IIO_MOD_X]; - return IIO_VAL_INT; - } - if (chan->type == IIO_ACCEL) { - *val = st->raw_accel[chan->channel2 - IIO_MOD_X]; - return IIO_VAL_INT; - } - if (chan->type == IIO_MAGN) { + /* if enabled, power is on already */ + if (!st->chip_config.enable) + return -EBUSY; + switch (chan->type) { + case IIO_ANGL_VEL: + if (!st->chip_config.gyro_enable) + return -EPERM; + return inv_sensor_show(st, st->reg.raw_gyro, + chan->channel2, val); + case IIO_ACCEL: + if (!st->chip_config.accl_enable) + return -EPERM; + return inv_sensor_show(st, st->reg.raw_accl, + chan->channel2, val); + case IIO_MAGN: + if (!st->chip_config.compass_enable) + return -EPERM; *val = st->raw_compass[chan->channel2 - IIO_MOD_X]; return IIO_VAL_INT; + case IIO_QUATERNION: + if (!(st->chip_config.dmp_on + && st->chip_config.quaternion_on)) + return -EPERM; + if (IIO_MOD_R == chan->channel2) + *val = st->raw_quaternion[0]; + else + *val = st->raw_quaternion[chan->channel2 - + IIO_MOD_X + 1]; + return IIO_VAL_INT; + default: + return -EINVAL; } - return -EINVAL; case IIO_CHAN_INFO_SCALE: - if (chan->type == IIO_ANGL_VEL) { - *val = (1 << st->chip_config.fsr)*GYRO_DPS_SCALE; + switch (chan->type) { + case IIO_ANGL_VEL: + { + const s16 gyro_scale[] = {250, 500, 1000, 2000}; + + *val = gyro_scale[st->chip_config.fsr]; + return IIO_VAL_INT; } - if (chan->type == IIO_ACCEL) { - *val = (2 << st->chip_config.accl_fs); + case IIO_ACCEL: + { + const s16 accel_scale[] = {2, 4, 8, 16}; + *val = accel_scale[st->chip_config.accl_fs] * + st->chip_info.multi; return IIO_VAL_INT; } - if (chan->type == IIO_MAGN) + case IIO_MAGN: return inv_compass_scale_show(st, val); - return -EINVAL; + default: + return -EINVAL; + } case IIO_CHAN_INFO_CALIBBIAS: if (st->chip_config.self_test_run_once == 0) { + /* This can only be run when enable is zero */ + if (st->chip_config.enable) + return -EBUSY; + mutex_lock(&indio_dev->mlock); + + result = inv_power_up_self_test(st); + if (result) + goto error_info_calibbias; result = inv_do_test(st, 0, st->gyro_bias, st->accel_bias); if (result) - return result; + goto error_info_calibbias; st->chip_config.self_test_run_once = 1; +error_info_calibbias: + /* Reset Accel and Gyro full scale range + back to default value */ + inv_recover_setting(st); + mutex_unlock(&indio_dev->mlock); } - if (chan->type == IIO_ANGL_VEL) { + switch (chan->type) { + case IIO_ANGL_VEL: *val = st->gyro_bias[chan->channel2 - IIO_MOD_X]; return IIO_VAL_INT; + case IIO_ACCEL: + *val = st->accel_bias[chan->channel2 - IIO_MOD_X] * + st->chip_info.multi; + return IIO_VAL_INT; + default: + return -EINVAL; } - if (chan->type == IIO_ACCEL) { - *val = st->accel_bias[chan->channel2 - IIO_MOD_X]; + case IIO_CHAN_INFO_OFFSET: + switch (chan->type) { + case IIO_ACCEL: + *val = st->input_accel_bias[chan->channel2 - IIO_MOD_X]; return IIO_VAL_INT; + default: + return -EINVAL; } - return -EINVAL; default: return -EINVAL; } @@ -350,7 +523,7 @@ static int mpu_read_raw(struct iio_dev *indio_dev, /** * inv_write_fsr() - Configure the gyro's scale range. 
*/ -static int inv_write_fsr(struct inv_gyro_state_s *st, int fsr) +static int inv_write_fsr(struct inv_mpu_iio_s *st, int fsr) { struct inv_reg_map_s *reg; int result; @@ -360,68 +533,65 @@ static int inv_write_fsr(struct inv_gyro_state_s *st, int fsr) if (fsr == st->chip_config.fsr) return 0; - if (INV_MPU3050 == st->chip_type) { + if (INV_MPU3050 == st->chip_type) result = inv_i2c_single_write(st, reg->lpf, (fsr << GYRO_CONFIG_FSR_SHIFT) | st->chip_config.lpf); - } else { + else result = inv_i2c_single_write(st, reg->gyro_config, fsr << GYRO_CONFIG_FSR_SHIFT); - } + if (result) return result; st->chip_config.fsr = fsr; + return 0; } /** * inv_write_accel_fs() - Configure the accelerometer's scale range. */ -static int inv_write_accel_fs(struct inv_gyro_state_s *st, int fs) +static int inv_write_accel_fs(struct inv_mpu_iio_s *st, int fs) { int result; struct inv_reg_map_s *reg; - reg = &st->reg; + reg = &st->reg; if (fs < 0 || fs > MAX_ACCL_FS_PARAM) return -EINVAL; if (fs == st->chip_config.accl_fs) return 0; - if (INV_MPU3050 == st->chip_type) { + if (INV_MPU3050 == st->chip_type) result = st->mpu_slave->set_fs(st, fs); - if (result) - return result; - } else { + else result = inv_i2c_single_write(st, reg->accl_config, (fs << ACCL_CONFIG_FSR_SHIFT)); - if (result) - return result; - } - /* reset fifo because the data could be mixed with old bad data */ + if (result) + return result; + st->chip_config.accl_fs = fs; + return 0; } + /** * inv_write_compass_scale() - Configure the compass's scale range. */ -static int inv_write_compass_scale(struct inv_gyro_state_s *st, int data) +static int inv_write_compass_scale(struct inv_mpu_iio_s *st, int data) { char d, en; int result; if (COMPASS_ID_AK8963 != st->plat_data.sec_slave_id) return 0; - if (data) - en = 1; - else - en = 0; + en = !!data; if (st->compass_scale == en) return 0; - d = (1 | (st->compass_scale << AKM8963_SCALE_SHIFT)); + d = (DATA_AKM_MODE_SM | (st->compass_scale << AKM8963_SCALE_SHIFT)); result = inv_i2c_single_write(st, REG_I2C_SLV1_DO, d); if (result) return result; st->compass_scale = en; - return 0; + return 0; } /** @@ -432,1083 +602,506 @@ static int mpu_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); int result; - if (st->chip_config.is_asleep) - return -EPERM; + + if (st->chip_config.enable) + return -EBUSY; + mutex_lock(&indio_dev->mlock); + result = st->set_power_state(st, true); + if (result) { + mutex_unlock(&indio_dev->mlock); + return result; + } + switch (mask) { case IIO_CHAN_INFO_SCALE: - result = -EINVAL; - if (chan->type == IIO_ANGL_VEL) + switch (chan->type) { + case IIO_ANGL_VEL: result = inv_write_fsr(st, val); - if (chan->type == IIO_ACCEL) + break; + case IIO_ACCEL: result = inv_write_accel_fs(st, val); - if (chan->type == IIO_MAGN) + break; + case IIO_MAGN: result = inv_write_compass_scale(st, val); - return result; + break; + default: + result = -EINVAL; + break; + } + break; + case IIO_CHAN_INFO_OFFSET: + switch (chan->type) { + case IIO_ACCEL: + if (!st->chip_config.firmware_loaded) { + result = -EPERM; + goto error_write_raw; + } + result = inv_set_accel_bias_dmp(st); + if (result) + goto error_write_raw; + st->input_accel_bias[chan->channel2 - IIO_MOD_X] = val; + result = 0; + break; + default: + result = -EINVAL; + break; + } + break; default: - return -EINVAL; + result = -EINVAL; + break; } - return 0; -} -/** - * inv_set_lpf() - set low pass filer based on fifo rate. 
- */ -static int inv_set_lpf(struct inv_gyro_state_s *st, int rate) -{ - const short hz[] = {188, 98, 42, 20, 10, 5}; - const int d[] = {INV_FILTER_188HZ, INV_FILTER_98HZ, - INV_FILTER_42HZ, INV_FILTER_20HZ, - INV_FILTER_10HZ, INV_FILTER_5HZ}; - int i, h, data, result; - struct inv_reg_map_s *reg; - reg = &st->reg; - h = (rate >> 1); - i = 0; - while ((h < hz[i]) && (i < ARRAY_SIZE(d))) - i++; - if (i == ARRAY_SIZE(d)) - i -= 1; - data = d[i]; - if (INV_MPU3050 == st->chip_type) { - if (st->mpu_slave != NULL) { - result = st->mpu_slave->set_lpf(st, rate); - if (result) - return result; - } - result = inv_i2c_single_write(st, reg->lpf, data | - (st->chip_config.fsr << GYRO_CONFIG_FSR_SHIFT)); - if (result) - return result; - } else - result = inv_i2c_single_write(st, reg->lpf, data); - if (result) - return result; - st->chip_config.lpf = data; - return 0; +error_write_raw: + result |= st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); + + return result; } /** * inv_fifo_rate_store() - Set fifo rate. */ -static ssize_t inv_fifo_rate_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static int inv_fifo_rate_store(struct inv_mpu_iio_s *st, int fifo_rate) { - unsigned long fifo_rate; - unsigned char data; - int result; - struct inv_gyro_state_s *st; - struct inv_reg_map_s *reg; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - reg = &st->reg; - - if (st->chip_config.is_asleep) - return -EPERM; - if (kstrtoul(buf, 10, &fifo_rate)) - return -EINVAL; if ((fifo_rate < MIN_FIFO_RATE) || (fifo_rate > MAX_FIFO_RATE)) return -EINVAL; if (fifo_rate == st->chip_config.fifo_rate) - return count; + return 0; + if (st->chip_config.has_compass) { - data = COMPASS_RATE_SCALE*fifo_rate/ONE_K_HZ; - if (data > 0) - data -= 1; - st->compass_divider = data; + st->compass_divider = COMPASS_RATE_SCALE * fifo_rate / + ONE_K_HZ; + if (st->compass_divider > 0) + st->compass_divider -= 1; st->compass_counter = 0; - /* I2C_MST_DLY is set according to sample rate, - AKM cannot be read or set at sample rate higher than 100Hz*/ - result = inv_i2c_single_write(st, REG_I2C_SLV4_CTRL, data); - if (result) - return result; } - data = ONE_K_HZ / fifo_rate - 1; - result = inv_i2c_single_write(st, reg->sample_rate_div, data); - if (result) - return result; - st->chip_config.fifo_rate = fifo_rate; - result = inv_set_lpf(st, fifo_rate); - if (result) - return result; - st->irq_dur_us = (data + 1) * ONE_K_HZ; - st->last_isr_time = iio_get_time_ns(); - return count; -} -/** - * inv_fifo_rate_show() - Get the current sampling rate. - */ -static ssize_t inv_fifo_rate_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.fifo_rate); -} + st->irq_dur_ns = (ONE_K_HZ / fifo_rate) * NSEC_PER_MSEC; + st->chip_config.new_fifo_rate = fifo_rate; -/** - * inv_power_state_store() - Turn device on/off. 
- */ -static ssize_t inv_power_state_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - int result; - unsigned long power_state; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - if (kstrtoul(buf, 10, &power_state)) - return -EINVAL; - if (!power_state == st->chip_config.is_asleep) - return count; - result = inv_set_power_state(st, power_state); - return count; + return 0; } /** - * inv_power_state_show() - Check if the device is on or in sleep mode. + * inv_reg_dump_show() - Register dump for testing. */ -static ssize_t inv_power_state_show(struct device *dev, +static ssize_t inv_reg_dump_show(struct device *dev, struct device_attribute *attr, char *buf) { + int ii; + char data; + ssize_t bytes_printed = 0; struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) - return sprintf(buf, "0\n"); - else - return sprintf(buf, "1\n"); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + + mutex_lock(&indio_dev->mlock); + if (!st->chip_config.enable) + st->set_power_state(st, true); + for (ii = 0; ii < st->hw->num_reg; ii++) { + /* don't read fifo r/w register */ + if (ii == st->reg.fifo_r_w) + data = 0; + else + inv_i2c_read(st, ii, 1, &data); + bytes_printed += sprintf(buf + bytes_printed, "%#2x: %#2x\n", + ii, data); + } + if (!st->chip_config.enable) + st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); + + return bytes_printed; } -/** - * inv_firmware_loaded_store() - calling this function will change - * firmware load - */ -static ssize_t inv_firmware_loaded_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +int write_be32_key_to_mem(struct inv_mpu_iio_s *st, + u32 data, int key) { - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned long data, result; - result = kstrtoul(buf, 10, &data); - if (result) - return result; - if (data != 0) - return -EINVAL; - st->chip_config.firmware_loaded = 0; - st->chip_config.dmp_on = 0; - st->chip_config.quaternion_on = 0; - return count; + cpu_to_be32s(&data); + return mem_w_key(key, sizeof(data), (u8 *)&data); } + /** - * inv_firmware_loaded_show() - calling this function will show current - * firmware load status + * inv_quaternion_on() - calling this function will store + * current quaternion on */ -static ssize_t inv_firmware_loaded_show(struct device *dev, - struct device_attribute *attr, char *buf) +static int inv_quaternion_on(struct inv_mpu_iio_s *st, + struct iio_buffer *ring, bool en) { - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + st->chip_config.quaternion_on = en; + if (!en) { + clear_bit(INV_MPU_SCAN_QUAT_R, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_X, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_Y, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_Z, ring->scan_mask); + } - return sprintf(buf, "%d\n", st->chip_config.firmware_loaded); + return 0; } /** - * inv_lpa_mode_store() - store current low power settings + * inv_dmp_attr_store() - calling this function will store current + * dmp parameter settings */ -static ssize_t inv_lpa_mode_store(struct device *dev, +static ssize_t inv_dmp_attr_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = 
iio_priv(indio_dev); - unsigned long result, lpa_mode; - unsigned char d; - struct inv_reg_map_s *reg; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, &lpa_mode); - if (result) - return result; - - reg = &st->reg; - result = inv_i2c_read(st, reg->pwr_mgmt_1, 1, &d); - if (result) - return result; - d &= ~BIT_CYCLE; - if (lpa_mode) - d |= BIT_CYCLE; - result = inv_i2c_single_write(st, reg->pwr_mgmt_1, d); - if (result) - return result; - st->chip_config.lpa_mode = lpa_mode; - return count; -} -/** - * inv_lpa_mode_show() - show current low power settings - */ -static ssize_t inv_lpa_mode_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.lpa_mode); -} - -/** - * inv_lpa_freq_store() - store current low power frequency setting. - */ -static ssize_t inv_lpa_freq_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned long result, lpa_freq; - unsigned char d; - struct inv_reg_map_s *reg; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, &lpa_freq); - if (result) - return result; - if (lpa_freq > MAX_LPA_FREQ_PARAM) - return -EINVAL; - reg = &st->reg; - result = inv_i2c_read(st, reg->pwr_mgmt_2, 1, &d); - if (result) - return result; - d &= ~BIT_LPA_FREQ; - d |= (unsigned char)(lpa_freq << LPA_FREQ_SHIFT); - result = inv_i2c_single_write(st, reg->pwr_mgmt_2, d); - if (result) - return result; - st->chip_config.lpa_freq = lpa_freq; - return count; -} -/** - * inv_lpa_freq_show() - show current low power frequency setting - */ -static ssize_t inv_lpa_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - switch (st->chip_config.lpa_freq) { - case 0: - return sprintf(buf, "1.25\n"); - case 1: - return sprintf(buf, "5\n"); - case 2: - return sprintf(buf, "20\n"); - case 3: - return sprintf(buf, "40\n"); - default: - return sprintf(buf, "0\n"); - } -} -/** - * inv_dmp_on_store() - calling this function will store current dmp on - */ -static ssize_t inv_dmp_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - st->chip_config.dmp_on = !!data; - return count; -} - -/** - * inv_dmp_on_show() - calling this function will show current dmp_on - */ -static ssize_t inv_dmp_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->chip_config.dmp_on); -} -/** - * inv_dmp_int_on_store() - calling this function will store current dmp int on - */ -static ssize_t inv_dmp_int_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - st->chip_config.dmp_int_on = !!data; - return count; -} - -/** - * inv_dmp_int_on_show() - calling this function will show current 
dmp_int_on - */ -static ssize_t inv_dmp_int_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->chip_config.dmp_int_on); -} - -/** - * inv_dmp_output_rate_store() - calling this function store dmp_output_rate - */ -static ssize_t inv_dmp_output_rate_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - unsigned int result, data; - st = iio_priv(indio_dev); - - CHECK_DMP - if (0 == data) - return -EINVAL; - result = inv_set_fifo_rate(st, data); - if (result) - return result; - st->chip_config.dmp_output_rate = data; - return count; -} - -/** - * inv_dmp_output_rate_show() - calling this shows dmp_output_rate - */ -static ssize_t inv_dmp_output_rate_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->chip_config.dmp_output_rate); -} - -/** - * inv_orientation_on_store() - calling this function will store - * current orientation on - */ -static ssize_t inv_orientation_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data, en; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - en = !!data; - result = inv_enable_orientation_dmp(st, en); - if (result) - return result; - st->chip_config.orientation_on = en; - return count; -} -/** - * inv_orientation_on_show() - calling this function will show - * current orientation_on - */ -static ssize_t inv_orientation_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.orientation_on); -} - -/** - * inv_display_orient_on_store() - calling this function will store - * current display_orient on - */ -static ssize_t inv_display_orient_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data, en; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - en = !!data; - result = inv_set_display_orient_interrupt_dmp(st, en); - if (result) - return result; - st->chip_config.display_orient_on = en; - return count; -} -/** - * inv_display_orient_on_show() - calling this function will show - * current display_orient_on - */ -static ssize_t inv_display_orient_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.display_orient_on); -} - -/** - * inv_quaternion_on_store() - calling this function will store - * current quaternion on - */ -static ssize_t inv_quaternion_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data, en; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct iio_buffer *ring = indio_dev->buffer; - st = iio_priv(indio_dev); - - CHECK_DMP - en = !!data; - result = 
inv_send_quaternion(st, en); - if (result) - return result; - st->chip_config.quaternion_on = en; - if (0 == en) { - clear_bit(INV_MPU_SCAN_QUAT_R, ring->scan_mask); - clear_bit(INV_MPU_SCAN_QUAT_X, ring->scan_mask); - clear_bit(INV_MPU_SCAN_QUAT_Y, ring->scan_mask); - clear_bit(INV_MPU_SCAN_QUAT_Z, ring->scan_mask); - } - - return count; -} -/** - * inv_quaternion_on_show() - calling this function will show - * current orientation_on - */ -static ssize_t inv_quaternion_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.quaternion_on); -} - -/** - * inv_tap_on_store() - calling this function will store current tap on - */ -static ssize_t inv_tap_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - st->chip_config.tap_on = !!data; - result = inv_enable_tap_dmp(st, st->chip_config.tap_on); - return count; -} - -/** - * inv_tap_on_show() - calling this function will show current tap_on - */ -static ssize_t inv_tap_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->chip_config.tap_on); -} -/** - * inv_tap_time_store() - calling this function will store current tap time - */ -static ssize_t inv_tap_time_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - result = inv_set_tap_time_dmp(st, data); - if (result) - return result; - st->tap.time = data; - return count; -} -/** - * inv_tap_time_show() - calling this function will show current tap time - */ -static ssize_t inv_tap_time_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->tap.time); -} - -/** - * inv_tap_min_count_store() - calling this function will store tap count - */ -static ssize_t inv_tap_min_count_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - CHECK_DMP - result = inv_set_min_taps_dmp(st, data); - if (result) - return result; - st->tap.min_count = data; - return count; -} -/** - * inv_tap_min_count_show() - calling this function show minimum count - */ -static ssize_t inv_tap_min_count_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->tap.min_count); -} - -/** - * inv_tap_threshold_store() - calling this function will store tap threshold - */ -static ssize_t inv_tap_threshold_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int result, data; - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = 
iio_priv(indio_dev); - - CHECK_DMP - result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_X, data); - if (result) - return result; - result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_Y, data); - if (result) - return result; - result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_Z, data); - if (result) - return result; - - st->tap.thresh = data; - return count; -} -/** - * inv_tap_thresh_show() - calling this function show current tap threshold - */ -static ssize_t inv_tap_threshold_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct inv_gyro_state_s *st; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->tap.thresh); -} -/** - * inv_clk_src_show() - Show the device's clock source. - */ -static ssize_t inv_clk_src_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - - switch (st->chip_config.clk_src) { - case INV_CLK_INTERNAL: - return sprintf(buf, "INTERNAL\n"); - case INV_CLK_PLL: - return sprintf(buf, "Gyro PLL\n"); - default: - return -EPERM; - } -} -/** - * inv_reg_dump_show() - Register dump for testing. - * TODO: Only for testing. - */ -static ssize_t inv_reg_dump_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - int ii; - char data; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - ssize_t bytes_printed = 0; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - - for (ii = 0; ii < st->hw->num_reg; ii++) { - inv_i2c_read(st, ii, 1, &data); - bytes_printed += sprintf(buf+bytes_printed, "%#2x: %#2x\n", - ii, data); - } - return bytes_printed; -} - -/** - * inv_self_test_show() - self test result. 0 for fail; 1 for success. - * calling this function will trigger self test - * and return test result. 
- */ -static ssize_t inv_self_test_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - int result; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (INV_MPU3050 == st->chip_type) - result = 0; - else - result = inv_hw_self_test(st); - return sprintf(buf, "%d\n", result); -} -/** - * inv_key_show() - calling this function will show the key - * - */ -static ssize_t inv_key_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - unsigned char *key; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - key = st->plat_data.key; - return sprintf(buf, - "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", - key[0], key[1], key[2], key[3], - key[4], key[5], key[6], key[7], - key[8], key[9], key[10], key[11], - key[12], key[13], key[14], key[15]); -} -/** - * inv_gyro_matrix_show() - show orientation matrix - */ -static ssize_t inv_gyro_matrix_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - signed char *m; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - m = st->plat_data.orientation; - return sprintf(buf, - "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", - m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); -} -/** - * inv_accl_matrix_show() - show orientation matrix - */ -static ssize_t inv_accl_matrix_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - signed char *m; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->plat_data.sec_slave_type == SECONDARY_SLAVE_TYPE_ACCEL) - m = st->plat_data.secondary_orientation; - else - m = st->plat_data.orientation; - return sprintf(buf, - "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", - m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); -} -/** - * inv_compass_matrix_show() - show orientation matrix - */ -static ssize_t inv_compass_matrix_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - signed char *m; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->plat_data.sec_slave_type == SECONDARY_SLAVE_TYPE_COMPASS) - m = st->plat_data.secondary_orientation; - else - return -1; - return sprintf(buf, - "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", - m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); -} - -/** - * inv_flick_lower_store() - calling this function will store current - * flick lower bound - */ -static ssize_t inv_flick_lower_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtol(buf, 10, (long unsigned int *)&data); - if (result) - return result; - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - - result = mem_w_key(KEY_FLICK_LOWER, 4, p); - if (result) - return result; - st->flick.lower = data; - return count; -} - -/** - * inv_flick_lower_show() - calling this function will show current - * flick lower bound - */ -static ssize_t inv_flick_lower_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->flick.lower); -} -/** - * 
inv_flick_upper_store() - calling this function will store current - * flick upper bound - */ -static ssize_t inv_flick_upper_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, (long unsigned int *)&data); - if (result) - return result; - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - result = mem_w_key(KEY_FLICK_UPPER, 4, p); - if (result) - return result; - st->flick.upper = data; - return count; -} - -/** - * inv_flick_upper_show() - calling this function will show current - * flick upper bound - */ -static ssize_t inv_flick_upper_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->flick.upper); -} -/** - * inv_flick_counter_store() - calling this function will store current - * flick counter value - */ -static ssize_t inv_flick_counter_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, (long unsigned int *)&data); - if (result) - return result; - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - result = mem_w_key(KEY_FLICK_COUNTER, 4, p); - if (result) - return result; - st->flick.counter = data; - - return count; -} - -/** - * inv_flick_counter_show() - calling this function will show current - * flick counter value - */ -static ssize_t inv_flick_counter_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->flick.counter); -} - -/** - * inv_flick_int_on_store() - calling this function will store current - * flick interrupt on value - */ -static ssize_t inv_flick_int_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned long result, data; - unsigned char d[4]; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, &data); - if (result) - return result; - if (data) - /* Use interrupt to signal when gesture was observed */ - d[0] = DIND40+4; - else - d[0] = DINAA0+8; - result = mem_w_key(KEY_CGNOTICE_INTR, 1, d); - if (result) - return result; - st->chip_config.flick_int_on = data; - return count; -} + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); + int result, data; -/** - * inv_flick_int_on_show() - calling this function will show current - * flick interrupt on value - */ -static ssize_t inv_flick_int_on_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.flick_int_on); -} -/** - * inv_flick_axis_store() - calling this function will store current - * flick axis value - */ -static ssize_t 
inv_flick_axis_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned long result, data; - unsigned char d[4]; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, &data); - if (result) - return result; + mutex_lock(&indio_dev->mlock); + if (st->chip_config.enable) { + result = -EBUSY; + goto dmp_attr_store_fail; + } + if (this_attr->address <= ATTR_DMP_DISPLAY_ORIENTATION_ON) { + if (!st->chip_config.firmware_loaded) { + result = -EINVAL; + goto dmp_attr_store_fail; + } + result = st->set_power_state(st, true); + if (result) + goto dmp_attr_store_fail; + } - if (data == 0) - d[0] = DINBC2; - else if (data == 2) - d[2] = DINBC6; - else - d[0] = DINBC4; - result = mem_w_key(KEY_CFG_FLICK_IN, 1, d); + result = kstrtoint(buf, 10, &data); if (result) - return result; - st->flick.axis = data; - - return count; -} + goto dmp_attr_store_fail; + switch (this_attr->address) { + case ATTR_DMP_SMD_ENABLE: + { + u8 on[] = {0, 1}; + u8 off[] = {0, 0}; + u8 *d; + if (data) + d = on; + else + d = off; + result = mem_w_key(KEY_SMD_ENABLE, ARRAY_SIZE(on), d); + if (result) + goto dmp_attr_store_fail; + st->chip_config.smd_enable = !!data; + } + break; + case ATTR_DMP_SMD_THLD: + if (data < 0 || data > SHRT_MAX) + goto dmp_attr_store_fail; + result = write_be32_key_to_mem(st, data << 16, + KEY_SMD_ACCEL_THLD); + if (result) + goto dmp_attr_store_fail; + st->smd.threshold = data; + break; + case ATTR_DMP_SMD_DELAY_THLD: + if (data < 0 || data > INT_MAX / MPU_DEFAULT_DMP_FREQ) + goto dmp_attr_store_fail; + result = write_be32_key_to_mem(st, data * MPU_DEFAULT_DMP_FREQ, + KEY_SMD_DELAY_THLD); + if (result) + goto dmp_attr_store_fail; + st->smd.delay = data; + break; + case ATTR_DMP_SMD_DELAY_THLD2: + if (data < 0 || data > INT_MAX / MPU_DEFAULT_DMP_FREQ) + goto dmp_attr_store_fail; + result = write_be32_key_to_mem(st, data * MPU_DEFAULT_DMP_FREQ, + KEY_SMD_DELAY2_THLD); + if (result) + goto dmp_attr_store_fail; + st->smd.delay2 = data; + break; + case ATTR_DMP_TAP_ON: + result = inv_enable_tap_dmp(st, !!data); + if (result) + goto dmp_attr_store_fail; + st->chip_config.tap_on = !!data; + break; + case ATTR_DMP_TAP_THRESHOLD: { + const char ax[] = {INV_TAP_AXIS_X, INV_TAP_AXIS_Y, + INV_TAP_AXIS_Z}; + int i; + if (data < 0 || data > USHRT_MAX) { + result = -EINVAL; + goto dmp_attr_store_fail; + } + for (i = 0; i < ARRAY_SIZE(ax); i++) { + result = inv_set_tap_threshold_dmp(st, ax[i], data); + if (result) + goto dmp_attr_store_fail; + } + st->tap.thresh = data; + break; + } + case ATTR_DMP_TAP_MIN_COUNT: + if (data < 0 || data > USHRT_MAX) { + result = -EINVAL; + goto dmp_attr_store_fail; + } + result = inv_set_min_taps_dmp(st, data); + if (result) + goto dmp_attr_store_fail; + st->tap.min_count = data; + break; + case ATTR_DMP_TAP_TIME: + if (data < 0 || data > USHRT_MAX) { + result = -EINVAL; + goto dmp_attr_store_fail; + } + result = inv_set_tap_time_dmp(st, data); + if (result) + goto dmp_attr_store_fail; + st->tap.time = data; + break; + case ATTR_DMP_DISPLAY_ORIENTATION_ON: + result = inv_set_display_orient_interrupt_dmp(st, !!data); + if (result) + goto dmp_attr_store_fail; + st->chip_config.display_orient_on = !!data; + break; + /* from here, power of chip is not turned on */ + case ATTR_DMP_ON: + st->chip_config.dmp_on = !!data; + break; + case ATTR_DMP_INT_ON: + st->chip_config.dmp_int_on = !!data; + break; + case 
ATTR_DMP_EVENT_INT_ON: + st->chip_config.dmp_event_int_on = !!data; + break; + case ATTR_DMP_OUTPUT_RATE: + if (data <= 0 || data > MAX_DMP_OUTPUT_RATE) { + result = -EINVAL; + goto dmp_attr_store_fail; + } + st->chip_config.dmp_output_rate = data; + if (st->chip_config.has_compass) { + st->compass_dmp_divider = COMPASS_RATE_SCALE * data / + ONE_K_HZ; + if (st->compass_dmp_divider > 0) + st->compass_dmp_divider -= 1; + st->compass_counter = 0; + } + break; + case ATTR_DMP_QUATERNION_ON: + result = inv_quaternion_on(st, indio_dev->buffer, !!data); + break; +#ifdef CONFIG_INV_TESTING + case ATTR_DEBUG_SMD_ENABLE_TESTP1: + { + u8 d[] = {0x42}; + result = st->set_power_state(st, true); + if (result) + goto dmp_attr_store_fail; + result = mem_w_key(KEY_SMD_ENABLE_TESTPT1, ARRAY_SIZE(d), d); + if (result) + goto dmp_attr_store_fail; + } + break; + case ATTR_DEBUG_SMD_ENABLE_TESTP2: + { + u8 d[] = {0x42}; + result = st->set_power_state(st, true); + if (result) + goto dmp_attr_store_fail; + result = mem_w_key(KEY_SMD_ENABLE_TESTPT2, ARRAY_SIZE(d), d); + if (result) + goto dmp_attr_store_fail; + } + break; +#endif + default: + result = -EINVAL; + goto dmp_attr_store_fail; + } -/** - * inv_flick_axis_show() - calling this function will show current - * flick axis value - */ -static ssize_t inv_flick_axis_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->flick.axis); -} -/** - * inv_flick_msg_on_store() - calling this function will store current - * flick message on value - */ -static ssize_t inv_flick_msg_on_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, (long unsigned int *)&data); - if (result) - return result; - if (data) - data = DATA_MSG_ON; - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - result = mem_w_key(KEY_FLICK_MSG, 4, p); +dmp_attr_store_fail: + if ((this_attr->address <= ATTR_DMP_DISPLAY_ORIENTATION_ON) && + (!st->chip_config.enable)) + result |= st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); if (result) return result; - st->flick.msg_on = data; return count; } /** - * inv_flick_msg_on_show() - calling this function will show current - * flick message on value + * inv_attr_show() - calling this function will show current + * dmp parameters. 
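Every DMP parameter is now funnelled through inv_dmp_attr_store() above: writes are rejected with -EBUSY while the chip is streaming, and for addresses up to ATTR_DMP_DISPLAY_ORIENTATION_ON the firmware must already be loaded and the chip is powered up around the write. A minimal userspace sketch of driving one of these attributes, assuming the usual IIO sysfs layout and device number:

/*
 * Hypothetical userspace sketch: enable the DMP tap gesture.
 * The sysfs path (iio:device0) is an assumption; use whichever node
 * this driver registers on the target system.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/bus/iio/devices/iio:device0/tap_on", "w");

	if (!f) {
		perror("tap_on");
		return 1;
	}
	/* lands in inv_dmp_attr_store() with address ATTR_DMP_TAP_ON */
	fprintf(f, "1\n");
	fclose(f);
	return 0;
}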
*/ -static ssize_t inv_flick_msg_on_show(struct device *dev, +static ssize_t inv_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->flick.msg_on); -} + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); + int result; + s8 *m; + + switch (this_attr->address) { + case ATTR_DMP_SMD_ENABLE: + return sprintf(buf, "%d\n", st->chip_config.smd_enable); + case ATTR_DMP_SMD_THLD: + return sprintf(buf, "%d\n", st->smd.threshold); + case ATTR_DMP_SMD_DELAY_THLD: + return sprintf(buf, "%d\n", st->smd.delay); + case ATTR_DMP_SMD_DELAY_THLD2: + return sprintf(buf, "%d\n", st->smd.delay2); + case ATTR_DMP_TAP_ON: + return sprintf(buf, "%d\n", st->chip_config.tap_on); + case ATTR_DMP_TAP_THRESHOLD: + return sprintf(buf, "%d\n", st->tap.thresh); + case ATTR_DMP_TAP_MIN_COUNT: + return sprintf(buf, "%d\n", st->tap.min_count); + case ATTR_DMP_TAP_TIME: + return sprintf(buf, "%d\n", st->tap.time); + case ATTR_DMP_DISPLAY_ORIENTATION_ON: + return sprintf(buf, "%d\n", + st->chip_config.display_orient_on); + + case ATTR_DMP_ON: + return sprintf(buf, "%d\n", st->chip_config.dmp_on); + case ATTR_DMP_INT_ON: + return sprintf(buf, "%d\n", st->chip_config.dmp_int_on); + case ATTR_DMP_EVENT_INT_ON: + return sprintf(buf, "%d\n", st->chip_config.dmp_event_int_on); + case ATTR_DMP_OUTPUT_RATE: + return sprintf(buf, "%d\n", + st->chip_config.dmp_output_rate); + case ATTR_DMP_QUATERNION_ON: + return sprintf(buf, "%d\n", st->chip_config.quaternion_on); + + case ATTR_MOTION_LPA_ON: + return sprintf(buf, "%d\n", st->mot_int.mot_on); + case ATTR_MOTION_LPA_FREQ:{ + const char *f[] = {"1.25", "5", "20", "40"}; + return sprintf(buf, "%s\n", f[st->chip_config.lpa_freq]); + } + case ATTR_MOTION_LPA_DURATION: + return sprintf(buf, "%d\n", st->mot_int.mot_dur); + case ATTR_MOTION_LPA_THRESHOLD: + return sprintf(buf, "%d\n", st->mot_int.mot_thr); + + case ATTR_SELF_TEST_SAMPLES: + return sprintf(buf, "%d\n", st->self_test.samples); + case ATTR_SELF_TEST_THRESHOLD: + return sprintf(buf, "%d\n", st->self_test.threshold); + case ATTR_GYRO_ENABLE: + return sprintf(buf, "%d\n", st->chip_config.gyro_enable); + case ATTR_ACCL_ENABLE: + return sprintf(buf, "%d\n", st->chip_config.accl_enable); + case ATTR_COMPASS_ENABLE: + return sprintf(buf, "%d\n", st->chip_config.compass_enable); + case ATTR_POWER_STATE: + return sprintf(buf, "%d\n", !fake_asleep); + case ATTR_FIRMWARE_LOADED: + return sprintf(buf, "%d\n", st->chip_config.firmware_loaded); + case ATTR_SAMPLING_FREQ: + return sprintf(buf, "%d\n", st->chip_config.new_fifo_rate); + + case ATTR_SELF_TEST: + if (st->chip_config.enable) + return -EBUSY; + mutex_lock(&indio_dev->mlock); + if (INV_MPU3050 == st->chip_type) + result = 1; + else + result = inv_hw_self_test(st); + mutex_unlock(&indio_dev->mlock); + return sprintf(buf, "%d\n", result); -/** - * inv_pedometer_steps_store() - calling this function will store current - * pedometer steps into MPU memory - */ -static ssize_t inv_pedometer_steps_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, (long unsigned int *)&data); - if (result) - return result; 
+ case ATTR_GYRO_MATRIX: + m = st->plat_data.orientation; + return sprintf(buf, "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", + m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); + case ATTR_ACCL_MATRIX: + if (st->plat_data.sec_slave_type == SECONDARY_SLAVE_TYPE_ACCEL) + m = st->plat_data.secondary_orientation; + else + m = st->plat_data.orientation; + return sprintf(buf, "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", + m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); + case ATTR_COMPASS_MATRIX: + if (st->plat_data.sec_slave_type == + SECONDARY_SLAVE_TYPE_COMPASS) + m = st->plat_data.secondary_orientation; + else + return -ENODEV; + return sprintf(buf, "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", + m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); + case ATTR_SECONDARY_NAME:{ + const char *n[] = {"0", "AK8975", "AK8972", "AK8963", "BMA250"}; + if (COMPASS_ID_AK8975 == st->plat_data.sec_slave_id) + return sprintf(buf, "%s\n", n[1]); + else if (COMPASS_ID_AK8972 == st->plat_data.sec_slave_id) + return sprintf(buf, "%s\n", n[2]); + else if (COMPASS_ID_AK8963 == st->plat_data.sec_slave_id) + return sprintf(buf, "%s\n", n[3]); + else if (ACCEL_ID_BMA250 == st->plat_data.sec_slave_id) + return sprintf(buf, "%s\n", n[4]); + else + return sprintf(buf, "%s\n", n[0]); + } - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - result = mem_w_key(KEY_D_PEDSTD_STEPCTR, 4, p); - if (result) - return result; +#ifdef CONFIG_INV_TESTING + case ATTR_REG_WRITE: + return sprintf(buf, "1\n"); + case ATTR_DEBUG_SMD_EXE_STATE: + { + u8 d[2]; - return count; -} + result = st->set_power_state(st, true); + mpu_memory_read(st, st->i2c_addr, + inv_dmp_get_address(KEY_SMD_EXE_STATE), 2, d); + return sprintf(buf, "%d\n", (short)be16_to_cpup((__be16 *)(d))); + } + case ATTR_DEBUG_SMD_DELAY_CNTR: + { + u8 d[4]; -/** - * inv_pedometer_steps_show() - calling this function will store current - * pedometer steps into MPU memory - */ -static ssize_t inv_pedometer_steps_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - int result, data; - unsigned char d[4]; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) + result = st->set_power_state(st, true); + mpu_memory_read(st, st->i2c_addr, + inv_dmp_get_address(KEY_SMD_DELAY_CNTR), 4, d); + return sprintf(buf, "%d\n", (int)be32_to_cpup((__be32 *)(d))); + } +#endif + default: return -EPERM; - result = mpu_memory_read(st->sl_handle, st->i2c_addr, - inv_dmp_get_address(KEY_D_PEDSTD_STEPCTR), 4, d); - if (result) - return result; - data = be32_to_cpup((int *)d); - return sprintf(buf, "%d\n", data); + } } -/** - * inv_pedometer_time_store() - calling this function will store current - * pedometer time into MPU memory - */ -static ssize_t inv_pedometer_time_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - unsigned int result, data, out; - unsigned char *p; - if (st->chip_config.is_asleep) - return -EPERM; - result = kstrtoul(buf, 10, (long unsigned int *)&data); - if (result) - return result; - out = cpu_to_be32p(&data); - p = (unsigned char *)&out; - result = mem_w_key(KEY_D_PEDSTD_TIMECTR, 4, p); - if (result) - return result; - return count; -} /** - * inv_pedometer_time_show() - calling this function will store current - * pedometer steps into MPU memory + * inv_dmp_display_orient_show() - calling this function will + * show orientation This event 
must use poll. */ -static ssize_t inv_pedometer_time_show(struct device *dev, +static ssize_t inv_dmp_display_orient_show(struct device *dev, struct device_attribute *attr, char *buf) { - int result, data; - unsigned char d[4]; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) - return -EPERM; - result = mpu_memory_read(st->sl_handle, st->i2c_addr, - inv_dmp_get_address(KEY_D_PEDSTD_TIMECTR), 4, d); - if (result) - return result; - data = be32_to_cpup((int *)d); - return sprintf(buf, "%d\n", data*20); + struct inv_mpu_iio_s *st = iio_priv(dev_get_drvdata(dev)); + return sprintf(buf, "%d\n", st->display_orient_data); } /** - * inv_dmp_flick_show() - calling this function will show flick event. + * inv_accel_motion_show() - calling this function showes motion interrupt. * This event must use poll. */ -static ssize_t inv_dmp_flick_show(struct device *dev, +static ssize_t inv_accel_motion_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "1\n"); } -/** - * inv_dmp_orient_show() - calling this function will show orientation - * This event must use poll. - */ -static ssize_t inv_dmp_orient_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->orient_data); -} /** - * inv_dmp_display_orient_show() - calling this function will - * show orientation This event must use poll. + * inv_smd_show() - calling this function showes smd interrupt. + * This event must use poll. */ -static ssize_t inv_dmp_display_orient_show(struct device *dev, +static ssize_t inv_smd_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->display_orient_data); + return sprintf(buf, "1\n"); } /** @@ -1518,611 +1111,541 @@ static ssize_t inv_dmp_display_orient_show(struct device *dev, static ssize_t inv_dmp_tap_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(dev_get_drvdata(dev)); return sprintf(buf, "%d\n", st->tap_data); } + /** * inv_temperature_show() - Read temperature data directly from registers. 
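The temperature conversion that follows replaces the per-chip offset/scale defines with lookup tables: the raw 16-bit register value is shifted into Q16, multiplied by a per-chip Q30 scale, and a Q16 offset is added. A worked sketch, assuming inv_q30_mult() is the usual (a * b) >> 30 fixed-point helper and MPU_TEMP_SHIFT is 16; with the MPU6050 constants this works out to roughly 36.53 degC + raw/340, expressed in 1/65536 degC units:

/*
 * Hypothetical illustration of the Q30 temperature math used by
 * inv_temperature_show(). inv_q30_mult() as (a * b) >> 30 and
 * MPU_TEMP_SHIFT == 16 are assumptions taken from the surrounding code.
 */
#include <stdio.h>

static long q30_mult(long a, long b)
{
	return (long)(((long long)a * b) >> 30);
}

int main(void)
{
	/* MPU6050 entries from the scale/offset tables below */
	const long scale = 3158064L, offset = 2394184L;
	short raw = 0;				/* raw temperature register value */
	long t_q16 = offset + q30_mult((long)raw << 16, scale);

	/* t_q16 is in 1/65536 degC: raw == 0 gives about 36.53 degC */
	printf("%ld.%02ld degC\n", t_q16 >> 16, ((t_q16 & 0xffff) * 100) >> 16);
	return 0;
}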
*/ static ssize_t inv_temperature_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct inv_reg_map_s *reg; - int result; + int result, cur_scale, cur_off; short temp; long scale_t; - unsigned char data[2]; - reg = &st->reg; + u8 data[2]; + const long scale[] = {3834792L, 3158064L, 3340827L}; + const long offset[] = {5383314L, 2394184L, 1376256L}; - if (st->chip_config.is_asleep) - return -EPERM; + reg = &st->reg; + mutex_lock(&indio_dev->mlock); + if (!st->chip_config.enable) + result = st->set_power_state(st, true); + else + result = 0; + if (result) { + mutex_unlock(&indio_dev->mlock); + return result; + } result = inv_i2c_read(st, reg->temperature, 2, data); + if (!st->chip_config.enable) + result |= st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); if (result) { - printk(KERN_ERR "Could not read temperature register.\n"); + pr_err("Could not read temperature register.\n"); return result; } temp = (signed short)(be16_to_cpup((short *)&data[0])); + switch (st->chip_type) { + case INV_MPU3050: + cur_scale = scale[0]; + cur_off = offset[0]; + break; + case INV_MPU6050: + cur_scale = scale[1]; + cur_off = offset[1]; + break; + case INV_MPU6500: + cur_scale = scale[2]; + cur_off = offset[2]; + break; + default: + return -EINVAL; + }; + scale_t = cur_off + + inv_q30_mult((int)temp << MPU_TEMP_SHIFT, cur_scale); - if (INV_MPU3050 == st->chip_type) - scale_t = MPU3050_TEMP_OFFSET + - inv_q30_mult((long)temp << MPU_TEMP_SHIFT, - MPU3050_TEMP_SCALE); - else - scale_t = MPU6050_TEMP_OFFSET + - inv_q30_mult((long)temp << MPU_TEMP_SHIFT, - MPU6050_TEMP_SCALE); - return sprintf(buf, "%ld %lld\n", scale_t, iio_get_time_ns()); + INV_I2C_INC_TEMPREAD(1); + + return sprintf(buf, "%ld %lld\n", scale_t, get_time_ns()); } -static int inv_switch_gyro_engine(struct inv_gyro_state_s *st, int en) + +/** + * inv_firmware_loaded() - calling this function will change + * firmware load + */ +static int inv_firmware_loaded(struct inv_mpu_iio_s *st, int data) { - struct inv_reg_map_s *reg; - unsigned char data, p; - int result; - reg = &st->reg; - if (INV_MPU3050 == st->chip_type) { - if (en) { - data = INV_CLK_PLL; - p = (BITS_3050_POWER1 | data); - result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); - if (result) - return result; - p = (BITS_3050_POWER2 | data); - result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); - if (result) - return result; - p = data; - result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); - if (result) - return result; - } else { - p = BITS_3050_GYRO_STANDBY; - result = inv_i2c_single_write(st, reg->pwr_mgmt_1, p); - if (result) - return result; - } - } else { - result = inv_i2c_read(st, reg->pwr_mgmt_2, 1, &data); - if (result) - return result; - if (en) - data &= (~BIT_PWR_GYRO_STBY); - else - data |= BIT_PWR_GYRO_STBY; - result = inv_i2c_single_write(st, reg->pwr_mgmt_2, data); - if (result) - return result; - msleep(SENSOR_UP_TIME); - } - if (en) - st->chip_config.clk_src = INV_CLK_PLL; - else - st->chip_config.clk_src = INV_CLK_INTERNAL; + if (data) + return -EINVAL; + st->chip_config.firmware_loaded = 0; + st->chip_config.dmp_on = 0; + st->chip_config.quaternion_on = 0; return 0; } -static int inv_switch_accl_engine(struct inv_gyro_state_s *st, int en) + +static int inv_switch_gyro_engine(struct inv_mpu_iio_s *st, bool en) { - struct inv_reg_map_s *reg; - unsigned char data; - int 
result; - reg = &st->reg; - if (INV_MPU3050 == st->chip_type) { - if (NULL == st->mpu_slave) - return -EPERM; - if (en) - result = st->mpu_slave->resume(st); - else - result = st->mpu_slave->suspend(st); - if (result) - return result; - } else { - result = inv_i2c_read(st, reg->pwr_mgmt_2, 1, &data); - if (result) - return result; - if (en) - data &= (~BIT_PWR_ACCL_STBY); - else - data |= BIT_PWR_ACCL_STBY; - result = inv_i2c_single_write(st, reg->pwr_mgmt_2, data); - if (result) - return result; - msleep(SENSOR_UP_TIME); - } - return 0; + return inv_switch_engine(st, en, BIT_PWR_GYRO_STBY); +} + +static int inv_switch_accl_engine(struct inv_mpu_iio_s *st, bool en) +{ + return inv_switch_engine(st, en, BIT_PWR_ACCL_STBY); } /** - * inv_gyro_enable_store() - Enable/disable gyro. + * inv_gyro_enable() - Enable/disable gyro. */ -static ssize_t inv_gyro_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static int inv_gyro_enable(struct inv_mpu_iio_s *st, + struct iio_buffer *ring, bool en) { - unsigned long data, en; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - struct iio_buffer *ring = indio_dev->buffer; - int result; - - if (st->chip_config.is_asleep) - return -EPERM; - if (st->chip_config.enable) - return -EPERM; - - result = kstrtoul(buf, 10, &data); - if (result) - return -EINVAL; - en = !!data; if (en == st->chip_config.gyro_enable) - return count; - result = inv_switch_gyro_engine(st, en); - if (result) - return result; - - if (0 == en) { + return 0; + if (!en) { st->chip_config.gyro_fifo_enable = 0; clear_bit(INV_MPU_SCAN_GYRO_X, ring->scan_mask); clear_bit(INV_MPU_SCAN_GYRO_Y, ring->scan_mask); clear_bit(INV_MPU_SCAN_GYRO_Z, ring->scan_mask); } st->chip_config.gyro_enable = en; - return count; -} -/** - * inv_gyro_enable_show() - Check if the FIFO and ring buffer are enabled. - */ -static ssize_t inv_gyro_enable_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.gyro_enable); + + return 0; } /** - * inv_accl_enable_store() - Enable/disable accl. + * inv_accl_enable() - Enable/disable accl. */ -static ssize_t inv_accl_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t inv_accl_enable(struct inv_mpu_iio_s *st, + struct iio_buffer *ring, bool en) { - unsigned long en, data; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - struct iio_buffer *ring = indio_dev->buffer; - int result; - - if (st->chip_config.is_asleep) - return -EPERM; - if (st->chip_config.enable) - return -EPERM; - result = kstrtoul(buf, 10, &data); - if (result) - return -EINVAL; - if (data) - en = 1; - else - en = 0; if (en == st->chip_config.accl_enable) - return count; - result = inv_switch_accl_engine(st, en); - if (result) - return result; - st->chip_config.accl_enable = en; - if (0 == en) { + return 0; + if (!en) { st->chip_config.accl_fifo_enable = 0; clear_bit(INV_MPU_SCAN_ACCL_X, ring->scan_mask); clear_bit(INV_MPU_SCAN_ACCL_Y, ring->scan_mask); clear_bit(INV_MPU_SCAN_ACCL_Z, ring->scan_mask); } - return count; + st->chip_config.accl_enable = en; + + return 0; } + /** - * inv_accl_enable_show() - Check if the FIFO and ring buffer are enabled. 
+ * inv_compass_enable() - calling this function will store compass + * enable */ -static ssize_t inv_accl_enable_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t inv_compass_enable(struct inv_mpu_iio_s *st, + struct iio_buffer *ring, bool en) { - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); - return sprintf(buf, "%d\n", st->chip_config.accl_enable); + if (en == st->chip_config.compass_enable) + return 0; + if (!en) { + st->chip_config.compass_fifo_enable = 0; + clear_bit(INV_MPU_SCAN_MAGN_X, ring->scan_mask); + clear_bit(INV_MPU_SCAN_MAGN_Y, ring->scan_mask); + clear_bit(INV_MPU_SCAN_MAGN_Z, ring->scan_mask); + } + st->chip_config.compass_enable = en; + + return 0; } /** - * inv_compass_en_store() - calling this function will store compass - * enable + * inv_attr_store() - calling this function will store current + * non-dmp parameter settings */ -static ssize_t inv_compass_en_store(struct device *dev, +static ssize_t inv_attr_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - unsigned long data, result, en; struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct iio_buffer *ring = indio_dev->buffer; - if (st->chip_config.is_asleep) - return -EPERM; - if (st->chip_config.enable) - return -EPERM; - result = kstrtoul(buf, 10, &data); + struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); + int data; + u8 d; + int result; + + mutex_lock(&indio_dev->mlock); + if (st->chip_config.enable) { + result = -EBUSY; + goto attr_store_fail; + } + if (this_attr->address <= ATTR_MOTION_LPA_THRESHOLD) { + result = st->set_power_state(st, true); + if (result) + goto attr_store_fail; + } + + result = kstrtoint(buf, 10, &data); + if (result) + goto attr_store_fail; + switch (this_attr->address) { + case ATTR_MOTION_LPA_ON: + if (INV_MPU6500 == st->chip_type) { + if (data) + /* enable and put in MPU6500 mode */ + d = BIT_ACCEL_INTEL_ENABLE + | BIT_ACCEL_INTEL_MODE; + else + d = 0; + result = inv_i2c_single_write(st, + REG_6500_ACCEL_INTEL_CTRL, d); + if (result) + goto attr_store_fail; + } + st->mot_int.mot_on = !!data; + st->chip_config.lpa_mode = !!data; + break; + case ATTR_MOTION_LPA_FREQ: + result = inv_lpa_freq(st, data); + break; + case ATTR_MOTION_LPA_DURATION: + if (INV_MPU6500 != st->chip_type) { + result = inv_i2c_single_write(st, REG_ACCEL_MOT_DUR, + MPU6050_MOTION_DUR_DEFAULT); + if (result) + goto attr_store_fail; + } + st->mot_int.mot_dur = data; + break; + case ATTR_MOTION_LPA_THRESHOLD: + if ((data > MPU6XXX_MAX_MOTION_THRESH) || (data < 0)) { + result = -EINVAL; + goto attr_store_fail; + } + d = (u8)(data >> MPU6XXX_MOTION_THRESH_SHIFT); + data = (d << MPU6XXX_MOTION_THRESH_SHIFT); + result = inv_i2c_single_write(st, REG_ACCEL_MOT_THR, d); + if (result) + goto attr_store_fail; + st->mot_int.mot_thr = data; + break; + /* from now on, power is not turned on */ + case ATTR_SELF_TEST_SAMPLES: + if (data > ST_MAX_SAMPLES || data < 0) { + result = -EINVAL; + goto attr_store_fail; + } + st->self_test.samples = data; + break; + case ATTR_SELF_TEST_THRESHOLD: + if (data > ST_MAX_THRESHOLD || data < 0) { + result = -EINVAL; + goto attr_store_fail; + } + st->self_test.threshold = data; + case ATTR_GYRO_ENABLE: + result = st->gyro_en(st, ring, !!data); + break; + case ATTR_ACCL_ENABLE: + result = st->accl_en(st, ring, !!data); + break; + case ATTR_COMPASS_ENABLE: + 
result = inv_compass_enable(st, ring, !!data); + break; + case ATTR_POWER_STATE: + fake_asleep = !data; + break; + case ATTR_FIRMWARE_LOADED: + result = inv_firmware_loaded(st, data); + break; + case ATTR_SAMPLING_FREQ: + result = inv_fifo_rate_store(st, data); + break; + default: + result = -EINVAL; + goto attr_store_fail; + }; + +attr_store_fail: + if ((this_attr->address <= ATTR_MOTION_LPA_THRESHOLD) && + (!st->chip_config.enable)) + result |= st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); if (result) return result; - if (data) - en = 1; - else - en = 0; - if (en == st->chip_config.compass_enable) - return count; - st->chip_config.compass_enable = en; - if (0 == en) { - st->chip_config.compass_fifo_enable = 0; - clear_bit(INV_MPU_SCAN_MAGN_X, ring->scan_mask); - clear_bit(INV_MPU_SCAN_MAGN_Y, ring->scan_mask); - clear_bit(INV_MPU_SCAN_MAGN_Z, ring->scan_mask); - } return count; } + +#ifdef CONFIG_INV_TESTING /** - * inv_compass_en_show() - calling this function will show compass - * enable status + * inv_reg_write_store() - register write command for testing. + * Format: WSRRDD, where RR is the register in hex, + * and DD is the data in hex. */ -static ssize_t inv_compass_en_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t inv_reg_write_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + u32 result; + u8 wreg, wval; + int temp; + char local_buf[10]; + + if ((buf[0] != 'W' && buf[0] != 'w') || + (buf[1] != 'S' && buf[1] != 's')) + return -EINVAL; + if (strlen(buf) < 6) + return -EINVAL; + + strncpy(local_buf, buf, 7); + local_buf[6] = 0; + result = sscanf(&local_buf[4], "%x", &temp); + if (result == 0) + return -EINVAL; + wval = temp; + local_buf[4] = 0; + sscanf(&local_buf[2], "%x", &temp); + if (result == 0) + return -EINVAL; + wreg = temp; - return sprintf(buf, "%d\n", st->chip_config.compass_enable); + result = inv_i2c_single_write(st, wreg, wval); + if (result) + return result; + + return count; } +#endif /* CONFIG_INV_TESTING */ + +#define INV_MPU_CHAN(_type, _channel2, _index) \ + { \ + .type = _type, \ + .modified = 1, \ + .channel2 = _channel2, \ + .info_mask = (IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | \ + IIO_CHAN_INFO_SCALE_SHARED_BIT), \ + .scan_index = _index, \ + .scan_type = IIO_ST('s', 16, 16, 0) \ + } -static const struct iio_chan_spec gyro_channels[] = { - /*there is only one gyro, with modifier X, Y, Z - So it is not indexed. 
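inv_reg_write_store() above accepts a six-character command: 'W' 'S' followed by the register and the value as two hex digits each. A sketch of exercising it from userspace (only present when CONFIG_INV_TESTING is set; the sysfs path and the choice of register are assumptions):

/*
 * Hypothetical sketch for the WSRRDD debug command parsed by
 * inv_reg_write_store(). "WS6B01" asks the driver to write 0x01 to
 * register 0x6B; both the path and the register choice are assumptions.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/bus/iio/devices/iio:device0/reg_write", "w");

	if (!f)
		return 1;
	fprintf(f, "WS6B01\n");
	fclose(f);
	return 0;
}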
no modifier name, only simple, x, y,z - the scale should be shared while bias is not so each - axis has different bias*/ - { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, - IIO_CHAN_SOFT_TIMESTAMP(INV_MPU_SCAN_TIMESTAMP) -}; +#define INV_ACCL_CHAN(_type, _channel2, _index) \ + { \ + .type = _type, \ + .modified = 1, \ + .channel2 = _channel2, \ + .info_mask = (IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | \ + IIO_CHAN_INFO_SCALE_SHARED_BIT | \ + IIO_CHAN_INFO_OFFSET_SEPARATE_BIT), \ + .scan_index = _index, \ + .scan_type = IIO_ST('s', 16, 16, 0) \ + } -static const struct iio_chan_spec gyro_accel_channels[] = { - { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_R, - .scan_index = INV_MPU_SCAN_QUAT_R, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_X, - .scan_index = INV_MPU_SCAN_QUAT_X, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_Y, - .scan_index = INV_MPU_SCAN_QUAT_Y, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_Z, - .scan_index = INV_MPU_SCAN_QUAT_Z, - .scan_type = IIO_ST('s', 32, 32, 0) - }, - IIO_CHAN_SOFT_TIMESTAMP(INV_MPU_SCAN_TIMESTAMP) -}; -static const struct iio_chan_spec gyro_accel_compass_channels[] = { - { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - 
IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ANGL_VEL, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_GYRO_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_ACCEL, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_CALIBBIAS_SEPARATE_BIT | - IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_ACCL_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_MAGN, - .modified = 1, - .channel2 = IIO_MOD_X, - .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_MAGN_X, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_MAGN, - .modified = 1, - .channel2 = IIO_MOD_Y, - .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_MAGN_Y, - .scan_type = IIO_ST('s', 16, 16, 0) - }, { - .type = IIO_MAGN, - .modified = 1, - .channel2 = IIO_MOD_Z, - .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, - .scan_index = INV_MPU_SCAN_MAGN_Z, - .scan_type = IIO_ST('s', 16, 16, 0) - }, - { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_R, - .scan_index = INV_MPU_SCAN_QUAT_R, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_X, - .scan_index = INV_MPU_SCAN_QUAT_X, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_Y, - .scan_index = INV_MPU_SCAN_QUAT_Y, - .scan_type = IIO_ST('s', 32, 32, 0) - }, { - .type = IIO_QUATERNION, - .modified = 1, - .channel2 = IIO_MOD_Z, - .scan_index = INV_MPU_SCAN_QUAT_Z, - .scan_type = IIO_ST('s', 32, 32, 0) - }, - IIO_CHAN_SOFT_TIMESTAMP(INV_MPU_SCAN_TIMESTAMP) -}; +#define INV_MPU_QUATERNION_CHAN(_channel2, _index) \ + { \ + .type = IIO_QUATERNION, \ + .modified = 1, \ + .channel2 = _channel2, \ + .scan_index = _index, \ + .scan_type = IIO_ST('s', 32, 32, 0) \ + } -static struct inv_chip_chan_info chip_channel_info[] = { - { - .channels = gyro_channels, - .num_channels = ARRAY_SIZE(gyro_channels), - }, - { - .channels = gyro_accel_channels, - .num_channels = ARRAY_SIZE(gyro_accel_channels), - }, - { - .channels = gyro_accel_compass_channels, - .num_channels = ARRAY_SIZE(gyro_accel_compass_channels), +#define INV_MPU_MAGN_CHAN(_channel2, _index) \ + { \ + .type = IIO_MAGN, \ + .modified = 1, \ + .channel2 = _channel2, \ + .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, \ + .scan_index = _index, \ + .scan_type = IIO_ST('s', 16, 16, 0) \ } + +static const struct iio_chan_spec inv_mpu_channels[] = { + IIO_CHAN_SOFT_TIMESTAMP(INV_MPU_SCAN_TIMESTAMP), + INV_MPU_CHAN(IIO_ANGL_VEL, IIO_MOD_X, INV_MPU_SCAN_GYRO_X), + INV_MPU_CHAN(IIO_ANGL_VEL, IIO_MOD_Y, INV_MPU_SCAN_GYRO_Y), + 
INV_MPU_CHAN(IIO_ANGL_VEL, IIO_MOD_Z, INV_MPU_SCAN_GYRO_Z), + + INV_ACCL_CHAN(IIO_ACCEL, IIO_MOD_X, INV_MPU_SCAN_ACCL_X), + INV_ACCL_CHAN(IIO_ACCEL, IIO_MOD_Y, INV_MPU_SCAN_ACCL_Y), + INV_ACCL_CHAN(IIO_ACCEL, IIO_MOD_Z, INV_MPU_SCAN_ACCL_Z), + + INV_MPU_QUATERNION_CHAN(IIO_MOD_R, INV_MPU_SCAN_QUAT_R), + INV_MPU_QUATERNION_CHAN(IIO_MOD_X, INV_MPU_SCAN_QUAT_X), + INV_MPU_QUATERNION_CHAN(IIO_MOD_Y, INV_MPU_SCAN_QUAT_Y), + INV_MPU_QUATERNION_CHAN(IIO_MOD_Z, INV_MPU_SCAN_QUAT_Z), + + INV_MPU_MAGN_CHAN(IIO_MOD_X, INV_MPU_SCAN_MAGN_X), + INV_MPU_MAGN_CHAN(IIO_MOD_Y, INV_MPU_SCAN_MAGN_Y), + INV_MPU_MAGN_CHAN(IIO_MOD_Z, INV_MPU_SCAN_MAGN_Z), }; + /*constant IIO attribute */ -static IIO_CONST_ATTR_SAMP_FREQ_AVAIL("10 50 100 200 500"); -static IIO_DEV_ATTR_SAMP_FREQ(S_IRUGO | S_IWUSR, inv_fifo_rate_show, - inv_fifo_rate_store); -static DEVICE_ATTR(temperature, S_IRUGO, inv_temperature_show, NULL); -static DEVICE_ATTR(clock_source, S_IRUGO, inv_clk_src_show, NULL); -static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, inv_power_state_show, - inv_power_state_store); -static DEVICE_ATTR(firmware_loaded, S_IRUGO | S_IWUSR, - inv_firmware_loaded_show, inv_firmware_loaded_store); -static DEVICE_ATTR(lpa_mode, S_IRUGO | S_IWUSR, inv_lpa_mode_show, - inv_lpa_mode_store); -static DEVICE_ATTR(lpa_freq, S_IRUGO | S_IWUSR, inv_lpa_freq_show, - inv_lpa_freq_store); +static IIO_CONST_ATTR_SAMP_FREQ_AVAIL("10 20 50 100 200 500"); + +/* special sysfs */ static DEVICE_ATTR(reg_dump, S_IRUGO, inv_reg_dump_show, NULL); -static DEVICE_ATTR(self_test, S_IRUGO, inv_self_test_show, NULL); -static DEVICE_ATTR(key, S_IRUGO, inv_key_show, NULL); -static DEVICE_ATTR(gyro_matrix, S_IRUGO, inv_gyro_matrix_show, NULL); -static DEVICE_ATTR(accl_matrix, S_IRUGO, inv_accl_matrix_show, NULL); -static DEVICE_ATTR(compass_matrix, S_IRUGO, inv_compass_matrix_show, NULL); -static DEVICE_ATTR(flick_lower, S_IRUGO | S_IWUSR, inv_flick_lower_show, - inv_flick_lower_store); -static DEVICE_ATTR(flick_upper, S_IRUGO | S_IWUSR, inv_flick_upper_show, - inv_flick_upper_store); -static DEVICE_ATTR(flick_counter, S_IRUGO | S_IWUSR, inv_flick_counter_show, - inv_flick_counter_store); -static DEVICE_ATTR(flick_message_on, S_IRUGO | S_IWUSR, inv_flick_msg_on_show, - inv_flick_msg_on_store); -static DEVICE_ATTR(flick_int_on, S_IRUGO | S_IWUSR, inv_flick_int_on_show, - inv_flick_int_on_store); -static DEVICE_ATTR(flick_axis, S_IRUGO | S_IWUSR, inv_flick_axis_show, - inv_flick_axis_store); -static DEVICE_ATTR(dmp_on, S_IRUGO | S_IWUSR, inv_dmp_on_show, - inv_dmp_on_store); -static DEVICE_ATTR(dmp_int_on, S_IRUGO | S_IWUSR, inv_dmp_int_on_show, - inv_dmp_int_on_store); -static DEVICE_ATTR(dmp_output_rate, S_IRUGO | S_IWUSR, - inv_dmp_output_rate_show, inv_dmp_output_rate_store); -static DEVICE_ATTR(orientation_on, S_IRUGO | S_IWUSR, - inv_orientation_on_show, inv_orientation_on_store); -static DEVICE_ATTR(quaternion_on, S_IRUGO | S_IWUSR, - inv_quaternion_on_show, inv_quaternion_on_store); -static DEVICE_ATTR(display_orientation_on, S_IRUGO | S_IWUSR, - inv_display_orient_on_show, inv_display_orient_on_store); -static DEVICE_ATTR(tap_on, S_IRUGO | S_IWUSR, inv_tap_on_show, - inv_tap_on_store); -static DEVICE_ATTR(tap_time, S_IRUGO | S_IWUSR, inv_tap_time_show, - inv_tap_time_store); -static DEVICE_ATTR(tap_min_count, S_IRUGO | S_IWUSR, inv_tap_min_count_show, - inv_tap_min_count_store); -static DEVICE_ATTR(tap_threshold, S_IRUGO | S_IWUSR, inv_tap_threshold_show, - inv_tap_threshold_store); -static DEVICE_ATTR(pedometer_time, S_IRUGO | S_IWUSR, 
inv_pedometer_time_show, - inv_pedometer_time_store); -static DEVICE_ATTR(pedometer_steps, S_IRUGO | S_IWUSR, - inv_pedometer_steps_show, inv_pedometer_steps_store); -static DEVICE_ATTR(event_flick, S_IRUGO, inv_dmp_flick_show, NULL); -static DEVICE_ATTR(event_orientation, S_IRUGO, inv_dmp_orient_show, NULL); +static DEVICE_ATTR(temperature, S_IRUGO, inv_temperature_show, NULL); + +/* event based sysfs, needs poll to read */ static DEVICE_ATTR(event_tap, S_IRUGO, inv_dmp_tap_show, NULL); static DEVICE_ATTR(event_display_orientation, S_IRUGO, - inv_dmp_display_orient_show, NULL); -static DEVICE_ATTR(gyro_enable, S_IRUGO | S_IWUSR, inv_gyro_enable_show, - inv_gyro_enable_store); -static DEVICE_ATTR(accl_enable, S_IRUGO | S_IWUSR, inv_accl_enable_show, - inv_accl_enable_store); -static DEVICE_ATTR(compass_enable, S_IRUGO | S_IWUSR, inv_compass_en_show, - inv_compass_en_store); + inv_dmp_display_orient_show, NULL); +static DEVICE_ATTR(event_accel_motion, S_IRUGO, inv_accel_motion_show, NULL); +static DEVICE_ATTR(event_smd, S_IRUGO, inv_smd_show, NULL); + +/* DMP sysfs with power on/off */ +static IIO_DEVICE_ATTR(smd_enable, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_SMD_ENABLE); +static IIO_DEVICE_ATTR(smd_threshold, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_SMD_THLD); +static IIO_DEVICE_ATTR(smd_delay_threshold, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_SMD_DELAY_THLD); +static IIO_DEVICE_ATTR(smd_delay_threshold2, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_SMD_DELAY_THLD2); +static IIO_DEVICE_ATTR(tap_on, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_TAP_ON); +static IIO_DEVICE_ATTR(tap_threshold, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_TAP_THRESHOLD); +static IIO_DEVICE_ATTR(tap_min_count, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_TAP_MIN_COUNT); +static IIO_DEVICE_ATTR(tap_time, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_TAP_TIME); +static IIO_DEVICE_ATTR(display_orientation_on, S_IRUGO | S_IWUSR, + inv_attr_show, inv_dmp_attr_store, ATTR_DMP_DISPLAY_ORIENTATION_ON); + +/* DMP sysfs without power on/off */ +static IIO_DEVICE_ATTR(dmp_on, S_IRUGO | S_IWUSR, inv_attr_show, + inv_dmp_attr_store, ATTR_DMP_ON); +static IIO_DEVICE_ATTR(dmp_int_on, S_IRUGO | S_IWUSR, inv_attr_show, + inv_dmp_attr_store, ATTR_DMP_INT_ON); +static IIO_DEVICE_ATTR(dmp_event_int_on, S_IRUGO | S_IWUSR, inv_attr_show, + inv_dmp_attr_store, ATTR_DMP_EVENT_INT_ON); +static IIO_DEVICE_ATTR(dmp_output_rate, S_IRUGO | S_IWUSR, inv_attr_show, + inv_dmp_attr_store, ATTR_DMP_OUTPUT_RATE); +static IIO_DEVICE_ATTR(quaternion_on, S_IRUGO | S_IWUSR, inv_attr_show, + inv_dmp_attr_store, ATTR_DMP_QUATERNION_ON); + +/* non DMP sysfs with power on/off */ +static IIO_DEVICE_ATTR(motion_lpa_on, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_MOTION_LPA_ON); +static IIO_DEVICE_ATTR(motion_lpa_freq, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_MOTION_LPA_FREQ); +static IIO_DEVICE_ATTR(motion_lpa_duration, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_MOTION_LPA_DURATION); +static IIO_DEVICE_ATTR(motion_lpa_threshold, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_MOTION_LPA_THRESHOLD); + +/* non DMP sysfs without power on/off */ +static IIO_DEVICE_ATTR(self_test_samples, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_SELF_TEST_SAMPLES); +static IIO_DEVICE_ATTR(self_test_threshold, S_IRUGO | S_IWUSR, 
inv_attr_show, + inv_attr_store, ATTR_SELF_TEST_THRESHOLD); +static IIO_DEVICE_ATTR(gyro_enable, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_GYRO_ENABLE); +static IIO_DEVICE_ATTR(accl_enable, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_ACCL_ENABLE); +static IIO_DEVICE_ATTR(compass_enable, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_COMPASS_ENABLE); +static IIO_DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_POWER_STATE); +static IIO_DEVICE_ATTR(firmware_loaded, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_FIRMWARE_LOADED); +static IIO_DEVICE_ATTR(sampling_frequency, S_IRUGO | S_IWUSR, inv_attr_show, + inv_attr_store, ATTR_SAMPLING_FREQ); + +/* show method only sysfs but with power on/off */ +static IIO_DEVICE_ATTR(self_test, S_IRUGO, inv_attr_show, NULL, + ATTR_SELF_TEST); + +/* show method only sysfs */ +static IIO_DEVICE_ATTR(gyro_matrix, S_IRUGO, inv_attr_show, NULL, + ATTR_GYRO_MATRIX); +static IIO_DEVICE_ATTR(accl_matrix, S_IRUGO, inv_attr_show, NULL, + ATTR_ACCL_MATRIX); +static IIO_DEVICE_ATTR(compass_matrix, S_IRUGO, inv_attr_show, NULL, + ATTR_COMPASS_MATRIX); +static IIO_DEVICE_ATTR(secondary_name, S_IRUGO, inv_attr_show, NULL, + ATTR_SECONDARY_NAME); + +#ifdef CONFIG_INV_TESTING +static IIO_DEVICE_ATTR(reg_write, S_IRUGO | S_IWUSR, inv_attr_show, + inv_reg_write_store, ATTR_REG_WRITE); +/* smd debug related sysfs */ +static IIO_DEVICE_ATTR(debug_smd_enable_testp1, S_IWUSR, NULL, + inv_dmp_attr_store, ATTR_DEBUG_SMD_ENABLE_TESTP1); +static IIO_DEVICE_ATTR(debug_smd_enable_testp2, S_IWUSR, NULL, + inv_dmp_attr_store, ATTR_DEBUG_SMD_ENABLE_TESTP2); +static IIO_DEVICE_ATTR(debug_smd_exe_state, S_IRUGO, inv_attr_show, + NULL, ATTR_DEBUG_SMD_EXE_STATE); +static IIO_DEVICE_ATTR(debug_smd_delay_cntr, S_IRUGO, inv_attr_show, + NULL, ATTR_DEBUG_SMD_DELAY_CNTR); +#endif static const struct attribute *inv_gyro_attributes[] = { - &dev_attr_gyro_enable.attr, - &dev_attr_temperature.attr, - &dev_attr_clock_source.attr, - &dev_attr_power_state.attr, + &iio_const_attr_sampling_frequency_available.dev_attr.attr, &dev_attr_reg_dump.attr, - &dev_attr_self_test.attr, - &dev_attr_key.attr, - &dev_attr_gyro_matrix.attr, + &dev_attr_temperature.attr, + &iio_dev_attr_self_test_samples.dev_attr.attr, + &iio_dev_attr_self_test_threshold.dev_attr.attr, + &iio_dev_attr_gyro_enable.dev_attr.attr, + &iio_dev_attr_power_state.dev_attr.attr, &iio_dev_attr_sampling_frequency.dev_attr.attr, - &iio_const_attr_sampling_frequency_available.dev_attr.attr, + &iio_dev_attr_self_test.dev_attr.attr, + &iio_dev_attr_gyro_matrix.dev_attr.attr, + &iio_dev_attr_secondary_name.dev_attr.attr, +#ifdef CONFIG_INV_TESTING + &iio_dev_attr_reg_write.dev_attr.attr, + &iio_dev_attr_debug_smd_enable_testp1.dev_attr.attr, + &iio_dev_attr_debug_smd_enable_testp2.dev_attr.attr, + &iio_dev_attr_debug_smd_exe_state.dev_attr.attr, + &iio_dev_attr_debug_smd_delay_cntr.dev_attr.attr, +#endif }; static const struct attribute *inv_mpu6050_attributes[] = { - &dev_attr_accl_enable.attr, - &dev_attr_accl_matrix.attr, - &dev_attr_firmware_loaded.attr, - &dev_attr_lpa_mode.attr, - &dev_attr_lpa_freq.attr, - &dev_attr_flick_lower.attr, - &dev_attr_flick_upper.attr, - &dev_attr_flick_counter.attr, - &dev_attr_flick_message_on.attr, - &dev_attr_flick_int_on.attr, - &dev_attr_flick_axis.attr, - &dev_attr_dmp_on.attr, - &dev_attr_dmp_int_on.attr, - &dev_attr_dmp_output_rate.attr, - &dev_attr_orientation_on.attr, - &dev_attr_quaternion_on.attr, - 
&dev_attr_display_orientation_on.attr, - &dev_attr_tap_on.attr, - &dev_attr_tap_time.attr, - &dev_attr_tap_min_count.attr, - &dev_attr_tap_threshold.attr, - &dev_attr_pedometer_time.attr, - &dev_attr_pedometer_steps.attr, - &dev_attr_event_flick.attr, - &dev_attr_event_orientation.attr, &dev_attr_event_display_orientation.attr, &dev_attr_event_tap.attr, + &dev_attr_event_accel_motion.attr, + &dev_attr_event_smd.attr, + &iio_dev_attr_smd_enable.dev_attr.attr, + &iio_dev_attr_smd_threshold.dev_attr.attr, + &iio_dev_attr_smd_delay_threshold.dev_attr.attr, + &iio_dev_attr_smd_delay_threshold2.dev_attr.attr, + &iio_dev_attr_tap_on.dev_attr.attr, + &iio_dev_attr_tap_threshold.dev_attr.attr, + &iio_dev_attr_tap_min_count.dev_attr.attr, + &iio_dev_attr_tap_time.dev_attr.attr, + &iio_dev_attr_display_orientation_on.dev_attr.attr, + &iio_dev_attr_dmp_on.dev_attr.attr, + &iio_dev_attr_dmp_int_on.dev_attr.attr, + &iio_dev_attr_dmp_event_int_on.dev_attr.attr, + &iio_dev_attr_dmp_output_rate.dev_attr.attr, + &iio_dev_attr_quaternion_on.dev_attr.attr, + &iio_dev_attr_motion_lpa_on.dev_attr.attr, + &iio_dev_attr_motion_lpa_freq.dev_attr.attr, + &iio_dev_attr_motion_lpa_duration.dev_attr.attr, + &iio_dev_attr_motion_lpa_threshold.dev_attr.attr, + &iio_dev_attr_accl_enable.dev_attr.attr, + &iio_dev_attr_firmware_loaded.dev_attr.attr, + &iio_dev_attr_accl_matrix.dev_attr.attr, }; static const struct attribute *inv_compass_attributes[] = { - &dev_attr_compass_matrix.attr, - &dev_attr_compass_enable.attr, + &iio_dev_attr_compass_enable.dev_attr.attr, + &iio_dev_attr_compass_matrix.dev_attr.attr, }; static const struct attribute *inv_mpu3050_attributes[] = { - &dev_attr_accl_matrix.attr, - &dev_attr_accl_enable.attr, + &iio_dev_attr_accl_enable.dev_attr.attr, + &iio_dev_attr_accl_matrix.dev_attr.attr, }; static struct attribute *inv_attributes[ARRAY_SIZE(inv_gyro_attributes) + ARRAY_SIZE(inv_mpu6050_attributes) + ARRAY_SIZE(inv_compass_attributes) + 1]; + static const struct attribute_group inv_attribute_group = { .name = "mpu", .attrs = inv_attributes @@ -2138,23 +1661,26 @@ static const struct iio_info mpu_info = { /** * inv_setup_compass() - Configure compass. 
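 * Puts the MPU into I2C bypass mode, verifies the AKM compass ID, reads the
 * fuse-ROM sensitivity values, then programs the I2C master slave registers
 * for single-measurement reads before leaving bypass mode.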
*/ -static int inv_setup_compass(struct inv_gyro_state_s *st) +static int inv_setup_compass(struct inv_mpu_iio_s *st) { int result; - unsigned char data[4]; + u8 data[4]; - result = inv_i2c_read(st, REG_YGOFFS_TC, 1, data); - if (result) - return result; - data[0] &= ~BIT_I2C_MST_VDDIO; - if (st->plat_data.level_shifter) - data[0] |= BIT_I2C_MST_VDDIO; - /*set up VDDIO register */ - result = inv_i2c_single_write(st, REG_YGOFFS_TC, data[0]); - if (result) - return result; + if (INV_MPU6050 == st->chip_type) { + result = inv_i2c_read(st, REG_YGOFFS_TC, 1, data); + if (result) + return result; + data[0] &= ~BIT_I2C_MST_VDDIO; + if (st->plat_data.level_shifter) + data[0] |= BIT_I2C_MST_VDDIO; + /*set up VDDIO register */ + result = inv_i2c_single_write(st, REG_YGOFFS_TC, data[0]); + if (result) + return result; + } /* set to bypass mode */ - result = inv_i2c_single_write(st, REG_INT_PIN_CFG, BIT_BYPASS_EN); + result = inv_i2c_single_write(st, REG_INT_PIN_CFG, + st->plat_data.int_config | BIT_BYPASS_EN); if (result) return result; /*read secondary i2c ID register */ @@ -2164,7 +1690,7 @@ static int inv_setup_compass(struct inv_gyro_state_s *st) if (data[0] != DATA_AKM_ID) return -ENXIO; /*set AKM to Fuse ROM access mode */ - result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PW_FR); + result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_FR); if (result) return result; result = inv_secondary_read(REG_AKM_SENSITIVITY, THREE_AXIS, @@ -2172,15 +1698,17 @@ static int inv_setup_compass(struct inv_gyro_state_s *st) if (result) return result; /*revert to power down mode */ - result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PW_DN); + result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PD); if (result) return result; - pr_err("senx=%d, seny=%d,senz=%d\n", - st->chip_info.compass_sens[0], - st->chip_info.compass_sens[1], - st->chip_info.compass_sens[2]); + pr_debug("%s senx=%d, seny=%d, senz=%d\n", + st->hw->name, + st->chip_info.compass_sens[0], + st->chip_info.compass_sens[1], + st->chip_info.compass_sens[2]); /*restore to non-bypass mode */ - result = inv_i2c_single_write(st, REG_INT_PIN_CFG, 0); + result = inv_i2c_single_write(st, REG_INT_PIN_CFG, + st->plat_data.int_config); if (result) return result; @@ -2218,29 +1746,19 @@ static int inv_setup_compass(struct inv_gyro_state_s *st) return result; /* output data for slave 1 is fixed, single measure mode*/ st->compass_scale = 1; - data[0] = 1; if (COMPASS_ID_AK8975 == st->plat_data.sec_slave_id) { - st->compass_st_upper[0] = DATA_AKM8975_ST_X_UP; - st->compass_st_upper[1] = DATA_AKM8975_ST_Y_UP; - st->compass_st_upper[2] = DATA_AKM8975_ST_Z_UP; - st->compass_st_lower[0] = DATA_AKM8975_ST_X_LW; - st->compass_st_lower[1] = DATA_AKM8975_ST_Y_LW; - st->compass_st_lower[2] = DATA_AKM8975_ST_Z_LW; + st->compass_st_upper = AKM8975_ST_Upper; + st->compass_st_lower = AKM8975_ST_Lower; + data[0] = DATA_AKM_MODE_SM; } else if (COMPASS_ID_AK8972 == st->plat_data.sec_slave_id) { - st->compass_st_upper[0] = DATA_AKM8972_ST_X_UP; - st->compass_st_upper[1] = DATA_AKM8972_ST_Y_UP; - st->compass_st_upper[2] = DATA_AKM8972_ST_Z_UP; - st->compass_st_lower[0] = DATA_AKM8972_ST_X_LW; - st->compass_st_lower[1] = DATA_AKM8972_ST_Y_LW; - st->compass_st_lower[2] = DATA_AKM8972_ST_Z_LW; + st->compass_st_upper = AKM8972_ST_Upper; + st->compass_st_lower = AKM8972_ST_Lower; + data[0] = DATA_AKM_MODE_SM; } else if (COMPASS_ID_AK8963 == st->plat_data.sec_slave_id) { - st->compass_st_upper[0] = DATA_AKM8963_ST_X_UP; - st->compass_st_upper[1] = 
DATA_AKM8963_ST_Y_UP; - st->compass_st_upper[2] = DATA_AKM8963_ST_Z_UP; - st->compass_st_lower[0] = DATA_AKM8963_ST_X_LW; - st->compass_st_lower[1] = DATA_AKM8963_ST_Y_LW; - st->compass_st_lower[2] = DATA_AKM8963_ST_Z_LW; - data[0] |= (st->compass_scale << AKM8963_SCALE_SHIFT); + st->compass_st_upper = AKM8963_ST_Upper; + st->compass_st_lower = AKM8963_ST_Lower; + data[0] = DATA_AKM_MODE_SM | + (st->compass_scale << AKM8963_SCALE_SHIFT); } result = inv_i2c_single_write(st, REG_I2C_SLV1_DO, data[0]); if (result) @@ -2251,15 +1769,56 @@ static int inv_setup_compass(struct inv_gyro_state_s *st) return result; } +static void inv_setup_func_ptr(struct inv_mpu_iio_s *st) +{ + if (st->chip_type == INV_MPU3050) { + st->set_power_state = set_power_mpu3050; + st->switch_gyro_engine = inv_switch_3050_gyro_engine; + st->switch_accl_engine = inv_switch_3050_accl_engine; + st->init_config = inv_init_config_mpu3050; + st->setup_reg = inv_setup_reg_mpu3050; + } else { + st->set_power_state = set_power_itg; + st->switch_gyro_engine = inv_switch_gyro_engine; + st->switch_accl_engine = inv_switch_accl_engine; + st->init_config = inv_init_config; + st->setup_reg = inv_setup_reg; + /*MPU6XXX special functions */ + st->compass_en = inv_compass_enable; + st->quaternion_en = inv_quaternion_on; + st->gyro_en = inv_gyro_enable; + st->accl_en = inv_accl_enable; + } +} + +static int inv_detect_6xxx(struct inv_mpu_iio_s *st) +{ + int result; + u8 d; + + result = inv_i2c_read(st, REG_WHOAMI, 1, &d); + if (result) + return result; + if (d == MPU6500_ID) { + st->chip_type = INV_MPU6500; + strcpy(st->name, "mpu6500"); + } else { + strcpy(st->name, "mpu6050"); + } + + return 0; +} + /** * inv_check_chip_type() - check and setup chip type. */ -static int inv_check_chip_type(struct inv_gyro_state_s *st, +static int inv_check_chip_type(struct inv_mpu_iio_s *st, const struct i2c_device_id *id) { struct inv_reg_map_s *reg; - int result, chan_index; + int result; int t_ind; + if (!strcmp(id->name, "itg3500")) st->chip_type = INV_ITG3500; else if (!strcmp(id->name, "mpu3050")) @@ -2267,86 +1826,131 @@ static int inv_check_chip_type(struct inv_gyro_state_s *st, else if (!strcmp(id->name, "mpu6050")) st->chip_type = INV_MPU6050; else if (!strcmp(id->name, "mpu9150")) - st->chip_type = INV_MPU9150; + st->chip_type = INV_MPU6050; + else if (!strcmp(id->name, "mpu6500")) + st->chip_type = INV_MPU6500; + else if (!strcmp(id->name, "mpu9250")) + st->chip_type = INV_MPU6500; + else if (!strcmp(id->name, "mpu6xxx")) + st->chip_type = INV_MPU6050; else return -EPERM; - st->hw = (struct inv_hw_s *)(hw_info + st->chip_type); + inv_setup_func_ptr(st); + st->hw = &hw_info[st->chip_type]; st->mpu_slave = NULL; - chan_index = CHAN_INDEX_GYRO; - if (INV_MPU9150 == st->chip_type) { - st->plat_data.sec_slave_type = SECONDARY_SLAVE_TYPE_COMPASS; - st->plat_data.sec_slave_id = COMPASS_ID_AK8975; - st->chip_config.has_compass = 1; - chan_index = CHAN_INDEX_GYRO_ACCL_MAGN; - } - if (SECONDARY_SLAVE_TYPE_ACCEL == st->plat_data.sec_slave_type) { - if (ACCEL_ID_BMA250 == st->plat_data.sec_slave_id) - inv_register_bma250_slave(st); - chan_index = CHAN_INDEX_GYRO_ACCL; - } - if (SECONDARY_SLAVE_TYPE_COMPASS == st->plat_data.sec_slave_type) - st->chip_config.has_compass = 1; - else - st->chip_config.has_compass = 0; - if (INV_MPU6050 == st->chip_type) { - if (st->chip_config.has_compass) - chan_index = CHAN_INDEX_GYRO_ACCL_MAGN; - else - chan_index = CHAN_INDEX_GYRO_ACCL; - } - st->chan_info = &chip_channel_info[chan_index]; reg = &st->reg; - if (INV_MPU3050 
== st->chip_type) - inv_setup_reg_mpu3050(reg); - else - inv_setup_reg(reg); - st->chip_config.gyro_enable = 1; - result = inv_set_power_state(st, 1); + st->setup_reg(reg); + /* reset to make sure previous state are not there */ + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, BIT_H_RESET); + if (result) + return result; + msleep(POWER_UP_TIME); + /* toggle power state */ + result = st->set_power_state(st, false); if (result) return result; - if (INV_ITG3500 != st->chip_type && INV_MPU3050 != st->chip_type) { - result = inv_get_silicon_rev_mpu6050(st); - if (result) { - inv_i2c_single_write(st, reg->pwr_mgmt_1, - BIT_SLEEP | INV_CLK_PLL); + result = st->set_power_state(st, true); + if (result) + return result; + + if (!strcmp(id->name, "mpu6xxx")) { + /* for MPU6500, reading register need more time */ + msleep(POWER_UP_TIME); + result = inv_detect_6xxx(st); + if (result) return result; + } + + switch (st->chip_type) { + case INV_ITG3500: + st->num_channels = INV_CHANNEL_NUM_GYRO; + break; + case INV_MPU6050: + case INV_MPU6500: + if (SECONDARY_SLAVE_TYPE_COMPASS == + st->plat_data.sec_slave_type) { + st->chip_config.has_compass = 1; + st->num_channels = + INV_CHANNEL_NUM_GYRO_ACCL_QUANTERNION_MAGN; + } else { + st->chip_config.has_compass = 0; + st->num_channels = + INV_CHANNEL_NUM_GYRO_ACCL_QUANTERNION; + } + break; + case INV_MPU3050: + if (SECONDARY_SLAVE_TYPE_ACCEL == + st->plat_data.sec_slave_type) { + if (ACCEL_ID_BMA250 == st->plat_data.sec_slave_id) + inv_register_mpu3050_slave(st); + st->num_channels = INV_CHANNEL_NUM_GYRO_ACCL; + } else { + st->num_channels = INV_CHANNEL_NUM_GYRO; } + break; + default: + result = st->set_power_state(st, false); + return -ENODEV; } + switch (st->chip_type) { + case INV_MPU6050: + result = inv_get_silicon_rev_mpu6050(st); + break; + case INV_MPU6500: + result = inv_get_silicon_rev_mpu6500(st); + break; + default: + result = 0; + break; + } + if (result) { + pr_err("read silicon rev error\n"); + st->set_power_state(st, false); + return result; + } + /* turn off the gyro engine after OTP reading */ + result = st->switch_gyro_engine(st, false); + if (result) + return result; + result = st->switch_accl_engine(st, false); + if (result) + return result; if (st->chip_config.has_compass) { result = inv_setup_compass(st); if (result) { - inv_i2c_single_write(st, reg->pwr_mgmt_1, - BIT_SLEEP | INV_CLK_PLL); + pr_err("compass setup failed\n"); + st->set_power_state(st, false); return result; } } t_ind = 0; memcpy(&inv_attributes[t_ind], inv_gyro_attributes, - sizeof(inv_gyro_attributes)); - t_ind = ARRAY_SIZE(inv_gyro_attributes); + sizeof(inv_gyro_attributes)); + t_ind += ARRAY_SIZE(inv_gyro_attributes); if (INV_MPU3050 == st->chip_type && st->mpu_slave != NULL) { memcpy(&inv_attributes[t_ind], inv_mpu3050_attributes, - sizeof(inv_mpu3050_attributes)); + sizeof(inv_mpu3050_attributes)); t_ind += ARRAY_SIZE(inv_mpu3050_attributes); inv_attributes[t_ind] = NULL; return 0; } - if (chan_index > CHAN_INDEX_GYRO) { + if ((INV_MPU6050 == st->chip_type) || (INV_MPU6500 == st->chip_type)) { memcpy(&inv_attributes[t_ind], inv_mpu6050_attributes, - sizeof(inv_mpu6050_attributes)); + sizeof(inv_mpu6050_attributes)); t_ind += ARRAY_SIZE(inv_mpu6050_attributes); } - if (chan_index > CHAN_INDEX_GYRO_ACCL) { + if (st->chip_config.has_compass) { memcpy(&inv_attributes[t_ind], inv_compass_attributes, - sizeof(inv_compass_attributes)); + sizeof(inv_compass_attributes)); t_ind += ARRAY_SIZE(inv_compass_attributes); } inv_attributes[t_ind] = NULL; + return 0; } @@ -2367,6 
+1971,7 @@ static int inv_create_dmp_sysfs(struct iio_dev *ind) { int result; result = sysfs_create_bin_file(&ind->dev.kobj, &dmp_firmware); + return result; } @@ -2376,21 +1981,23 @@ static int inv_create_dmp_sysfs(struct iio_dev *ind) static int inv_mpu_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct inv_gyro_state_s *st; + struct inv_mpu_iio_s *st; struct iio_dev *indio_dev; - int result, reg_done; + int result; + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { - result = -ENODEV; + result = -ENOSYS; + pr_err("I2c function error\n"); goto out_no_free; } indio_dev = iio_allocate_device(sizeof(*st)); if (indio_dev == NULL) { + pr_err("memory allocation failed\n"); result = -ENOMEM; goto out_no_free; } - reg_done = 0; st = iio_priv(indio_dev); - st->i2c = client; + st->client = client; st->sl_handle = client->adapter; st->i2c_addr = client->addr; st->plat_data = @@ -2399,16 +2006,14 @@ static int inv_mpu_probe(struct i2c_client *client, result = inv_check_chip_type(st, id); if (result) goto out_free; - if (INV_MPU3050 == st->chip_type) - result = inv_init_config_mpu3050(indio_dev); - else - result = inv_init_config(indio_dev); + + result = st->init_config(indio_dev); if (result) { dev_err(&client->adapter->dev, "Could not initialize device.\n"); goto out_free; } - result = inv_set_power_state(st, 1); + result = st->set_power_state(st, false); if (result) { dev_err(&client->adapter->dev, "%s could not be turned off.\n", st->hw->name); @@ -2418,38 +2023,58 @@ static int inv_mpu_probe(struct i2c_client *client, /* Make state variables available to all _show and _store functions. */ i2c_set_clientdata(client, indio_dev); indio_dev->dev.parent = &client->dev; - indio_dev->name = id->name; - indio_dev->channels = st->chan_info->channels; - indio_dev->num_channels = st->chan_info->num_channels; + if (!strcmp(id->name, "mpu6xxx")) + indio_dev->name = st->name; + else + indio_dev->name = id->name; + indio_dev->channels = inv_mpu_channels; + indio_dev->num_channels = st->num_channels; + indio_dev->info = &mpu_info; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->currentmode = INDIO_DIRECT_MODE; result = inv_mpu_configure_ring(indio_dev); - if (result) + if (result) { + pr_err("configure ring buffer fail\n"); goto out_free; - result = iio_buffer_register(indio_dev, st->chan_info->channels, - st->chan_info->num_channels); - if (result) + } + result = iio_buffer_register(indio_dev, indio_dev->channels, + indio_dev->num_channels); + if (result) { + pr_err("ring buffer register fail\n"); goto out_unreg_ring; + } st->irq = client->irq; result = inv_mpu_probe_trigger(indio_dev); - if (result) + if (result) { + pr_err("trigger probe fail\n"); goto out_remove_ring; + } + + /* Tell the i2c counter, we have an IRQ */ + INV_I2C_SETIRQ(IRQ_MPU, client->irq); result = iio_device_register(indio_dev); - if (result) + if (result) { + pr_err("IIO device register fail\n"); goto out_remove_trigger; - if (INV_MPU6050 == st->chip_type || INV_MPU9150 == st->chip_type) { + } + + if (INV_MPU6050 == st->chip_type || + INV_MPU6500 == st->chip_type) { result = inv_create_dmp_sysfs(indio_dev); - if (result) + if (result) { + pr_err("create dmp sysfs failed\n"); goto out_unreg_iio; + } } INIT_KFIFO(st->timestamps); spin_lock_init(&st->time_stamp_lock); - pr_info("%s: Probe name %s\n", __func__, id->name); - dev_info(&client->adapter->dev, "%s is ready to go!\n", st->hw->name); + dev_info(&client->dev, "%s is ready to go!\n", + indio_dev->name); + return 0; out_unreg_iio: 
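	/* error paths: undo registrations in reverse order of setup */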
iio_device_unregister(indio_dev); @@ -2464,27 +2089,78 @@ static int inv_mpu_probe(struct i2c_client *client, iio_free_device(indio_dev); out_no_free: dev_err(&client->adapter->dev, "%s failed %d\n", __func__, result); + return -EIO; } +static void inv_mpu_shutdown(struct i2c_client *client) +{ + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct inv_reg_map_s *reg; + int result; + + reg = &st->reg; + dev_dbg(&client->adapter->dev, "Shutting down %s...\n", st->hw->name); + + /* reset to make sure previous state are not there */ + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, BIT_H_RESET); + if (result) + dev_err(&client->adapter->dev, "Failed to reset %s\n", + st->hw->name); + msleep(POWER_UP_TIME); + /* turn off power to ensure gyro engine is off */ + result = st->set_power_state(st, false); + if (result) + dev_err(&client->adapter->dev, "Failed to turn off %s\n", + st->hw->name); +} + /** * inv_mpu_remove() - remove function. */ static int inv_mpu_remove(struct i2c_client *client) { struct iio_dev *indio_dev = i2c_get_clientdata(client); - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); kfifo_free(&st->timestamps); iio_device_unregister(indio_dev); - inv_mpu_remove_trigger(indio_dev); + if (indio_dev->modes & INDIO_BUFFER_TRIGGERED) + inv_mpu_remove_trigger(indio_dev); iio_buffer_unregister(indio_dev); inv_mpu_unconfigure_ring(indio_dev); iio_free_device(indio_dev); dev_info(&client->adapter->dev, "inv-mpu-iio module removed.\n"); + return 0; } -static const unsigned short normal_i2c[] = { I2C_CLIENT_END }; + +#ifdef CONFIG_PM +static int inv_mpu_resume(struct device *dev) +{ + struct inv_mpu_iio_s *st = + iio_priv(i2c_get_clientdata(to_i2c_client(dev))); + pr_debug("%s inv_mpu_resume\n", st->hw->name); + return st->set_power_state(st, true); +} + +static int inv_mpu_suspend(struct device *dev) +{ + struct inv_mpu_iio_s *st = + iio_priv(i2c_get_clientdata(to_i2c_client(dev))); + pr_debug("%s inv_mpu_suspend\n", st->hw->name); + return st->set_power_state(st, false); +} +static const struct dev_pm_ops inv_mpu_pmops = { + SET_SYSTEM_SLEEP_PM_OPS(inv_mpu_suspend, inv_mpu_resume) +}; +#define INV_MPU_PMOPS (&inv_mpu_pmops) +#else +#define INV_MPU_PMOPS NULL +#endif /* CONFIG_PM */ + +static const u16 normal_i2c[] = { I2C_CLIENT_END }; /* device id table is used to identify what device can be * supported by this driver */ @@ -2493,6 +2169,9 @@ static const struct i2c_device_id inv_mpu_id[] = { {"mpu3050", INV_MPU3050}, {"mpu6050", INV_MPU6050}, {"mpu9150", INV_MPU9150}, + {"mpu6500", INV_MPU6500}, + {"mpu9250", INV_MPU9250}, + {"mpu6xxx", INV_MPU6XXX}, {} }; @@ -2502,10 +2181,12 @@ static struct i2c_driver inv_mpu_driver = { .class = I2C_CLASS_HWMON, .probe = inv_mpu_probe, .remove = inv_mpu_remove, + .shutdown = inv_mpu_shutdown, .id_table = inv_mpu_id, .driver = { .owner = THIS_MODULE, .name = "inv-mpu-iio", + .pm = INV_MPU_PMOPS, }, .address_list = normal_i2c, }; @@ -2514,7 +2195,7 @@ static int __init inv_mpu_init(void) { int result = i2c_add_driver(&inv_mpu_driver); if (result) { - pr_err("%s failed\n", __func__); + pr_err("failed\n"); return result; } return 0; @@ -2532,7 +2213,7 @@ MODULE_AUTHOR("Invensense Corporation"); MODULE_DESCRIPTION("Invensense device driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("inv-mpu-iio"); + /** * @} */ - diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_iio.h b/drivers/staging/iio/imu/mpu/inv_mpu_iio.h index 303ed197bfd..b26d27b1548 
100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_iio.h +++ b/drivers/staging/iio/imu/mpu/inv_mpu_iio.h @@ -17,28 +17,28 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro.h - * @brief Struct definitions for the Invensense gyro driver. + * @file inv_mpu_iio.h + * @brief Struct definitions for the Invensense mpu driver. */ -#ifndef _INV_GYRO_H_ -#define _INV_GYRO_H_ +#ifndef _INV_MPU_IIO_H_ +#define _INV_MPU_IIO_H_ #include #include #include -#include #include #include + #include "../../iio.h" #include "../../buffer.h" + #include "dmpKey.h" + /** * struct inv_reg_map_s - Notable slave registers. - * @who_am_i: Upper 6 bits of the device's slave address. * @sample_rate_div: Divider applied to gyro output rate. * @lpf: Configures internal LPF. - * @product_id: Product revision. * @bank_sel: Selects between memory banks. * @user_ctrl: Enables/resets the FIFO. * @fifo_en: Determines which data will appear in FIFO. @@ -58,34 +58,35 @@ * @prgm_strt_addrh firmware program start address register */ struct inv_reg_map_s { - unsigned char who_am_i; - unsigned char sample_rate_div; - unsigned char lpf; - unsigned char product_id; - unsigned char bank_sel; - unsigned char user_ctrl; - unsigned char fifo_en; - unsigned char gyro_config; - unsigned char accl_config; - unsigned char fifo_count_h; - unsigned char fifo_r_w; - unsigned char raw_gyro; - unsigned char raw_accl; - unsigned char temperature; - unsigned char int_enable; - unsigned char int_status; - unsigned char pwr_mgmt_1; - unsigned char pwr_mgmt_2; - unsigned char mem_start_addr; - unsigned char mem_r_w; - unsigned char prgm_strt_addrh; + u8 sample_rate_div; + u8 lpf; + u8 bank_sel; + u8 user_ctrl; + u8 fifo_en; + u8 gyro_config; + u8 accl_config; + u8 fifo_count_h; + u8 fifo_r_w; + u8 raw_gyro; + u8 raw_accl; + u8 temperature; + u8 int_enable; + u8 int_status; + u8 pwr_mgmt_1; + u8 pwr_mgmt_2; + u8 mem_start_addr; + u8 mem_r_w; + u8 prgm_strt_addrh; }; - +/*device enum */ enum inv_devices { - INV_ITG3500 = 0, - INV_MPU3050 = 1, - INV_MPU6050 = 2, - INV_MPU9150 = 3, + INV_ITG3500, + INV_MPU3050, + INV_MPU6050, + INV_MPU9150, + INV_MPU6500, + INV_MPU9250, + INV_MPU6XXX, INV_NUM_PARTS }; @@ -104,7 +105,7 @@ struct test_setup_t { int lpf; int fsr; int accl_fs; - unsigned int accl_sens[3]; + u32 accl_sens[3]; }; /** @@ -113,17 +114,15 @@ struct test_setup_t { * @name: name of the chip */ struct inv_hw_s { - unsigned char num_reg; - unsigned char *name; + u8 num_reg; + u8 *name; }; /** * struct inv_chip_config_s - Cached chip configuration data. * @fsr: Full scale range. * @lpf: Digital low pass filter frequency. - * @clk_src: Clock source. * @accl_fs: accel full scale range. - * @lpa_freq: low power frequency * @self_test_run_once flag for self test run ever. * @has_footer: MPU3050 specific work around. * @has_compass: has compass or not. @@ -137,46 +136,50 @@ struct inv_hw_s { * @is_asleep: 1 if chip is powered down. * @dmp_on: dmp is on/off. * @dmp_int_on: dmp interrupt on/off. - * @orientation_on: dmp is on/off. + * @dmp_event_int_on: dmp event interrupt on/off. * @firmware_loaded: flag indicate firmware loaded or not. * @lpa_mod: low power mode. * @tap_on: tap on/off. - * @flick_int_on: flick interrupt on/off. * @quaternion_on: send quaternion data on/off. * @display_orient_on: display orientation on/off. + * @normal_compass_measure: discard first compass data after reset. + * @smd_enable: disable/enable SMD function. + * @lpa_freq: low power frequency * @prog_start_addr: firmware program start address. 
- * @dmp_output_rate: dmp output rate. - * @fifo_rate: FIFO update rate. + * @fifo_rate: current FIFO update rate. + * @new_fifo_rate: set FIFO update rate + * @dmp_output_rate: current dmp output rate. */ struct inv_chip_config_s { - unsigned int fsr:2; - unsigned int lpf:3; - unsigned int clk_src:1; - unsigned int accl_fs:2; - unsigned int lpa_freq:2; - unsigned int self_test_run_once:1; - unsigned int has_footer:1; - unsigned int has_compass:1; - unsigned int enable:1; - unsigned int accl_enable:1; - unsigned int accl_fifo_enable:1; - unsigned int gyro_enable:1; - unsigned int gyro_fifo_enable:1; - unsigned int compass_enable:1; - unsigned int compass_fifo_enable:1; - unsigned int is_asleep:1; - unsigned int dmp_on:1; - unsigned int dmp_int_on:1; - unsigned int orientation_on:1; - unsigned int firmware_loaded:1; - unsigned int lpa_mode:1; - unsigned int tap_on:1; - unsigned int flick_int_on:1; - unsigned int quaternion_on:1; - unsigned int display_orient_on:1; - unsigned short prog_start_addr; - unsigned short fifo_rate; - unsigned char dmp_output_rate; + u32 fsr:2; + u32 lpf:3; + u32 accl_fs:2; + u32 self_test_run_once:1; + u32 has_footer:1; + u32 has_compass:1; + u32 enable:1; + u32 accl_enable:1; + u32 accl_fifo_enable:1; + u32 gyro_enable:1; + u32 gyro_fifo_enable:1; + u32 compass_enable:1; + u32 compass_fifo_enable:1; + u32 is_asleep:1; + u32 dmp_on:1; + u32 dmp_int_on:1; + u32 dmp_event_int_on:1; + u32 firmware_loaded:1; + u32 lpa_mode:1; + u32 tap_on:1; + u32 quaternion_on:1; + u32 display_orient_on:1; + u32 normal_compass_measure:1; + u32 smd_enable:1; + u16 lpa_freq; + u16 prog_start_addr; + u16 fifo_rate; + u16 new_fifo_rate; + u16 dmp_output_rate; }; /** @@ -191,40 +194,23 @@ struct inv_chip_config_s { * @accl_sens_trim: accel sensitivity trim factor. */ struct inv_chip_info_s { - unsigned char product_id; - unsigned char product_revision; - unsigned char silicon_revision; - unsigned char software_revision; - unsigned char multi; - unsigned char compass_sens[3]; - unsigned long gyro_sens_trim; - unsigned long accl_sens_trim; -}; -/** - * struct inv_chip_chan_info - Chip channel information. - * @channels: channel specification. - * @num_channels: number of channels. - */ -struct inv_chip_chan_info { - const struct iio_chan_spec *channels; - int num_channels; + u8 product_id; + u8 product_revision; + u8 silicon_revision; + u8 software_revision; + u8 multi; + u8 compass_sens[3]; + u32 gyro_sens_trim; + u32 accl_sens_trim; }; -/** - * struct inv_flick_s structure to store flick data. - * @lower: lower bound of flick. - * @upper: upper bound of flick. - * @counter: counter of flick. - * @msg_on; message to carry flick - * @axis: axis of flick - */ -struct inv_flick_s { - int lower; - int upper; - int counter; - char msg_on; - char axis; +enum inv_channel_num { + INV_CHANNEL_NUM_GYRO = 4, + INV_CHANNEL_NUM_GYRO_ACCL = 7, + INV_CHANNEL_NUM_GYRO_ACCL_QUANTERNION = 11, + INV_CHANNEL_NUM_GYRO_ACCL_QUANTERNION_MAGN = 14, }; + /** * struct inv_tap_s structure to store tap data. * @min_count: minimum taps counted. @@ -232,26 +218,72 @@ struct inv_flick_s { * @time: tap time. */ struct inv_tap_s { - char min_count; - short thresh; - short time; + u16 min_count; + u16 thresh; + u16 time; +}; + +/** + * struct accel_mot_int_s structure to store motion interrupt data + * @mot_thr: motion threshold. + * @mot_dur: motion duration. 
+ * @mot_on: flag to indicate motion detection on; + */ +struct accel_mot_int_s { + u16 mot_thr; + u32 mot_dur; + u8 mot_on:1; +}; + +/** + * struct self_test_setting - self test settables from sysfs + * samples: number of samples used in self test. + * threshold: threshold fail/pass criterion in self test. + * This value is in the percentage multiplied by 100. + * So 14% would be 14. + */ +struct self_test_setting { + u16 samples; + u16 threshold; }; + +/** + * struct inv_smd_s significant motion detection structure. + * threshold: accel threshold for motion detection. + * delay: delay time to confirm 2nd motion. + * delay2: delay window parameter. + */ +struct inv_smd_s { + u32 threshold; + u32 delay; + u32 delay2; +}; + struct inv_mpu_slave; /** - * struct inv_gyro_state_s - Driver state variables. + * struct inv_mpu_iio_s - Driver state variables. * @chip_config: Cached attribute information. * @chip_info: Chip information from read-only registers. * @trig; iio trigger. - * @flick: flick data structure - * @tap: tap data structure + * @tap: tap data structure. + * @smd: SMD data structure. * @reg: Map of important registers. + * @self_test: self test settings. * @hw: Other hardware-specific information. * @chip_type: chip type. * @time_stamp_lock: spin lock to time stamp. - * @i2c: i2c client handle. + * @client: i2c client handle. * @plat_data: platform data. * @mpu_slave: mpu slave handle. - * @chan_info: channel information + * (*set_power_state)(struct inv_mpu_iio_s *, int on): function ptr + * (*switch_gyro_engine)(struct inv_mpu_iio_s *, int on): function ptr + * (*switch_accl_engine)(struct inv_mpu_iio_s *, int on): function ptr + * (*compass_en)(struct inv_mpu_iio_s *, struct iio_buffer *, bool); + * (*quaternion_en)(struct inv_mpu_iio_s *, struct iio_buffer *, bool) + * (*gyro_en)(struct inv_mpu_iio_s *, struct iio_buffer *, bool): func ptr. + * (*accl_en)(struct inv_mpu_iio_s *, struct iio_buffer *, bool): func ptr. + * (*init_config)(struct iio_dev *indio_dev): function ptr + * void (*setup_reg)(struct inv_reg_map_s *reg): function ptr * @timestamps: kfifo queue to store time stamp. * @compass_st_upper: compass self test upper limit. * @compass_st_lower: compass self test lower limit. @@ -261,64 +293,91 @@ struct inv_mpu_slave; * @raw_gyro: raw gyro data. * @raw_accel: raw accel data. * @raw_compass: raw compass. - * @compass_scale: compass scale. - * @i2c_addr: i2c address. - * @compass_divider: slow down compass rate. - * @compass_counter: slow down compass rate. + * @raw_quaternion raw quaternion data. + * @int input_accel_bias[3]: accel bias from sysfs. + * @compass_scale: compass scale. + * @i2c_addr: i2c address. + * @compass_divider: slow down compass rate. + * @compass_dmp_divider: slow down compass rate for dmp. + * @compass_counter: slow down compass rate. * @sample_divider: sample divider for dmp. * @fifo_divider: fifo divider for dmp. - * @orient_data: orientation data. - * @display_orient_data: display orient data. + * @display_orient_data:display orient data. * @tap_data: tap data. + * @num_channels: number of channels for current chip. * @sl_handle: Handle to I2C port. - * @irq_dur_us: duration between each irq. - * @last_isr_time: last isr time. + * @irq_dur_ns: duration between each irq. + * @last_isr_time: last isr time. + * @mpu6500_last_motion_time: MPU6500 last real motion interrupt time. + * @name: name for distiguish MPU6050 and MPU6500 in MPU6XXX. 
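+ * @secondary_name: name of the secondary slave device, if one is present.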
*/ -struct inv_gyro_state_s { +struct inv_mpu_iio_s { #define TIMESTAMP_FIFO_SIZE 16 struct inv_chip_config_s chip_config; struct inv_chip_info_s chip_info; struct iio_trigger *trig; - struct inv_flick_s flick; struct inv_tap_s tap; + struct inv_smd_s smd; struct inv_reg_map_s reg; - struct inv_hw_s *hw; + struct self_test_setting self_test; + const struct inv_hw_s *hw; enum inv_devices chip_type; spinlock_t time_stamp_lock; - struct i2c_client *i2c; + struct i2c_client *client; struct mpu_platform_data plat_data; struct inv_mpu_slave *mpu_slave; - struct inv_chip_chan_info *chan_info; - DECLARE_KFIFO(timestamps, long long, TIMESTAMP_FIFO_SIZE); - short compass_st_upper[3]; - short compass_st_lower[3]; + struct accel_mot_int_s mot_int; + int (*set_power_state)(struct inv_mpu_iio_s *, bool on); + int (*switch_gyro_engine)(struct inv_mpu_iio_s *, bool on); + int (*switch_accl_engine)(struct inv_mpu_iio_s *, bool on); + int (*compass_en)(struct inv_mpu_iio_s *, + struct iio_buffer *ring, bool on); + int (*quaternion_en)(struct inv_mpu_iio_s *, + struct iio_buffer *ring, bool on); + int (*gyro_en)(struct inv_mpu_iio_s *, + struct iio_buffer *ring, bool on); + int (*accl_en)(struct inv_mpu_iio_s *, + struct iio_buffer *ring, bool on); + int (*init_config)(struct iio_dev *indio_dev); + void (*setup_reg)(struct inv_reg_map_s *reg); + DECLARE_KFIFO(timestamps, u64, TIMESTAMP_FIFO_SIZE); + const short *compass_st_upper; + const short *compass_st_lower; short irq; int accel_bias[3]; int gyro_bias[3]; short raw_gyro[3]; short raw_accel[3]; short raw_compass[3]; - unsigned char compass_scale; - unsigned char i2c_addr; - unsigned char compass_divider; - unsigned char compass_counter; - unsigned char sample_divider; - unsigned char fifo_divider; - unsigned char orient_data; - unsigned char display_orient_data; - unsigned char tap_data; + int raw_quaternion[4]; + int input_accel_bias[3]; + u8 compass_scale; + u8 i2c_addr; + u8 compass_divider; + u8 compass_counter; + u8 compass_dmp_divider; + u8 sample_divider; + u8 fifo_divider; + u8 display_orient_data; + u8 tap_data; + enum inv_channel_num num_channels; void *sl_handle; - unsigned int irq_dur_us; - long long last_isr_time; + u32 irq_dur_ns; + u64 last_isr_time; + u64 mpu6500_last_motion_time; + u8 name[20]; + u8 secondary_name[20]; }; + /* produces an unique identifier for each device based on the combination of product version and product revision */ struct prod_rev_map_t { - unsigned short mpl_product_key; - unsigned char silicon_rev; - unsigned short gyro_trim; - unsigned short accel_trim; + u16 mpl_product_key; + u8 silicon_rev; + u16 gyro_trim; + u16 accel_trim; }; + /** * struct inv_mpu_slave - MPU slave structure. * @suspend: suspend operation. 
@@ -330,196 +389,275 @@ struct prod_rev_map_t { * @set_fs set full scale */ struct inv_mpu_slave { - int (*suspend)(struct inv_gyro_state_s *); - int (*resume)(struct inv_gyro_state_s *); - int (*setup)(struct inv_gyro_state_s *); - int (*combine_data)(unsigned char *in, short *out); - int (*get_mode)(struct inv_gyro_state_s *); - int (*set_lpf)(struct inv_gyro_state_s *, int rate); - int (*set_fs)(struct inv_gyro_state_s *, int fs); + int (*suspend)(struct inv_mpu_iio_s *); + int (*resume)(struct inv_mpu_iio_s *); + int (*setup)(struct inv_mpu_iio_s *); + int (*combine_data)(u8 *in, short *out); + int (*get_mode)(void); + int (*set_lpf)(struct inv_mpu_iio_s *, int rate); + int (*set_fs)(struct inv_mpu_iio_s *, int fs); }; + /* AKM definitions */ -#define REG_AKM_ID (0x00) -#define REG_AKM_STATUS (0x02) -#define REG_AKM_MEASURE_DATA (0x03) -#define REG_AKM_MODE (0x0A) -#define REG_AKM_ST_CTRL (0x0C) -#define REG_AKM_SENSITIVITY (0x10) -#define REG_AKM8963_CNTL1 (0x0A) - -#define DATA_AKM_ID (0x48) -#define DATA_AKM_MODE_PW_DN (0x00) -#define DATA_AKM_MODE_PW_SM (0x01) -#define DATA_AKM_MODE_PW_ST (0x08) -#define DATA_AKM_MODE_PW_FR (0x0F) -#define DATA_AKM_SELF_TEST (0x40) -#define DATA_AKM_DRDY (0x01) -#define DATA_AKM8963_BIT (0x10) -#define DATA_AKM_STAT_MASK (0x0C) - -#define DATA_AKM8975_SCALE (9830) -#define DATA_AKM8972_SCALE (19661) -#define DATA_AKM8963_SCALE0 (19661) -#define DATA_AKM8963_SCALE1 (4915) -#define AKM8963_SCALE_SHIFT (4) -#define NUM_BYTES_COMPASS_SLAVE (8) - -#define DATA_AKM8975_ST_X_UP (100) -#define DATA_AKM8975_ST_X_LW (-100) -#define DATA_AKM8975_ST_Y_UP (100) -#define DATA_AKM8975_ST_Y_LW (-100) -#define DATA_AKM8975_ST_Z_UP (-300) -#define DATA_AKM8975_ST_Z_LW (-1000) - -#define DATA_AKM8972_ST_X_UP (50) -#define DATA_AKM8972_ST_X_LW (-50) -#define DATA_AKM8972_ST_Y_UP (50) -#define DATA_AKM8972_ST_Y_LW (-50) -#define DATA_AKM8972_ST_Z_UP (-100) -#define DATA_AKM8972_ST_Z_LW (-500) - -#define DATA_AKM8963_ST_X_UP (200) -#define DATA_AKM8963_ST_X_LW (-200) -#define DATA_AKM8963_ST_Y_UP (200) -#define DATA_AKM8963_ST_Y_LW (-200) -#define DATA_AKM8963_ST_Z_UP (-800) -#define DATA_AKM8963_ST_Z_LW (-3200) - - -/* register definition*/ -#define REG_3050_AUX_VDDIO (0x13) -#define REG_3050_SLAVE_ADDR (0x14) -#define REG_3050_AUX_BST_ADDR (0x18) -#define REG_3050_AUX_XOUT_H (0x23) - -#define REG_3500_OTP (0x00) - -#define REG_YGOFFS_TC (0x01) -#define REG_XA_OFFS_L_TC (0x07) -#define REG_ST_GCT_X (0x0D) -#define REG_I2C_MST_CTRL (0x24) -#define REG_I2C_SLV0_ADDR (0x25) -#define REG_I2C_SLV0_REG (0x26) -#define REG_I2C_SLV0_CTRL (0x27) -#define REG_I2C_SLV1_ADDR (0x28) -#define REG_I2C_SLV1_REG (0x29) -#define REG_I2C_SLV1_CTRL (0x2A) - -#define REG_I2C_SLV4_CTRL (0x34) -#define REG_INT_PIN_CFG (0x37) -#define REG_DMP_INT_STATUS (0x39) -#define REG_EXT_SENS_DATA_00 (0x49) -#define REG_I2C_SLV1_DO (0x64) -#define REG_I2C_MST_DELAY_CTRL (0x67) -#define REG_BANK_SEL (0x6D) -#define REG_MEM_START (0x6E) -#define REG_MEM_RW (0x6F) - -/* bit definitions */ -#define BIT_3050_VDDIO (0x04) -#define BIT_3050_AUX_IF_EN (0x20) -#define BIT_3050_FIFO_RST (0x02) - -#define BIT_BYPASS_EN (0x2) -#define BIT_WAIT_FOR_ES (0x40) -#define BIT_I2C_READ (0x80) -#define BIT_SLV_EN (0x80) -#define BIT_I2C_MST_VDDIO (0x80) - -#define BIT_DMP_EN (0x80) -#define BIT_FIFO_EN (0x40) -#define BIT_I2C_MST_EN (0x20) -#define BIT_DMP_RST (0x08) -#define BIT_FIFO_RST (0x04) - -#define BIT_SLV0_DLY_EN (0x01) -#define BIT_SLV1_DLY_EN (0x02) - -#define BIT_FIFO_OVERFLOW (0x10) -#define BIT_DATA_RDY_EN 
(0x01) -#define BIT_DMP_INT_EN (0x02) - -#define BIT_PWR_ACCL_STBY (0x38) -#define BIT_PWR_GYRO_STBY (0x07) - -#define BIT_GYRO_XOUT (0x40) -#define BIT_GYRO_YOUT (0x20) -#define BIT_GYRO_ZOUT (0x10) -#define BIT_ACCEL_OUT (0x08) -#define BITS_GYRO_OUT (0x70) -#define BITS_SELF_TEST_EN (0xE0) -#define BITS_3050_ACCL_OUT (0x0E) -#define BITS_3050_POWER1 (0x30) -#define BITS_3050_POWER2 (0x10) -#define BITS_3050_GYRO_STANDBY (0x38) -#define BITS_FSR (0x18) -#define BITS_LPF (0x07) -#define BITS_CLK (0x07) -#define BIT_3500_FIFO_OVERFLOW (0x10) -#define BIT_SLEEP (0x40) -#define BIT_CYCLE (0x20) -#define BIT_LPA_FREQ (0xC0) - -#define DMP_START_ADDR (0x400) -#define BYTES_FOR_DMP (16) -#define QUATERNION_BYTES (16) -#define BYTES_PER_SENSOR (6) -#define MPU3050_FOOTER_SIZE (2) -#define FIFO_COUNT_BYTE (2) -#define FIFO_THRESHOLD (500) -#define POWER_UP_TIME (100) -#define SENSOR_UP_TIME (30) -#define MPU_MEM_BANK_SIZE (256) -#define MPU3050_TEMP_OFFSET (5383314L) -#define MPU3050_TEMP_SCALE (3834792L) -#define MPU6050_TEMP_OFFSET (2462307L) -#define MPU6050_TEMP_SCALE (2977653L) -#define MPU_TEMP_SHIFT (16) -#define LPA_FREQ_SHIFT (6) -#define COMPASS_RATE_SCALE (10) -#define MAX_GYRO_FS_PARAM (3) -#define MAX_ACCL_FS_PARAM (3) -#define MAX_LPA_FREQ_PARAM (3) -#define THREE_AXIS (3) -#define GYRO_CONFIG_FSR_SHIFT (3) -#define ACCL_CONFIG_FSR_SHIFT (3) -#define GYRO_DPS_SCALE (250) -#define MEM_ADDR_PROD_REV (0x6) -#define SOFT_PROD_VER_BYTES (5) -#define CHAN_INDEX_GYRO (0) -#define CHAN_INDEX_GYRO_ACCL (1) -#define CHAN_INDEX_GYRO_ACCL_MAGN (2) +#define REG_AKM_ID 0x00 +#define REG_AKM_STATUS 0x02 +#define REG_AKM_MEASURE_DATA 0x03 +#define REG_AKM_MODE 0x0A +#define REG_AKM_ST_CTRL 0x0C +#define REG_AKM_SENSITIVITY 0x10 +#define REG_AKM8963_CNTL1 0x0A + +#define DATA_AKM_ID 0x48 +#define DATA_AKM_MODE_PD 0x00 +#define DATA_AKM_MODE_SM 0x01 +#define DATA_AKM_MODE_ST 0x08 +#define DATA_AKM_MODE_FR 0x0F +#define DATA_AKM_SELF_TEST 0x40 +#define DATA_AKM_DRDY 0x01 +#define DATA_AKM8963_BIT 0x10 +#define DATA_AKM_STAT_MASK 0x0C + +#define DATA_AKM8975_SCALE (9830 * (1L << 15)) +#define DATA_AKM8972_SCALE (19661 * (1L << 15)) +#define DATA_AKM8963_SCALE0 (19661 * (1L << 15)) +#define DATA_AKM8963_SCALE1 (4915 * (1L << 15)) +#define AKM8963_SCALE_SHIFT 4 +#define NUM_BYTES_COMPASS_SLAVE 8 + +/*register and associated bit definition*/ +#define REG_3050_FIFO_EN 0x12 +#define BITS_3050_ACCL_OUT 0x0E + +#define REG_3050_AUX_VDDIO 0x13 +#define BIT_3050_VDDIO 0x04 + +#define REG_3050_SLAVE_ADDR 0x14 +#define REG_3050_SAMPLE_RATE_DIV 0x15 +#define REG_3050_LPF 0x16 +#define REG_3050_INT_ENABLE 0x17 +#define REG_3050_AUX_BST_ADDR 0x18 +#define REG_3050_INT_STATUS 0x1A +#define REG_3050_TEMPERATURE 0x1B +#define REG_3050_RAW_GYRO 0x1D +#define REG_3050_AUX_XOUT_H 0x23 +#define REG_3050_FIFO_COUNT_H 0x3A +#define REG_3050_FIFO_R_W 0x3C + +#define REG_3050_USER_CTRL 0x3D +#define BIT_3050_AUX_IF_EN 0x20 +#define BIT_3050_FIFO_RST 0x02 + +#define REG_3050_PWR_MGMT_1 0x3E +#define BITS_3050_POWER1 0x30 +#define BITS_3050_POWER2 0x10 +#define BITS_3050_GYRO_STANDBY 0x38 + +#define REG_3500_OTP 0x0 + +#define REG_YGOFFS_TC 0x1 +#define BIT_I2C_MST_VDDIO 0x80 + +#define REG_XA_OFFS_L_TC 0x7 +#define REG_PRODUCT_ID 0xC +#define REG_ST_GCT_X 0xD +#define REG_SAMPLE_RATE_DIV 0x19 +#define REG_CONFIG 0x1A + +#define REG_GYRO_CONFIG 0x1B +#define BITS_SELF_TEST_EN 0xE0 + +#define REG_ACCEL_CONFIG 0x1C +#define REG_ACCEL_MOT_THR 0x1F +#define REG_ACCEL_MOT_DUR 0x20 + +#define REG_FIFO_EN 0x23 +#define BIT_ACCEL_OUT 
0x08 +#define BITS_GYRO_OUT 0x70 + + +#define REG_I2C_MST_CTRL 0x24 +#define BIT_WAIT_FOR_ES 0x40 + +#define REG_I2C_SLV0_ADDR 0x25 +#define BIT_I2C_READ 0x80 + +#define REG_I2C_SLV0_REG 0x26 + +#define REG_I2C_SLV0_CTRL 0x27 +#define BIT_SLV_EN 0x80 + +#define REG_I2C_SLV1_ADDR 0x28 +#define REG_I2C_SLV1_REG 0x29 +#define REG_I2C_SLV1_CTRL 0x2A + +#define REG_I2C_SLV2_ADDR 0x2B +#define REG_I2C_SLV2_REG 0x2C +#define REG_I2C_SLV2_CTRL 0x2D + +#define REG_I2C_SLV4_CTRL 0x34 + +#define REG_INT_PIN_CFG 0x37 +#define BIT_BYPASS_EN 0x2 + +#define REG_INT_ENABLE 0x38 +#define BIT_DATA_RDY_EN 0x01 +#define BIT_DMP_INT_EN 0x02 +#define BIT_ZMOT_EN 0x20 +#define BIT_MOT_EN 0x40 +#define BIT_6500_WOM_EN 0x40 + +#define REG_DMP_INT_STATUS 0x39 +#define SMD_INT_ON 0x04 + +#define REG_INT_STATUS 0x3A +#define BIT_MOT_INT 0x40 +#define BIT_ZMOT_INT 0x20 + +#define REG_RAW_ACCEL 0x3B +#define REG_TEMPERATURE 0x41 +#define REG_RAW_GYRO 0x43 +#define REG_EXT_SENS_DATA_00 0x49 + +#define REG_ACCEL_INTEL_STATUS 0x61 + +#define REG_I2C_SLV1_DO 0x64 + +#define REG_I2C_MST_DELAY_CTRL 0x67 +#define BIT_SLV0_DLY_EN 0x01 +#define BIT_SLV1_DLY_EN 0x02 +#define BIT_SLV2_DLY_EN 0x04 + +#define REG_USER_CTRL 0x6A +#define BIT_FIFO_RST 0x04 +#define BIT_DMP_RST 0x08 +#define BIT_I2C_MST_EN 0x20 +#define BIT_FIFO_EN 0x40 +#define BIT_DMP_EN 0x80 + +#define REG_PWR_MGMT_1 0x6B +#define BIT_H_RESET 0x80 +#define BIT_SLEEP 0x40 +#define BIT_CYCLE 0x20 +#define BIT_CLK_MASK 0x7 + +#define REG_PWR_MGMT_2 0x6C +#define BIT_PWR_ACCL_STBY 0x38 +#define BIT_PWR_GYRO_STBY 0x07 +#define BIT_LPA_FREQ 0xC0 + +#define REG_BANK_SEL 0x6D +#define REG_MEM_START_ADDR 0x6E +#define REG_MEM_RW 0x6F +#define REG_PRGM_STRT_ADDRH 0x70 +#define REG_FIFO_COUNT_H 0x72 +#define REG_FIFO_R_W 0x74 +#define REG_WHOAMI 0x75 + +#define REG_6500_XG_ST_DATA 0x0 +#define REG_6500_XA_ST_DATA 0xD +#define REG_6500_ACCEL_CONFIG2 0x1D +#define BIT_ACCEL_FCHOCIE_B 0x08 + +#define REG_6500_LP_ACCEL_ODR 0x1E +#define REG_6500_ACCEL_WOM_THR 0x1F + +#define REG_6500_ACCEL_INTEL_CTRL 0x69 +#define BIT_ACCEL_INTEL_ENABLE 0x80 +#define BIT_ACCEL_INTEL_MODE 0x40 + +/* data definitions */ +#define DMP_START_ADDR 0x400 +#define DMP_MASK_TAP 0x3f +#define DMP_MASK_DIS_ORIEN 0xC0 +#define DMP_DIS_ORIEN_SHIFT 6 + +#define BYTES_FOR_DMP 16 +#define BYTES_FOR_EVENTS 4 +#define QUATERNION_BYTES 16 +#define BYTES_PER_SENSOR 6 +#define MPU3050_FOOTER_SIZE 2 +#define FIFO_COUNT_BYTE 2 +#define FIFO_THRESHOLD 500 +#define POWER_UP_TIME 100 +#define SENSOR_UP_TIME 30 +#define REG_UP_TIME 5 +#define MPU_MEM_BANK_SIZE 256 + +#define MPU6XXX_MAX_MOTION_THRESH (255*4) +#define MPU6XXX_MOTION_THRESH_SHIFT 5 +#define MPU6050_MOTION_DUR_DEFAULT 1 +#define MPU6050_ID 0x68 +#define MPU6050_MAX_MOTION_DUR 255 +#define MPU_TEMP_SHIFT 16 +#define LPA_FREQ_SHIFT 6 +#define COMPASS_RATE_SCALE 10 +#define MAX_GYRO_FS_PARAM 3 +#define MAX_ACCL_FS_PARAM 3 +#define MAX_LPA_FREQ_PARAM 3 + +#define INIT_MOT_DUR 128 +#define INIT_MOT_THR 128 +#define INIT_ZMOT_DUR 128 +#define INIT_ZMOT_THR 128 +#define INIT_ST_SAMPLES 50 +#define INIT_ST_THRESHOLD 14 +#define ST_THRESHOLD_MULTIPLIER 10 +#define ST_MAX_SAMPLES 500 +#define ST_MAX_THRESHOLD 100 + +/*---- MPU6500 ----*/ +#define MPU6500_ID 0x70 /* unique WHOAMI */ +#define MPU6500_PRODUCT_REVISION 1 +#define MPU6500_MEM_REV_ADDR 0x17 +#define MPU6500_REV 2 + +/*---- MPU9250 ----*/ +#define MPU9250_ID 0x71 /* unique WHOAMI */ + +#define THREE_AXIS 3 +#define GYRO_CONFIG_FSR_SHIFT 3 +#define ACCL_CONFIG_FSR_SHIFT 3 +#define GYRO_DPS_SCALE 250 +#define 
MEM_ADDR_PROD_REV 0x6 +#define SOFT_PROD_VER_BYTES 5 +#define CRC_FIRMWARE_SEED 0 +#define SELF_TEST_SUCCESS 1 +#define MS_PER_DMP_TICK 20 /* init parameters */ -#define INIT_FIFO_RATE (50) -#define INIT_DUR_TIME ((1000/INIT_FIFO_RATE)*1000) -#define INIT_TAP_THRESHOLD (100) -#define INIT_TAP_TIME (100) -#define INIT_TAP_MIN_COUNT (2) +#define INIT_FIFO_RATE 50 +#define INIT_DMP_OUTPUT_RATE 25 +#define INIT_DUR_TIME ((1000 / INIT_FIFO_RATE) * 1000 * 1000) +#define INIT_TAP_THRESHOLD 100 +#define INIT_TAP_TIME 100 +#define INIT_TAP_MIN_COUNT 2 +#define MPU_INIT_SMD_DELAY_THLD 3 +#define MPU_INIT_SMD_DELAY2_THLD 1 +#define MPU_INIT_SMD_THLD 3000 +#define MPU_DEFAULT_DMP_FREQ 200 #define MPL_PROD_KEY(ver, rev) (ver * 100 + rev) #define NUM_OF_PROD_REVS (ARRAY_SIZE(prod_rev_map)) /*---- MPU6050 Silicon Revisions ----*/ -#define MPU_SILICON_REV_A2 1 /* MPU6050A2 Device */ -#define MPU_SILICON_REV_B1 2 /* MPU6050B1 Device */ - -#define BIT_PRFTCH_EN 0x40 -#define BIT_CFG_USER_BANK 0x20 -#define BITS_MEM_SEL 0x1f -/* time stamp tolerance */ -#define TIME_STAMP_TOR (5) -#define MAX_CATCH_UP (5) -#define DEFAULT_ACCL_TRIM (16384) -#define MAX_FIFO_RATE (1000) -#define MIN_FIFO_RATE (4) -#define ONE_K_HZ (1000) - -/* flick related defines */ -#define DATA_INT (2097) -#define DATA_MSG_ON (262144) +#define MPU_SILICON_REV_A2 1 /* MPU6050A2 Device */ +#define MPU_SILICON_REV_B1 2 /* MPU6050B1 Device */ + +#define BIT_PRFTCH_EN 0x40 +#define BIT_CFG_USER_BANK 0x20 +#define BITS_MEM_SEL 0x1f + +#define TIME_STAMP_TOR 5 +#define MAX_CATCH_UP 5 +#define DEFAULT_ACCL_TRIM 16384 +#define DEFAULT_GYRO_TRIM 131 +#define MAX_FIFO_RATE 1000 +#define MAX_DMP_OUTPUT_RATE 200 +#define MIN_FIFO_RATE 4 +#define ONE_K_HZ 1000 +#define NS_PER_MS_SHIFT 20 /*tap related defines */ #define INV_TAP 0x08 -#define INV_NUM_TAP_AXES (3) +#define INV_NUM_TAP_AXES 3 #define INV_TAP_AXIS_X_POS 0x20 #define INV_TAP_AXIS_X_NEG 0x10 @@ -539,45 +677,38 @@ struct inv_mpu_slave { INV_TAP_AXIS_Z) #define INT_SRC_TAP 0x01 -#define INT_SRC_ORIENT 0x02 - -/*orientation related */ -#define INV_X_UP 0x01 -#define INV_X_DOWN 0x02 -#define INV_Y_UP 0x04 -#define INV_Y_DOWN 0x08 -#define INV_Z_UP 0x10 -#define INV_Z_DOWN 0x20 -#define INV_ORIENTATION_ALL 0x3F - -#define INV_ORIENTATION_FLIP 0x40 -#define INV_X_AXIS_INDEX (0x00) -#define INV_Y_AXIS_INDEX (0x01) -#define INV_Z_AXIS_INDEX (0x02) - -#define INV_ELEMENT_1 (0x0001) -#define INV_ELEMENT_2 (0x0002) -#define INV_ELEMENT_3 (0x0004) -#define INV_ELEMENT_4 (0x0008) -#define INV_ELEMENT_5 (0x0010) -#define INV_ELEMENT_6 (0x0020) -#define INV_ELEMENT_7 (0x0040) -#define INV_ELEMENT_8 (0x0080) -#define INV_ALL (0xFFFF) -#define INV_ELEMENT_MASK (0x00FF) -#define INV_GYRO_ACC_MASK (0x007E) +#define INT_SRC_DISPLAY_ORIENT 0x08 +#define INT_SRC_SHAKE 0x10 + +#define INV_X_AXIS_INDEX 0x00 +#define INV_Y_AXIS_INDEX 0x01 +#define INV_Z_AXIS_INDEX 0x02 + +#define INV_ELEMENT_1 0x0001 +#define INV_ELEMENT_2 0x0002 +#define INV_ELEMENT_3 0x0004 +#define INV_ELEMENT_4 0x0008 +#define INV_ELEMENT_5 0x0010 +#define INV_ELEMENT_6 0x0020 +#define INV_ELEMENT_7 0x0040 +#define INV_ELEMENT_8 0x0080 +#define INV_ALL 0xFFFF +#define INV_ELEMENT_MASK 0x00FF +#define INV_GYRO_ACC_MASK 0x007E +#define INV_ACCL_MASK 0x70 +#define INV_GYRO_MASK 0xE /* scan element definition */ enum inv_mpu_scan { INV_MPU_SCAN_QUAT_R = 0, INV_MPU_SCAN_QUAT_X, INV_MPU_SCAN_QUAT_Y, INV_MPU_SCAN_QUAT_Z, - INV_MPU_SCAN_GYRO_X, - INV_MPU_SCAN_GYRO_Y, - INV_MPU_SCAN_GYRO_Z, INV_MPU_SCAN_ACCL_X, INV_MPU_SCAN_ACCL_Y, INV_MPU_SCAN_ACCL_Z, + 
INV_MPU_SCAN_GYRO_X, + INV_MPU_SCAN_GYRO_Y, + INV_MPU_SCAN_GYRO_Z, INV_MPU_SCAN_MAGN_X, INV_MPU_SCAN_MAGN_Y, INV_MPU_SCAN_MAGN_Z, @@ -595,6 +726,12 @@ enum inv_filter_e { INV_FILTER_2100HZ_NOLPF, NUM_FILTER }; + +enum inv_slave_mode { + INV_MODE_SUSPEND, + INV_MODE_NORMAL, +}; + /*==== MPU6050B1 MEMORY ====*/ enum MPU_MEMORY_BANKS { MEM_RAM_BANK_0 = 0, @@ -613,6 +750,56 @@ enum MPU_MEMORY_BANKS { MPU_MEM_OTP_BANK_0 = 16 }; +/* IIO attribute address */ +enum MPU_IIO_ATTR_ADDR { + ATTR_DMP_SMD_ENABLE, + ATTR_DMP_SMD_THLD, + ATTR_DMP_SMD_DELAY_THLD, + ATTR_DMP_SMD_DELAY_THLD2, + ATTR_DMP_TAP_ON, + ATTR_DMP_TAP_THRESHOLD, + ATTR_DMP_TAP_MIN_COUNT, + ATTR_DMP_TAP_TIME, + ATTR_DMP_DISPLAY_ORIENTATION_ON, +/* *****above this line, are DMP features, power needs on/off */ +/* *****below this line, are DMP features, no power needed */ + ATTR_DMP_ON, + ATTR_DMP_INT_ON, + ATTR_DMP_EVENT_INT_ON, + ATTR_DMP_OUTPUT_RATE, + ATTR_DMP_QUATERNION_ON, +/* *****above this line, it is all DMP related features */ +/* *****below this line, it is all non-DMP related features */ + ATTR_MOTION_LPA_ON, + ATTR_MOTION_LPA_FREQ, + ATTR_MOTION_LPA_DURATION, + ATTR_MOTION_LPA_THRESHOLD, +/* *****above this line, it is non-DMP, power needs on/off */ +/* *****below this line, it is non-DMP, no needs to on/off power */ + ATTR_SELF_TEST_SAMPLES, + ATTR_SELF_TEST_THRESHOLD, + ATTR_GYRO_ENABLE, + ATTR_ACCL_ENABLE, + ATTR_COMPASS_ENABLE, + ATTR_POWER_STATE, /* this is fake sysfs for compatibility */ + ATTR_FIRMWARE_LOADED, + ATTR_SAMPLING_FREQ, +/* *****below this line, it is attributes only has show methods */ + ATTR_SELF_TEST, /* this has show-only methods but needs power on/off */ + ATTR_GYRO_MATRIX, + ATTR_ACCL_MATRIX, + ATTR_COMPASS_MATRIX, + ATTR_SECONDARY_NAME, +#ifdef CONFIG_INV_TESTING + ATTR_I2C_COUNTERS, + ATTR_REG_WRITE, + ATTR_DEBUG_SMD_ENABLE_TESTP1, + ATTR_DEBUG_SMD_ENABLE_TESTP2, + ATTR_DEBUG_SMD_EXE_STATE, + ATTR_DEBUG_SMD_DELAY_CNTR +#endif +}; + enum inv_accl_fs_e { INV_FS_02G = 0, INV_FS_04G, @@ -635,8 +822,6 @@ enum inv_clock_sel_e { NUM_CLK }; -void inv_wake_up(void); -int inv_set_power_state(struct inv_gyro_state_s *st, unsigned char power_on); ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t pos, size_t size); ssize_t inv_dmp_firmware_read(struct file *filp, @@ -649,43 +834,55 @@ int inv_mpu_probe_trigger(struct iio_dev *indio_dev); void inv_mpu_unconfigure_ring(struct iio_dev *indio_dev); void inv_mpu_remove_trigger(struct iio_dev *indio_dev); int inv_init_config_mpu3050(struct iio_dev *indio_dev); -int inv_get_silicon_rev_mpu6050(struct inv_gyro_state_s *st); -int set_3050_bypass(struct inv_gyro_state_s *st, int enable); -int inv_register_bma250_slave(struct inv_gyro_state_s *st); +int inv_get_silicon_rev_mpu6050(struct inv_mpu_iio_s *st); +int inv_get_silicon_rev_mpu6500(struct inv_mpu_iio_s *st); +int set_3050_bypass(struct inv_mpu_iio_s *st, bool enable); +int inv_register_mpu3050_slave(struct inv_mpu_iio_s *st); void inv_setup_reg_mpu3050(struct inv_reg_map_s *reg); -int set_power_mpu3050(struct inv_gyro_state_s *st, unsigned char power_on); -int set_inv_enable(struct iio_dev *indio_dev, unsigned long enable); -int inv_send_quaternion(struct inv_gyro_state_s *st, int on); -int inv_set_display_orient_interrupt_dmp(struct inv_gyro_state_s *st, int on); -int inv_enable_orientation_dmp(struct inv_gyro_state_s *st, int on); -int inv_set_fifo_rate(struct inv_gyro_state_s *st, unsigned long fifo_rate); -unsigned short 
inv_dmp_get_address(unsigned short key); -long inv_q30_mult(long a, long b); -int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, - unsigned int axis, unsigned short threshold); -int inv_set_min_taps_dmp(struct inv_gyro_state_s *st, unsigned int min_taps); -int inv_set_tap_time_dmp(struct inv_gyro_state_s *st, unsigned int time); -int inv_enable_tap_dmp(struct inv_gyro_state_s *st, unsigned char on); -int inv_i2c_read_base(struct inv_gyro_state_s *st, unsigned short i2c_addr, - unsigned char reg, unsigned short length, unsigned char *data); -int inv_i2c_single_write_base(struct inv_gyro_state_s *st, - unsigned short i2c_addr, unsigned char reg, unsigned char data); -int inv_do_test(struct inv_gyro_state_s *st, int self_test_flag, +int inv_switch_3050_gyro_engine(struct inv_mpu_iio_s *st, bool en); +int inv_switch_3050_accl_engine(struct inv_mpu_iio_s *st, bool en); +int set_power_mpu3050(struct inv_mpu_iio_s *st, bool power_on); +int set_inv_enable(struct iio_dev *indio_dev, bool enable); +int inv_set_interrupt_on_gesture_event(struct inv_mpu_iio_s *st, bool on); +int inv_send_quaternion(struct inv_mpu_iio_s *st, bool on); +int inv_set_display_orient_interrupt_dmp(struct inv_mpu_iio_s *st, bool on); +int inv_set_fifo_rate(struct inv_mpu_iio_s *st, u16 fifo_rate); +u16 inv_dmp_get_address(u16 key); +int inv_q30_mult(int a, int b); +int inv_set_tap_threshold_dmp(struct inv_mpu_iio_s *st, + u32 axis, u16 threshold); +int inv_set_min_taps_dmp(struct inv_mpu_iio_s *st, u16 min_taps); +int inv_set_tap_time_dmp(struct inv_mpu_iio_s *st, u16 time); +int inv_enable_tap_dmp(struct inv_mpu_iio_s *st, bool on); +int inv_i2c_read_base(struct inv_mpu_iio_s *st, u16 i2c_addr, + u8 reg, u16 length, u8 *data); +int inv_i2c_single_write_base(struct inv_mpu_iio_s *st, + u16 i2c_addr, u8 reg, u8 data); +int inv_do_test(struct inv_mpu_iio_s *st, int self_test_flag, int *gyro_result, int *accl_result); -int mpu_memory_write(struct i2c_adapter *i2c_adap, - unsigned char mpu_addr, - unsigned short mem_addr, - unsigned int len, unsigned char const *data); -int mpu_memory_read(struct i2c_adapter *i2c_adap, - unsigned char mpu_addr, - unsigned short mem_addr, - unsigned int len, unsigned char *data); -int inv_hw_self_test(struct inv_gyro_state_s *st); - -#define mem_w(a, b, c) mpu_memory_write(st->sl_handle,\ - st->i2c_addr, a, b, c) -#define mem_w_key(key, b, c) mpu_memory_write(st->sl_handle,\ - st->i2c_addr, inv_dmp_get_address(key), b, c) +int inv_hw_self_test(struct inv_mpu_iio_s *st); +int inv_hw_self_test_6500(struct inv_mpu_iio_s *st); +void inv_recover_setting(struct inv_mpu_iio_s *st); +int inv_power_up_self_test(struct inv_mpu_iio_s *st); +s64 get_time_ns(void); +int write_be32_key_to_mem(struct inv_mpu_iio_s *st, + u32 data, int key); +int inv_set_accel_bias_dmp(struct inv_mpu_iio_s *st); +int inv_send_sensor_data(struct inv_mpu_iio_s *st, u16 elements); +int inv_send_interrupt_word(struct inv_mpu_iio_s *st, bool on); +int mpu_memory_write(struct inv_mpu_iio_s *st, u8 mpu_addr, u16 mem_addr, + u32 len, u8 const *data); +int mpu_memory_read(struct inv_mpu_iio_s *st, u8 mpu_addr, u16 mem_addr, + u32 len, u8 *data); +int mpu_memory_write_unaligned(struct inv_mpu_iio_s *st, u16 key, int len, + u8 const *d); +/* used to print i2c data using pr_debug */ +char *wr_pr_debug_begin(u8 const *data, u32 len, char *string); +char *wr_pr_debug_end(char *string); + +#define mem_w(a, b, c) \ + mpu_memory_write(st, st->i2c_addr, a, b, c) +#define mem_w_key(key, b, c) mpu_memory_write_unaligned(st, key, b, c) 
#define inv_i2c_read(st, reg, len, data) \ inv_i2c_read_base(st, st->i2c_addr, reg, len, data) #define inv_i2c_single_write(st, reg, data) \ @@ -695,5 +892,6 @@ int inv_hw_self_test(struct inv_gyro_state_s *st); #define inv_secondary_write(reg, data) \ inv_i2c_single_write_base(st, st->plat_data.secondary_i2c_addr, \ reg, data) -#endif /* #ifndef _INV_GYRO_H_ */ + +#endif /* #ifndef _INV_MPU_IIO_H_ */ diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c index 9b2bbcfa6b4..ec90fc82428 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c @@ -17,11 +17,13 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro_misc.c - * @brief A sysfs device driver for Invensense gyroscopes. - * @details This file is part of inv_gyro driver code + * @file inv_mpu_misc.c + * @brief A sysfs device driver for Invensense mpu. + * @details This file is part of invensense mpu driver code */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -35,16 +37,16 @@ #include #include #include +#include #include "inv_mpu_iio.h" -/* - Defines -*/ +#include "../../inv_test/inv_counters.h" + /* DMP defines */ #define DMP_ORIENTATION_TIME 500 #define DMP_ORIENTATION_ANGLE 60 #define DMP_DEFAULT_FIFO_RATE 200 -#define DMP_TAP_SCALE (767603923/5) +#define DMP_TAP_SCALE (767603923 / 5) #define DMP_MULTI_SHIFT 30 #define DMP_MULTI_TAP_TIME 500 #define DMP_SHAKE_REJECT_THRESH 100 @@ -54,6 +56,8 @@ #define DMP_PRECISION 1000 #define DMP_MAX_DIVIDER 4 #define DMP_MAX_MIN_TAPS 4 +#define DMP_IMAGE_CRC_VALUE 0xb0338aac +#define DMP_IMAGE_SIZE 2976 /*--- Test parameters defaults --- */ #define DEF_OLDEST_SUPP_PROD_REV 8 @@ -61,17 +65,13 @@ /* sample rate */ #define DEF_SELFTEST_SAMPLE_RATE 0 -/* LPF parameter */ -#define DEF_SELFTEST_LPF_PARA 1 /* full scale setting dps */ #define DEF_SELFTEST_GYRO_FULL_SCALE (0 << 3) #define DEF_SELFTEST_ACCL_FULL_SCALE (2 << 3) -#define DEF_SELFTEST_GYRO_SENS (32768/250) +#define DEF_SELFTEST_GYRO_SENS (32768 / 250) /* wait time before collecting data */ -#define DEF_GYRO_WAIT_TIME 50 +#define DEF_GYRO_WAIT_TIME 10 #define DEF_ST_STABLE_TIME 200 -#define DEF_GYRO_PACKET_THRESH DEF_GYRO_WAIT_TIME -#define DEF_GYRO_THRESH 10 #define DEF_GYRO_SCALE 131 #define DEF_ST_PRECISION 1000 #define DEF_ST_ACCL_FULL_SCALE 8000UL @@ -79,8 +79,12 @@ #define DEF_ST_TRY_TIMES 2 #define DEF_ST_COMPASS_RESULT_SHIFT 2 #define DEF_ST_ACCEL_RESULT_SHIFT 1 +#define DEF_ST_OTP0_THRESH 60 +#define DEF_ST_ABS_THRESH 20 +#define DEF_ST_TOR 2 -#define DEF_ST_COMPASS_WAIT (10*1000) +#define DEF_ST_COMPASS_WAIT_MIN (10 * 1000) +#define DEF_ST_COMPASS_WAIT_MAX (15 * 1000) #define DEF_ST_COMPASS_TRY_TIMES 10 #define DEF_ST_COMPASS_8963_SHIFT 2 @@ -103,7 +107,7 @@ static struct test_setup_t test_setup = { .gyro_sens = DEF_SELFTEST_GYRO_SENS, .sample_rate = DEF_SELFTEST_SAMPLE_RATE, - .lpf = DEF_SELFTEST_LPF_PARA, + .lpf = INV_FILTER_188HZ, .fsr = DEF_SELFTEST_GYRO_FULL_SCALE, .accl_fs = DEF_SELFTEST_ACCL_FULL_SCALE }; @@ -116,78 +120,148 @@ static const struct prod_rev_map_t prod_rev_map[] = { {MPL_PROD_KEY(0, 3), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 4), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 5), MPU_SILICON_REV_A2, 131, 16384}, - {MPL_PROD_KEY(0, 6), MPU_SILICON_REV_A2, 131, 16384}, /* (A2/C2-1) */ - /* prod_ver = 1, forced to 0 for MPU6050 A2 */ + {MPL_PROD_KEY(0, 6), MPU_SILICON_REV_A2, 131, 16384}, + /* prod_ver = 1 */ {MPL_PROD_KEY(0, 7), MPU_SILICON_REV_A2, 131, 16384}, 
{MPL_PROD_KEY(0, 8), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 9), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 10), MPU_SILICON_REV_A2, 131, 16384}, - {MPL_PROD_KEY(0, 11), MPU_SILICON_REV_A2, 131, 16384}, /* (A2/D2-1) */ + {MPL_PROD_KEY(0, 11), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 12), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 13), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 14), MPU_SILICON_REV_A2, 131, 16384}, {MPL_PROD_KEY(0, 15), MPU_SILICON_REV_A2, 131, 16384}, - {MPL_PROD_KEY(0, 27), MPU_SILICON_REV_A2, 131, 16384}, /* (A2/D4) */ + {MPL_PROD_KEY(0, 27), MPU_SILICON_REV_A2, 131, 16384}, /* prod_ver = 1 */ - {MPL_PROD_KEY(1, 16), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D2-1) */ - {MPL_PROD_KEY(1, 17), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D2-2) */ - {MPL_PROD_KEY(1, 18), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D2-3) */ - {MPL_PROD_KEY(1, 19), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D2-4) */ - {MPL_PROD_KEY(1, 20), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D2-5) */ - {MPL_PROD_KEY(1, 28), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/D4) */ - {MPL_PROD_KEY(1, 1), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-1) */ - {MPL_PROD_KEY(1, 2), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-2) */ - {MPL_PROD_KEY(1, 3), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-3) */ - {MPL_PROD_KEY(1, 4), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-4) */ - {MPL_PROD_KEY(1, 5), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-5) */ - {MPL_PROD_KEY(1, 6), MPU_SILICON_REV_B1, 131, 16384}, /* (B1/E1-6) */ + {MPL_PROD_KEY(1, 16), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 17), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 18), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 19), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 20), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 28), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 1), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 2), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 3), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 4), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 5), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(1, 6), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 2 */ - {MPL_PROD_KEY(2, 7), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-1) */ - {MPL_PROD_KEY(2, 8), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-2) */ - {MPL_PROD_KEY(2, 9), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-3) */ - {MPL_PROD_KEY(2, 10), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-4) */ - {MPL_PROD_KEY(2, 11), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-5) */ - {MPL_PROD_KEY(2, 12), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E1-6) */ - {MPL_PROD_KEY(2, 29), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/D4) */ + {MPL_PROD_KEY(2, 7), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 8), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 9), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 10), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 11), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 12), MPU_SILICON_REV_B1, 131, 16384}, + {MPL_PROD_KEY(2, 29), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 3 */ - {MPL_PROD_KEY(3, 30), MPU_SILICON_REV_B1, 131, 16384}, /* (B2/E2) */ + {MPL_PROD_KEY(3, 30), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 4 */ - {MPL_PROD_KEY(4, 31), MPU_SILICON_REV_B1, 131, 8192}, /* (B2/F1) */ - {MPL_PROD_KEY(4, 1), MPU_SILICON_REV_B1, 131, 8192}, /* (B3/F1) */ - {MPL_PROD_KEY(4, 3), MPU_SILICON_REV_B1, 131, 8192}, /* (B4/F1) */ + {MPL_PROD_KEY(4, 31), MPU_SILICON_REV_B1, 131, 
8192}, + {MPL_PROD_KEY(4, 1), MPU_SILICON_REV_B1, 131, 8192}, + {MPL_PROD_KEY(4, 3), MPU_SILICON_REV_B1, 131, 8192}, /* prod_ver = 5 */ - {MPL_PROD_KEY(5, 3), MPU_SILICON_REV_B1, 131, 16384}, /* (B4/F1) */ + {MPL_PROD_KEY(5, 3), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 6 */ - {MPL_PROD_KEY(6, 19), MPU_SILICON_REV_B1, 131, 16384}, /* (B5/E2) */ + {MPL_PROD_KEY(6, 19), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 7 */ - {MPL_PROD_KEY(7, 19), MPU_SILICON_REV_B1, 131, 16384}, /* (B5/E2) */ + {MPL_PROD_KEY(7, 19), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 8 */ - {MPL_PROD_KEY(8, 19), MPU_SILICON_REV_B1, 131, 16384}, /* (B5/E2) */ + {MPL_PROD_KEY(8, 19), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 9 */ - {MPL_PROD_KEY(9, 19), MPU_SILICON_REV_B1, 131, 16384}, /* (B5/E2) */ + {MPL_PROD_KEY(9, 19), MPU_SILICON_REV_B1, 131, 16384}, /* prod_ver = 10 */ - {MPL_PROD_KEY(10, 19), MPU_SILICON_REV_B1, 131, 16384} /* (B5/E2) */ + {MPL_PROD_KEY(10, 19), MPU_SILICON_REV_B1, 131, 16384} }; /* - List of product software revisions - - NOTE : - software revision 0 falls back to the old detection method - based off the product version and product revision per the - table above +* List of product software revisions +* +* NOTE : +* software revision 0 falls back to the old detection method +* based off the product version and product revision per the +* table above */ static const struct prod_rev_map_t sw_rev_map[] = { {0, 0, 0, 0}, {1, MPU_SILICON_REV_B1, 131, 8192}, /* rev C */ {2, MPU_SILICON_REV_B1, 131, 16384} /* rev D */ }; +static const u16 accl_6500_st_tb[256] = { +655, 662, 669, 675, 682, 689, 696, 703, +710, 717, 724, 731, 738, 746, 753, 761, +768, 776, 784, 792, 800, 808, 816, 824, +832, 840, 849, 857, 866, 875, 883, 892, +901, 910, 919, 928, 938, 947, 957, 966, +976, 985, 995, 1005, 1015, 1026, 1036, 1046, +1057, 1067, 1078, 1089, 1099, 1110, 1122, 1133, +1144, 1156, 1167, 1179, 1191, 1202, 1215, 1227, +1239, 1251, 1264, 1276, 1289, 1302, 1315, 1328, +1342, 1355, 1369, 1382, 1396, 1410, 1424, 1438, +1453, 1467, 1482, 1497, 1512, 1527, 1542, 1558, +1573, 1589, 1605, 1621, 1637, 1653, 1670, 1687, +1703, 1720, 1738, 1755, 1773, 1790, 1808, 1826, +1845, 1863, 1882, 1900, 1920, 1939, 1958, 1978, +1997, 2017, 2038, 2058, 2079, 2099, 2120, 2142, +2163, 2185, 2206, 2228, 2251, 2273, 2296, 2319, +2342, 2366, 2389, 2413, 2437, 2462, 2486, 2511, +2536, 2562, 2587, 2613, 2639, 2666, 2692, 2719, +2746, 2774, 2802, 2830, 2858, 2886, 2915, 2944, +2974, 3004, 3034, 3064, 3095, 3126, 3157, 3188, +3220, 3253, 3285, 3318, 3351, 3385, 3418, 3453, +3487, 3522, 3557, 3593, 3629, 3665, 3702, 3739, +3776, 3814, 3852, 3891, 3929, 3969, 4008, 4048, +4089, 4130, 4171, 4213, 4255, 4298, 4341, 4384, +4428, 4472, 4517, 4562, 4608, 4654, 4700, 4747, +4795, 4843, 4891, 4940, 4989, 5039, 5090, 5140, +5192, 5244, 5296, 5349, 5403, 5457, 5511, 5566, +5622, 5678, 5735, 5792, 5850, 5909, 5968, 6028, +6088, 6149, 6210, 6272, 6335, 6398, 6462, 6527, +6592, 6658, 6725, 6792, 6860, 6929, 6998, 7068, +7139, 7210, 7282, 7355, 7428, 7503, 7578, 7653, +7730, 7807, 7885, 7964, 8044, 8124, 8206, 8288, +}; + +static const u16 gyro_6500_st_tb[256] = { +2621, 2648, 2674, 2701, 2728, 2755, 2783, 2811, +2839, 2867, 2896, 2925, 2954, 2983, 3013, 3043, +3074, 3105, 3136, 3167, 3199, 3231, 3263, 3296, +3329, 3362, 3395, 3429, 3464, 3498, 3533, 3569, +3604, 3640, 3677, 3714, 3751, 3788, 3826, 3864, +3903, 3942, 3981, 4021, 4061, 4102, 4143, 4185, +4226, 4269, 4311, 4354, 4398, 4442, 4486, 4531, +4577, 4622, 4669, 4715, 4762, 4810, 4858, 4907, 
+4956, 5005, 5055, 5106, 5157, 5209, 5261, 5313, +5366, 5420, 5474, 5529, 5584, 5640, 5696, 5753, +5811, 5869, 5928, 5987, 6047, 6107, 6168, 6230, +6292, 6355, 6419, 6483, 6548, 6613, 6680, 6746, +6814, 6882, 6951, 7020, 7091, 7161, 7233, 7305, +7378, 7452, 7527, 7602, 7678, 7755, 7832, 7911, +7990, 8070, 8150, 8232, 8314, 8397, 8481, 8566, +8652, 8738, 8826, 8914, 9003, 9093, 9184, 9276, +9369, 9462, 9557, 9653, 9749, 9847, 9945, 10044, +10145, 10246, 10349, 10452, 10557, 10662, 10769, 10877, +10985, 11095, 11206, 11318, 11432, 11546, 11661, 11778, +11896, 12015, 12135, 12256, 12379, 12502, 12627, 12754, +12881, 13010, 13140, 13272, 13404, 13538, 13674, 13810, +13949, 14088, 14229, 14371, 14515, 14660, 14807, 14955, +15104, 15255, 15408, 15562, 15718, 15875, 16034, 16194, +16356, 16519, 16685, 16851, 17020, 17190, 17362, 17536, +17711, 17888, 18067, 18248, 18430, 18614, 18801, 18989, +19179, 19370, 19564, 19760, 19957, 20157, 20358, 20562, +20768, 20975, 21185, 21397, 21611, 21827, 22045, 22266, +22488, 22713, 22940, 23170, 23401, 23635, 23872, 24111, +24352, 24595, 24841, 25089, 25340, 25594, 25850, 26108, +26369, 26633, 26899, 27168, 27440, 27714, 27992, 28271, +28554, 28840, 29128, 29419, 29714, 30011, 30311, 30614, +30920, 31229, 31542, 31857, 32176, 32497, 32822, 33151, +}; static const int accl_st_tb[31] = { 340, 351, 363, 375, 388, 401, 414, 428, 443, 458, 473, 489, 506, 523, 541, 559, 578, 597, 617, 638, 660, 682, 705, 729, 753, 779, 805, 832, 860, 889, 919}; + static const int gyro_6050_st_tb[31] = { 3275, 3425, 3583, 3748, 3920, 4100, 4289, 4486, 4693, 4909, 5134, 5371, 5618, 5876, 6146, 6429, @@ -227,19 +301,33 @@ static const int gyro_3500_st_tb[255] = { 28538, 28823, 29112, 29403, 29697, 29994, 30294, 30597, 30903, 31212, 31524, 31839, 32157, 32479, 32804}; -int mpu_memory_write(struct i2c_adapter *i2c_adap, - unsigned char mpu_addr, - unsigned short mem_addr, - unsigned int len, unsigned char const *data) +char *wr_pr_debug_begin(u8 const *data, u32 len, char *string) +{ + int ii; + string = kmalloc(len * 2 + 1, GFP_KERNEL); + for (ii = 0; ii < len; ii++) + sprintf(&string[ii * 2], "%02X", data[ii]); + string[len * 2] = 0; + return string; +} + +char *wr_pr_debug_end(char *string) +{ + kfree(string); + return ""; +} + +int mpu_memory_write(struct inv_mpu_iio_s *st, u8 mpu_addr, u16 mem_addr, + u32 len, u8 const *data) { - unsigned char bank[2]; - unsigned char addr[2]; - unsigned char buf[513]; + u8 bank[2]; + u8 addr[2]; + u8 buf[513]; struct i2c_msg msgs[3]; int res; - if (!data || !i2c_adap) + if (!data || !st) return -EINVAL; if (len >= (sizeof(buf) - 1)) @@ -248,7 +336,7 @@ int mpu_memory_write(struct i2c_adapter *i2c_adap, bank[0] = REG_BANK_SEL; bank[1] = mem_addr >> 8; - addr[0] = REG_MEM_START; + addr[0] = REG_MEM_START_ADDR; addr[1] = mem_addr & 0xFF; buf[0] = REG_MEM_RW; @@ -267,36 +355,48 @@ int mpu_memory_write(struct i2c_adapter *i2c_adap, msgs[2].addr = mpu_addr; msgs[2].flags = 0; - msgs[2].buf = (unsigned char *)buf; + msgs[2].buf = (u8 *)buf; msgs[2].len = len + 1; - res = i2c_transfer(i2c_adap, msgs, 3); + INV_I2C_INC_MPUWRITE(3 + 3 + (2 + len)); +#if CONFIG_DYNAMIC_DEBUG + { + char *write = 0; + pr_debug("%s WM%02X%02X%02X%s%s - %d\n", st->hw->name, + mpu_addr, bank[1], addr[1], + wr_pr_debug_begin(data, len, write), + wr_pr_debug_end(write), + len); + } +#endif + + res = i2c_transfer(st->sl_handle, msgs, 3); if (res != 3) { if (res >= 0) res = -EIO; return res; - } else + } else { return 0; + } } -int mpu_memory_read(struct i2c_adapter *i2c_adap, - 
unsigned char mpu_addr, - unsigned short mem_addr, - unsigned int len, unsigned char *data) + +int mpu_memory_read(struct inv_mpu_iio_s *st, u8 mpu_addr, u16 mem_addr, + u32 len, u8 *data) { - unsigned char bank[2]; - unsigned char addr[2]; - unsigned char buf; + u8 bank[2]; + u8 addr[2]; + u8 buf; struct i2c_msg msgs[4]; int res; - if (!data || !i2c_adap) + if (!data || !st) return -EINVAL; bank[0] = REG_BANK_SEL; bank[1] = mem_addr >> 8; - addr[0] = REG_MEM_START; + addr[0] = REG_MEM_START_ADDR; addr[1] = mem_addr & 0xFF; buf = REG_MEM_RW; @@ -322,53 +422,127 @@ int mpu_memory_read(struct i2c_adapter *i2c_adap, msgs[3].buf = data; msgs[3].len = len; - res = i2c_transfer(i2c_adap, msgs, 4); + res = i2c_transfer(st->sl_handle, msgs, 4); if (res != 4) { if (res >= 0) res = -EIO; - return res; } else - return 0; + res = 0; + + INV_I2C_INC_MPUWRITE(3 + 3 + 3); + INV_I2C_INC_MPUREAD(len); +#if CONFIG_DYNAMIC_DEBUG + { + char *read = 0; + pr_debug("%s RM%02X%02X%02X%02X - %s%s\n", st->hw->name, + mpu_addr, bank[1], addr[1], len, + wr_pr_debug_begin(data, len, read), + wr_pr_debug_end(read)); + } +#endif + + return res; +} + +int mpu_memory_write_unaligned(struct inv_mpu_iio_s *st, u16 key, int len, + u8 const *d) +{ + int addr; + int start, end; + int len1, len2; + int result = 0; + if (len > MPU_MEM_BANK_SIZE) + return -EINVAL; + addr = inv_dmp_get_address(key); + start = (addr >> 8); + end = ((addr + len - 1) >> 8); + if (start == end) { + result = mpu_memory_write(st, st->i2c_addr, addr, len, d); + } else { + end <<= 8; + len1 = end - addr; + len2 = len - len1; + result = mpu_memory_write(st, st->i2c_addr, addr, len1, d); + result |= mpu_memory_write(st, st->i2c_addr, end, len2, + d + len1); + } + + return result; } /** - * @internal - * @brief Inverse lookup of the index of an MPL product key . - * @param key - * the MPL product indentifier also referred to as 'key'. - * @return the index position of the key in the array, -1 if not found. + * index_of_key()- Inverse lookup of the index of an MPL product key . + * @key: the MPL product indentifier also referred to as 'key'. 
*/ -static short index_of_key(unsigned short key) +static short index_of_key(u16 key) { int i; for (i = 0; i < NUM_OF_PROD_REVS; i++) if (prod_rev_map[i].mpl_product_key == key) return (short)i; - return -1; + return -EINVAL; } -int inv_get_silicon_rev_mpu6050(struct inv_gyro_state_s *st) +int inv_get_silicon_rev_mpu6500(struct inv_mpu_iio_s *st) +{ + struct inv_chip_info_s *chip_info = &st->chip_info; + int result; + u8 whoami, sw_rev; + + result = inv_i2c_read(st, REG_WHOAMI, 1, &whoami); + if (result) + return result; + if (whoami != MPU6500_ID && whoami != MPU9250_ID) + return -EINVAL; + + /*memory read need more time after power up */ + msleep(POWER_UP_TIME); + result = mpu_memory_read(st, st->i2c_addr, + MPU6500_MEM_REV_ADDR, 1, &sw_rev); + if (sw_rev == 0) { + pr_warning("Rev 0 of MPU6500\n"); + pr_warning("can't sit with other devices in same I2C bus\n"); + } + if (result) + return result; + if (sw_rev > MPU6500_REV) + return -EINVAL; + + /* these values are place holders and not real values */ + chip_info->product_id = MPU6500_PRODUCT_REVISION; + chip_info->product_revision = MPU6500_PRODUCT_REVISION; + chip_info->silicon_revision = MPU6500_PRODUCT_REVISION; + chip_info->software_revision = sw_rev; + chip_info->gyro_sens_trim = DEFAULT_GYRO_TRIM; + chip_info->accl_sens_trim = DEFAULT_ACCL_TRIM; + chip_info->multi = 1; + + return 0; +} + +int inv_get_silicon_rev_mpu6050(struct inv_mpu_iio_s *st) { int result; struct inv_reg_map_s *reg; - unsigned char prod_ver = 0x00, prod_rev = 0x00; + u8 prod_ver = 0x00, prod_rev = 0x00; struct prod_rev_map_t *p_rev; - unsigned char bank = + u8 bank = (BIT_PRFTCH_EN | BIT_CFG_USER_BANK | MPU_MEM_OTP_BANK_0); - unsigned short mem_addr = ((bank << 8) | MEM_ADDR_PROD_REV); - unsigned short key; - unsigned char regs[5]; - unsigned short sw_rev; + u16 mem_addr = ((bank << 8) | MEM_ADDR_PROD_REV); + u16 key; + u8 regs[5]; + u16 sw_rev; short index; struct inv_chip_info_s *chip_info = &st->chip_info; reg = &st->reg; - result = inv_i2c_read(st, reg->product_id, 1, &prod_ver); + result = inv_i2c_read(st, REG_PRODUCT_ID, 1, &prod_ver); if (result) return result; prod_ver &= 0xf; + /*memory read need more time after power up */ msleep(POWER_UP_TIME); - result = mpu_memory_read(st->sl_handle, st->i2c_addr, mem_addr, + result = mpu_memory_read(st, st->i2c_addr, mem_addr, 1, &prod_rev); if (result) return result; @@ -390,19 +564,20 @@ int inv_get_silicon_rev_mpu6050(struct inv_gyro_state_s *st) if (sw_rev == 0) { key = MPL_PROD_KEY(prod_ver, prod_rev); if (key == 0) - return -1; + return -EINVAL; index = index_of_key(key); - if (index == -1 || index >= NUM_OF_PROD_REVS) - return -1; + if (index < 0 || index >= NUM_OF_PROD_REVS) + return -EINVAL; /* check MPL is compiled for this device */ if (prod_rev_map[index].silicon_rev != MPU_SILICON_REV_B1) - return -1; + return -EINVAL; p_rev = (struct prod_rev_map_t *)&prod_rev_map[index]; /* if valid, use the software product key */ - } else if (sw_rev < ARRAY_SIZE(sw_rev_map)) + } else if (sw_rev < ARRAY_SIZE(sw_rev_map)) { p_rev = (struct prod_rev_map_t *)&sw_rev_map[sw_rev]; - else - return -1; + } else { + return -EINVAL; + } chip_info->product_id = prod_ver; chip_info->product_revision = prod_rev; chip_info->silicon_revision = p_rev->silicon_rev; @@ -411,37 +586,39 @@ int inv_get_silicon_rev_mpu6050(struct inv_gyro_state_s *st) chip_info->accl_sens_trim = p_rev->accel_trim; if (chip_info->accl_sens_trim == 0) chip_info->accl_sens_trim = DEFAULT_ACCL_TRIM; - chip_info->multi = 
DEFAULT_ACCL_TRIM/chip_info->accl_sens_trim; + chip_info->multi = DEFAULT_ACCL_TRIM / chip_info->accl_sens_trim; if (chip_info->multi != 1) - pr_err("multi is %d\n", chip_info->multi); + pr_info("multi is %d\n", chip_info->multi); return result; } + /** - * @internal - * @brief read the accelerometer hardware self-test bias shift calculated - * during final production test and stored in chip non-volatile memory. - * @param st - * serial interface handle to allow serial communication with the - * device, both gyro and accelerometer. - * @param ct_shift_prod - * A pointer to an array of 3 float elements to hold the values + * read_accel_hw_self_test_prod_shift()- read the accelerometer hardware + * self-test bias shift calculated + * during final production test and + * stored in chip non-volatile memory. + * @st: main data structure. + * @st_prod: A pointer to an array of 3 elements to hold the values * for production hardware self-test bias shifts returned to the * user. - * @return 0 on success, or a non-zero error code otherwise. */ -static int read_accel_hw_self_test_prod_shift(struct inv_gyro_state_s *st, +static int read_accel_hw_self_test_prod_shift(struct inv_mpu_iio_s *st, int *st_prod) { - unsigned char regs[4]; - unsigned char shift_code[3]; + u8 regs[4]; + u8 shift_code[3]; int result, i; - st_prod[0] = st_prod[1] = st_prod[2] = 0; + + st_prod[0] = 0; + st_prod[1] = 0; + st_prod[2] = 0; result = inv_i2c_read(st, REG_ST_GCT_X, ARRAY_SIZE(regs), regs); + if (result) return result; if ((0 == regs[0]) && (0 == regs[1]) && - (0 == regs[2]) && (0 == regs[3])) - return -1; + (0 == regs[2]) && (0 == regs[3])) + return -EINVAL; shift_code[X] = ((regs[0] & 0xE0) >> 3) | ((regs[3] & 0x30) >> 4); shift_code[Y] = ((regs[1] & 0xE0) >> 3) | ((regs[3] & 0x0C) >> 2); shift_code[Z] = ((regs[2] & 0xE0) >> 3) | (regs[3] & 0x03); @@ -450,31 +627,41 @@ static int read_accel_hw_self_test_prod_shift(struct inv_gyro_state_s *st, st_prod[i] = test_setup.accl_sens[i]* accl_st_tb[shift_code[i] - 1]; } + return 0; } -static int inv_check_accl_self_test(struct inv_gyro_state_s *st, +/** +* inv_check_accl_self_test()- check accel self test. this function returns +* zero as success. A non-zero return value +* indicates failure in self test. +* @*st: main data structure. +* @*reg_avg: average value of normal test. 
+* @*st_avg: average value of self test +*/ +static int inv_check_accl_self_test(struct inv_mpu_iio_s *st, int *reg_avg, int *st_avg){ int gravity, reg_z_avg, g_z_sign, fs, j, ret_val; int tmp1; int st_shift_prod[THREE_AXIS], st_shift_cust[THREE_AXIS]; int st_shift_ratio[THREE_AXIS]; + if (st->chip_info.software_revision < DEF_OLDEST_SUPP_SW_REV && - st->chip_info.product_revision < DEF_OLDEST_SUPP_PROD_REV) + st->chip_info.product_revision < DEF_OLDEST_SUPP_PROD_REV) return 0; fs = DEF_ST_ACCL_FULL_SCALE; /* assume +/- 2 mg as typical */ g_z_sign = 1; ret_val = 0; - test_setup.accl_sens[X] = (unsigned int)(DEF_ST_SCALE * + test_setup.accl_sens[X] = (u32)(DEF_ST_SCALE * DEF_ST_PRECISION / fs); - test_setup.accl_sens[Y] = (unsigned int)(DEF_ST_SCALE * + test_setup.accl_sens[Y] = (u32)(DEF_ST_SCALE * DEF_ST_PRECISION / fs); - test_setup.accl_sens[Z] = (unsigned int)(DEF_ST_SCALE * + test_setup.accl_sens[Z] = (u32)(DEF_ST_SCALE * DEF_ST_PRECISION / fs); if (MPL_PROD_KEY(st->chip_info.product_id, - st->chip_info.product_revision) == - MPU_PRODUCT_KEY_B1_E1_5) { + st->chip_info.product_revision) == + MPU_PRODUCT_KEY_B1_E1_5) { /* half sensitivity Z accelerometer parts */ test_setup.accl_sens[Z] /= 2; } else { @@ -484,35 +671,47 @@ static int inv_check_accl_self_test(struct inv_gyro_state_s *st, test_setup.accl_sens[Z] /= st->chip_info.multi; } gravity = test_setup.accl_sens[Z]; - reg_z_avg = reg_avg[Z] - g_z_sign * gravity*DEF_ST_PRECISION; - read_accel_hw_self_test_prod_shift(st, st_shift_prod); + reg_z_avg = reg_avg[Z] - g_z_sign * gravity * DEF_ST_PRECISION; + ret_val = read_accel_hw_self_test_prod_shift(st, st_shift_prod); + if (ret_val) + return ret_val; + for (j = 0; j < 3; j++) { st_shift_cust[j] = abs(reg_avg[j] - st_avg[j]); if (st_shift_prod[j]) { tmp1 = st_shift_prod[j]/DEF_ST_PRECISION; - st_shift_ratio[j] = st_shift_cust[j]/tmp1 - - DEF_ST_PRECISION; + st_shift_ratio[j] = abs(st_shift_cust[j]/tmp1 + - DEF_ST_PRECISION); if (st_shift_ratio[j] > DEF_ACCEL_ST_SHIFT_DELTA) ret_val |= 1 << j; - if (st_shift_ratio[j] < -DEF_ACCEL_ST_SHIFT_DELTA) - ret_val |= 1 << j; } else { if (st_shift_cust[j] < - DEF_ACCEL_ST_SHIFT_MIN*gravity) + DEF_ACCEL_ST_SHIFT_MIN * gravity) ret_val |= 1 << j; if (st_shift_cust[j] > - DEF_ACCEL_ST_SHIFT_MAX*gravity) + DEF_ACCEL_ST_SHIFT_MAX * gravity) ret_val |= 1 << j; } } + return ret_val; } -static int inv_check_3500_gyro_self_test(struct inv_gyro_state_s *st, + +/** +* inv_check_3500_gyro_self_test() check gyro self test. this function returns +* zero as success. A non-zero return value +* indicates failure in self test. +* @*st: main data structure. +* @*reg_avg: average value of normal test. +* @*st_avg: average value of self test +*/ + +static int inv_check_3500_gyro_self_test(struct inv_mpu_iio_s *st, int *reg_avg, int *st_avg){ int result; int gst[3], ret_val; int gst_otp[3], i; - unsigned char st_code[THREE_AXIS]; + u8 st_code[THREE_AXIS]; ret_val = 0; for (i = 0; i < 3; i++) @@ -520,90 +719,172 @@ static int inv_check_3500_gyro_self_test(struct inv_gyro_state_s *st, result = inv_i2c_read(st, REG_3500_OTP, THREE_AXIS, st_code); if (result) return result; - gst_otp[0] = gst_otp[1] = gst_otp[2] = 0; + gst_otp[0] = 0; + gst_otp[1] = 0; + gst_otp[2] = 0; for (i = 0; i < 3; i++) { if (st_code[i] != 0) gst_otp[i] = gyro_3500_st_tb[st_code[i] - 1]; } + /* check self test value passing criterion. 
Using the DEF_ST_TOR + * for certain degree of tolerance */ for (i = 0; i < 3; i++) { if (gst_otp[i] == 0) { - if (abs(gst[i])*4 < 60*2*DEF_ST_PRECISION* - DEF_GYRO_SCALE) - ret_val |= (1< DEF_GYRO_CT_SHIFT_DELTA) - ret_val |= (1< 20*2*DEF_ST_PRECISION*DEF_GYRO_SCALE) - ret_val |= (1< DEF_ST_TOR * DEF_ST_ABS_THRESH * + DEF_ST_PRECISION * DEF_GYRO_SCALE) + ret_val |= (1 << i); } return ret_val; } -static int inv_check_6050_gyro_self_test(struct inv_gyro_state_s *st, + +/** +* inv_check_6050_gyro_self_test() - check 6050 gyro self test. this function +* returns zero as success. A non-zero return +* value indicates failure in self test. +* @*st: main data structure. +* @*reg_avg: average value of normal test. +* @*st_avg: average value of self test +*/ +static int inv_check_6050_gyro_self_test(struct inv_mpu_iio_s *st, int *reg_avg, int *st_avg){ int result; int ret_val; int ct_shift_prod[3], st_shift_cust[3], st_shift_ratio[3], i; - unsigned char regs[3]; + u8 regs[3]; + if (st->chip_info.software_revision < DEF_OLDEST_SUPP_SW_REV && - st->chip_info.product_revision < DEF_OLDEST_SUPP_PROD_REV) + st->chip_info.product_revision < DEF_OLDEST_SUPP_PROD_REV) return 0; ret_val = 0; result = inv_i2c_read(st, REG_ST_GCT_X, 3, regs); + if (result) + return result; regs[X] &= 0x1f; regs[Y] &= 0x1f; regs[Z] &= 0x1f; - for (i = 0; i < 3; i++) { if (regs[i] != 0) ct_shift_prod[i] = gyro_6050_st_tb[regs[i] - 1]; else ct_shift_prod[i] = 0; } + + for (i = 0; i < 3; i++) { st_shift_cust[i] = abs(reg_avg[i] - st_avg[i]); if (ct_shift_prod[i]) { - st_shift_ratio[i] = st_shift_cust[i]/ - ct_shift_prod[i] - DEF_ST_PRECISION; + st_shift_ratio[i] = abs(st_shift_cust[i] / + ct_shift_prod[i] - DEF_ST_PRECISION); if (st_shift_ratio[i] > DEF_GYRO_CT_SHIFT_DELTA) ret_val |= 1 << i; - if (st_shift_ratio[i] < -DEF_GYRO_CT_SHIFT_DELTA) - ret_val |= 1 << i; } else { - if (st_shift_cust[i] < DEF_ST_PRECISION* - DEF_GYRO_CT_SHIFT_MIN*test_setup.gyro_sens) + if (st_shift_cust[i] < DEF_ST_PRECISION * + DEF_GYRO_CT_SHIFT_MIN * test_setup.gyro_sens) ret_val |= 1 << i; - if (st_shift_cust[i] > DEF_ST_PRECISION* - DEF_GYRO_CT_SHIFT_MAX*test_setup.gyro_sens) + if (st_shift_cust[i] > DEF_ST_PRECISION * + DEF_GYRO_CT_SHIFT_MAX * test_setup.gyro_sens) ret_val |= 1 << i; } } + /* check for absolute value passing criterion. Using DEF_ST_TOR + * for certain degree of tolerance */ + for (i = 0; i < 3; i++) { + if (abs(reg_avg[i]) > DEF_ST_TOR * DEF_ST_ABS_THRESH * + DEF_ST_PRECISION * DEF_GYRO_SCALE) + ret_val |= (1 << i); + } + + return ret_val; +} + +/** +* inv_check_6500_self_test() - check 6050 gyro self test. this function +* returns zero as success. A non-zero return +* value indicates failure in self test. +* @*st: main data structure. +* @*reg_avg: average value of normal test. +* @*st_avg: average value of self test +* @is_gyro: switch for gyro/accl. 
+*/ +static int inv_check_6500_self_test(struct inv_mpu_iio_s *st, + int *reg_avg, int *st_avg, bool is_gyro) +{ + int ret_val, result; + int ct_shift_prod[3], st_shift_cust[3], st_shift_ratio[3], i; + u8 regs[3]; + const u16 *st_tb; + + ret_val = 0; + if (is_gyro) { + st_tb = gyro_6500_st_tb; + result = inv_i2c_read(st, REG_6500_XG_ST_DATA, 3, regs); + } else { + st_tb = accl_6500_st_tb; + result = inv_i2c_read(st, REG_6500_XA_ST_DATA, 3, regs); + } + pr_debug("isgyro=%d, OTP:%d, %d, %d\n", is_gyro, regs[0], + regs[1], regs[2]); + + for (i = 0; i < 3; i++) { + if (regs[i] != 0) + ct_shift_prod[i] = st_tb[regs[i] - 1]; + else + ct_shift_prod[i] = 0; + } + for (i = 0; i < 3; i++) { - if (abs(reg_avg[i])*4 > 20*2*DEF_ST_PRECISION*DEF_GYRO_SCALE) - ret_val |= (1<self_test.threshold); + if (st_shift_ratio[i] > ST_THRESHOLD_MULTIPLIER * + st->self_test.threshold) + ret_val |= 1 << i; + } } + return ret_val; } /** * inv_do_test() - do the actual test of self testing */ -int inv_do_test(struct inv_gyro_state_s *st, int self_test_flag, +int inv_do_test(struct inv_mpu_iio_s *st, int self_test_flag, int *gyro_result, int *accl_result) { struct inv_reg_map_s *reg; int result, i, j, packet_size; - unsigned char data[BYTES_PER_SENSOR * 2], has_accl; - int fifo_count, packet_count, ind; + u8 data[BYTES_PER_SENSOR * 2], d; + bool has_accl; + int fifo_count, packet_count, ind, s; reg = &st->reg; has_accl = (st->chip_type != INV_ITG3500); - packet_size = BYTES_PER_SENSOR*(1 + has_accl); + if (has_accl) + packet_size = BYTES_PER_SENSOR * 2; + else + packet_size = BYTES_PER_SENSOR; result = inv_i2c_single_write(st, reg->int_enable, 0); if (result) @@ -638,7 +919,7 @@ int inv_do_test(struct inv_gyro_state_s *st, int self_test_flag, if (result) return result; } - /*wait for the output to stable*/ + /* wait for the output to get stable */ if (self_test_flag) msleep(DEF_ST_STABLE_TIME); @@ -647,115 +928,130 @@ int inv_do_test(struct inv_gyro_state_s *st, int self_test_flag, if (result) return result; /* enable sensor output to FIFO */ - result = inv_i2c_single_write(st, reg->fifo_en, BITS_GYRO_OUT - | (has_accl << 3)); - if (result) - return result; - mdelay(DEF_GYRO_WAIT_TIME); - /* stop sending data to FIFO */ - result = inv_i2c_single_write(st, reg->fifo_en, 0); - if (result) - return result; - result = inv_i2c_read(st, reg->fifo_count_h, 2, data); + if (has_accl) + d = BITS_GYRO_OUT | BIT_ACCEL_OUT; + else + d = BITS_GYRO_OUT; + result = inv_i2c_single_write(st, reg->fifo_en, d); if (result) return result; - fifo_count = (data[0] << 8) + data[1]; - packet_count = fifo_count/packet_size; - gyro_result[0] = gyro_result[1] = gyro_result[2] = 0; - accl_result[0] = accl_result[1] = accl_result[2] = 0; - if (abs(packet_count - DEF_GYRO_PACKET_THRESH) > DEF_GYRO_THRESH) - return -EAGAIN; - - for (i = 0; i < packet_count; i++) { - /* getting FIFO data */ - result = inv_i2c_read(st, reg->fifo_r_w, - packet_size, data); + + for (i = 0; i < THREE_AXIS; i++) { + gyro_result[i] = 0; + accl_result[i] = 0; + } + s = 0; + while (s < st->self_test.samples) { + mdelay(DEF_GYRO_WAIT_TIME); + result = inv_i2c_read(st, reg->fifo_count_h, + FIFO_COUNT_BYTE, data); if (result) + return result; + fifo_count = be16_to_cpup((__be16 *)(&data[0])); + packet_count = fifo_count / packet_size; + result = inv_i2c_read(st, reg->fifo_r_w, packet_size, data); + if (result) + return result; + i = 0; + while ((i < packet_count) && (s < st->self_test.samples)) { + result = inv_i2c_read(st, reg->fifo_r_w, + packet_size, data); + if (result) 
return result; - ind = 0; - if (has_accl) { + ind = 0; + if (has_accl) { + for (j = 0; j < THREE_AXIS; j++) + accl_result[j] += + (short)be16_to_cpup( + (__be16 *)(&data[ind + 2 * j])); + ind += BYTES_PER_SENSOR; + } for (j = 0; j < THREE_AXIS; j++) - accl_result[j] += - (short)be16_to_cpup((__be16 - *)(&data[ind + 2*j])); - ind += 6; + gyro_result[j] += + (short)be16_to_cpup( + (__be16 *)(&data[ind + 2 * j])); + s++; + i++; } - for (j = 0; j < THREE_AXIS; j++) - gyro_result[j] += - (short)be16_to_cpup((__be16 *)(&data[ind + 2*j])); } - gyro_result[0] = gyro_result[0]*DEF_ST_PRECISION/packet_count; - gyro_result[1] = gyro_result[1]*DEF_ST_PRECISION/packet_count; - gyro_result[2] = gyro_result[2]*DEF_ST_PRECISION/packet_count; + /* stop sending data to FIFO */ + result = inv_i2c_single_write(st, reg->fifo_en, 0); + if (result) + return result; + for (j = 0; j < THREE_AXIS; j++) { + gyro_result[j] = gyro_result[j]/s; + gyro_result[j] *= DEF_ST_PRECISION; + } + if (has_accl) { - accl_result[0] = - accl_result[0]*DEF_ST_PRECISION/packet_count; - accl_result[1] = - accl_result[1]*DEF_ST_PRECISION/packet_count; - accl_result[2] = - accl_result[2]*DEF_ST_PRECISION/packet_count; + for (j = 0; j < THREE_AXIS; j++) { + accl_result[j] = accl_result[j]/s; + accl_result[j] *= DEF_ST_PRECISION; + } } return 0; } + /** * inv_recover_setting() recover the old settings after everything is done */ -static void inv_recover_setting(struct inv_gyro_state_s *st) +void inv_recover_setting(struct inv_mpu_iio_s *st) { struct inv_reg_map_s *reg; int data; - struct iio_dev *indio = iio_priv_to_dev(st); reg = &st->reg; - set_inv_enable(indio, st->chip_config.enable); inv_i2c_single_write(st, reg->gyro_config, - st->chip_config.fsr<chip_config.fsr << GYRO_CONFIG_FSR_SHIFT); inv_i2c_single_write(st, reg->lpf, st->chip_config.lpf); - data = ONE_K_HZ/st->chip_config.fifo_rate - 1; + data = ONE_K_HZ/st->chip_config.new_fifo_rate - 1; inv_i2c_single_write(st, reg->sample_rate_div, data); if (INV_ITG3500 != st->chip_type) { inv_i2c_single_write(st, reg->accl_config, - (st->chip_config.accl_fs << ACCL_CONFIG_FSR_SHIFT)); + (st->chip_config.accl_fs << + ACCL_CONFIG_FSR_SHIFT)); } - if (st->chip_config.is_asleep) - inv_set_power_state(st, 0); - else - inv_set_power_state(st, 1); + st->switch_gyro_engine(st, false); + st->switch_accl_engine(st, false); + st->set_power_state(st, false); } -static int inv_check_compass_self_test(struct inv_gyro_state_s *st) + +static int inv_check_compass_self_test(struct inv_mpu_iio_s *st) { int result; - unsigned char data[6]; - unsigned char counter, cntl; + u8 data[6]; + u8 counter, cntl; short x, y, z; - unsigned char *sens; + u8 *sens; sens = st->chip_info.compass_sens; - /*set to bypass mode */ - result = inv_i2c_single_write(st, REG_INT_PIN_CFG, BIT_BYPASS_EN); + /* set to bypass mode */ + result = inv_i2c_single_write(st, REG_INT_PIN_CFG, + st->plat_data.int_config | BIT_BYPASS_EN); if (result) { - inv_i2c_single_write(st, REG_INT_PIN_CFG, 0x0); + result = inv_i2c_single_write(st, REG_INT_PIN_CFG, + st->plat_data.int_config); return result; } - /*set to power down mode */ - result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PW_DN); + /* set to power down mode */ + result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PD); if (result) goto AKM_fail; - /*write 1 to ASTC register */ + /* write 1 to ASTC register */ result = inv_secondary_write(REG_AKM_ST_CTRL, DATA_AKM_SELF_TEST); if (result) goto AKM_fail; - /*set self test mode */ - result = inv_secondary_write(REG_AKM_MODE, 
DATA_AKM_MODE_PW_ST); + /* set self test mode */ + result = inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_ST); if (result) goto AKM_fail; counter = DEF_ST_COMPASS_TRY_TIMES; while (counter > 0) { - usleep_range(DEF_ST_COMPASS_WAIT, DEF_ST_COMPASS_WAIT); + usleep_range(DEF_ST_COMPASS_WAIT_MIN, DEF_ST_COMPASS_WAIT_MAX); result = inv_secondary_read(REG_AKM_STATUS, 1, data); if (result) goto AKM_fail; @@ -765,7 +1061,7 @@ static int inv_check_compass_self_test(struct inv_gyro_state_s *st) counter = 0; } if ((data[0] & DATA_AKM_DRDY) == 0) { - result = -1; + result = -EINVAL; goto AKM_fail; } result = inv_secondary_read(REG_AKM_MEASURE_DATA, @@ -773,9 +1069,9 @@ static int inv_check_compass_self_test(struct inv_gyro_state_s *st) if (result) goto AKM_fail; - x = le16_to_cpup((__le16 *)(&data[0])); - y = le16_to_cpup((__le16 *)(&data[2])); - z = le16_to_cpup((__le16 *)(&data[4])); + x = le16_to_cpup((__le16 *)(&data[0])); + y = le16_to_cpup((__le16 *)(&data[2])); + z = le16_to_cpup((__le16 *)(&data[4])); x = ((x * (sens[0] + 128)) >> 8); y = ((y * (sens[1] + 128)) >> 8); z = ((z * (sens[2] + 128)) >> 8); @@ -789,7 +1085,7 @@ static int inv_check_compass_self_test(struct inv_gyro_state_s *st) z <<= DEF_ST_COMPASS_8963_SHIFT; } } - result = 1; + result = -EINVAL; if (x > st->compass_st_upper[X] || x < st->compass_st_lower[X]) goto AKM_fail; if (y > st->compass_st_upper[Y] || y < st->compass_st_lower[Y]) @@ -801,43 +1097,50 @@ static int inv_check_compass_self_test(struct inv_gyro_state_s *st) /*write 0 to ASTC register */ result |= inv_secondary_write(REG_AKM_ST_CTRL, 0); /*set to power down mode */ - result |= inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PW_DN); + result |= inv_secondary_write(REG_AKM_MODE, DATA_AKM_MODE_PD); /*restore to non-bypass mode */ - result |= inv_i2c_single_write(st, REG_INT_PIN_CFG, 0x0); + result |= inv_i2c_single_write(st, REG_INT_PIN_CFG, + st->plat_data.int_config); return result; } -static int inv_power_up_self_test(struct inv_gyro_state_s *st) + +int inv_power_up_self_test(struct inv_mpu_iio_s *st) { int result; - result = inv_i2c_single_write(st, st->reg.pwr_mgmt_1, INV_CLK_PLL); + + result = st->set_power_state(st, true); if (result) return result; - msleep(POWER_UP_TIME); - result = inv_i2c_single_write(st, st->reg.pwr_mgmt_2, 0); + result = st->switch_accl_engine(st, true); if (result) return result; - msleep(POWER_UP_TIME); + result = st->switch_gyro_engine(st, true); + if (result) + return result; + return 0; } + /** * inv_hw_self_test() - main function to do hardware self test */ -int inv_hw_self_test(struct inv_gyro_state_s *st) +int inv_hw_self_test(struct inv_mpu_iio_s *st) { int result; int gyro_bias_st[THREE_AXIS], gyro_bias_regular[THREE_AXIS]; int accl_bias_st[THREE_AXIS], accl_bias_regular[THREE_AXIS]; int test_times; char compass_result, accel_result, gyro_result; - if (st->chip_config.is_asleep || st->chip_config.lpa_mode) { - result = inv_power_up_self_test(st); - if (result) - return result; - } - compass_result = accel_result = gyro_result = 0; + + result = inv_power_up_self_test(st); + if (result) + return result; + compass_result = 0; + accel_result = 0; + gyro_result = 0; test_times = DEF_ST_TRY_TIMES; while (test_times > 0) { - result = inv_do_test(st, 0, gyro_bias_regular, + result = inv_do_test(st, 0, gyro_bias_regular, accl_bias_regular); if (result == -EAGAIN) test_times--; @@ -864,22 +1167,42 @@ int inv_hw_self_test(struct inv_gyro_state_s *st) } else { if (st->chip_config.has_compass) compass_result = 
!inv_check_compass_self_test(st); - accel_result = !inv_check_accl_self_test(st, - accl_bias_regular, accl_bias_st); - gyro_result = !inv_check_6050_gyro_self_test(st, - gyro_bias_regular, gyro_bias_st); + + if (INV_MPU6050 == st->chip_type) { + accel_result = !inv_check_accl_self_test(st, + accl_bias_regular, accl_bias_st); + gyro_result = !inv_check_6050_gyro_self_test(st, + gyro_bias_regular, gyro_bias_st); + } else if (INV_MPU6500 == st->chip_type) { + accel_result = !inv_check_6500_self_test(st, + accl_bias_regular, accl_bias_st, false); + gyro_result = !inv_check_6500_self_test(st, + gyro_bias_regular, gyro_bias_st, true); + } } test_fail: inv_recover_setting(st); - return (compass_result< 0; bank++, @@ -899,13 +1222,13 @@ static int inv_load_firmware(struct inv_gyro_state_s *st, return 0; } -static int inv_verify_firmware(struct inv_gyro_state_s *st, - unsigned char *data, int size) +static int inv_verify_firmware(struct inv_mpu_iio_s *st, + u8 *data, int size) { int bank, write_size; int result; - unsigned short memaddr; - unsigned char firmware[MPU_MEM_BANK_SIZE]; + u16 memaddr; + u8 firmware[MPU_MEM_BANK_SIZE]; /* Write and verify memory */ for (bank = 0; size > 0; bank++, @@ -917,7 +1240,7 @@ static int inv_verify_firmware(struct inv_gyro_state_s *st, write_size = size; memaddr = ((bank << 8) | 0x00); - result = mpu_memory_read(st->sl_handle, + result = mpu_memory_read(st, st->i2c_addr, memaddr, write_size, firmware); if (result) return result; @@ -927,18 +1250,18 @@ static int inv_verify_firmware(struct inv_gyro_state_s *st, return 0; } -static int inv_set_fifo_div(struct inv_gyro_state_s *st, - unsigned short fifoRate) +static int inv_set_fifo_div(struct inv_mpu_iio_s *st, + u16 fifoRate) { - unsigned char regs[2]; + u8 regs[2]; int result = 0; /*For some reason DINAC4 is defined as 0xb8, but DINBC4 is not*/ - const unsigned char regs_end[12] = {DINAFE, DINAF2, DINAAB, 0xc4, + const u8 regs_end[] = {DINAFE, DINAF2, DINAAB, 0xc4, DINAAA, DINAF1, DINADF, DINADF, 0xbb, 0xaf, DINADF, DINADF}; - regs[0] = (unsigned char)((fifoRate >> 8) & 0xff); - regs[1] = (unsigned char)(fifoRate & 0xff); + regs[0] = (u8)((fifoRate >> 8) & 0xff); + regs[1] = (u8)(fifoRate & 0xff); result = mem_w_key(KEY_D_0_22, ARRAY_SIZE(regs), regs); if (result) return result; @@ -950,28 +1273,29 @@ static int inv_set_fifo_div(struct inv_gyro_state_s *st, return result; } -int inv_send_quaternion(struct inv_gyro_state_s *st, int on) +int inv_send_quaternion(struct inv_mpu_iio_s *st, bool on) { - const unsigned char regs_on[] = {DINBC0, DINBC2, - DINBC4, DINBC6}; - const unsigned char regs_off[] = {DINA80, DINA80, - DINA80, DINA80}; - const unsigned char *regs; - unsigned char result; + const u8 regs_on[] = {DINBC0, DINBC2, + DINBC4, DINBC6}; + const u8 regs_off[] = {DINA80, DINA80, + DINA80, DINA80}; + const u8 *regs; + u8 result; if (on) regs = regs_on; else regs = regs_off; result = mem_w_key(KEY_CFG_LP_QUAT, ARRAY_SIZE(regs_on), regs); + return result; } -int inv_set_display_orient_interrupt_dmp(struct inv_gyro_state_s *st, - int on) +int inv_set_display_orient_interrupt_dmp(struct inv_mpu_iio_s *st, + bool on) { /*Turn on the display orientation interrupt in the DMP*/ int result; - unsigned char regs[1] = {0xd8}; + u8 regs[] = {0xd8}; if (on) regs[0] = 0xd9; @@ -979,16 +1303,16 @@ int inv_set_display_orient_interrupt_dmp(struct inv_gyro_state_s *st, return result; } -int inv_set_fifo_rate(struct inv_gyro_state_s *st, unsigned long fifo_rate) +int inv_set_fifo_rate(struct inv_mpu_iio_s *st, u16 fifo_rate) { 
- unsigned char divider; + u8 divider; int result; - divider = (unsigned char)(ONE_K_HZ/fifo_rate) - 1; + divider = (u8)(ONE_K_HZ / fifo_rate) - 1; if (divider > DMP_MAX_DIVIDER) { st->sample_divider = DMP_MAX_DIVIDER; st->fifo_divider = - (unsigned char)(DMP_DEFAULT_FIFO_RATE/fifo_rate)-1; + (u8)(DMP_DEFAULT_FIFO_RATE / fifo_rate) - 1; } else { st->sample_divider = divider; st->fifo_divider = 0; @@ -998,11 +1322,11 @@ int inv_set_fifo_rate(struct inv_gyro_state_s *st, unsigned long fifo_rate) return result; } -static int inv_set_tap_interrupt_dmp(struct inv_gyro_state_s *st, - unsigned char on) +static int inv_set_tap_interrupt_dmp(struct inv_mpu_iio_s *st, + u8 on) { int result; - unsigned char regs[] = {0}; + u8 regs[] = {0}; if (on) regs[0] = 0xf8; @@ -1013,40 +1337,23 @@ static int inv_set_tap_interrupt_dmp(struct inv_gyro_state_s *st, return result; return result; } -static int inv_set_orientation_interrupt_dmp(struct inv_gyro_state_s *st, - unsigned char on) -{ - int result; - unsigned char regs[2]; - if (on) { - regs[0] = DINBF8; - regs[1] = DINBF8; - } else { - regs[0] = DINAD8; - regs[1] = DINAD8; - } - result = mem_w_key(KEY_CFG_ORIENT_IRQ_1, ARRAY_SIZE(regs), regs); - if (result) - return result; - return result; -} -int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, - unsigned int axis, unsigned short threshold) +int inv_set_tap_threshold_dmp(struct inv_mpu_iio_s *st, + u32 axis, u16 threshold) { /* Sets the tap threshold in the dmp Simultaneously sets secondary tap threshold to help correct the tap direction for soft taps */ int result; /* DMP Algorithm */ - unsigned char data[2]; + u8 data[2]; int sampleDivider; int scaledThreshold; - unsigned int dmpThreshold; - unsigned char sample_div; -#define accel_sens (0x20000000/0x00010000) + u32 dmpThreshold; + u8 sample_div; + const u32 accel_sens = (0x20000000 / 0x00010000); - if ((axis & ~(INV_TAP_AXIS_ALL)) || (threshold > (1<<15))) + if ((axis & ~(INV_TAP_AXIS_ALL)) || (threshold > (1 << 15))) return -EINVAL; sample_div = st->sample_divider; @@ -1067,10 +1374,10 @@ int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, /* Scale to DMP 16 bit value */ if (accel_sens != 0) - dmpThreshold = (unsigned int)(scaledThreshold*accel_sens); + dmpThreshold = (u32)(scaledThreshold * accel_sens); else return -EINVAL; - dmpThreshold = dmpThreshold/DMP_PRECISION; + dmpThreshold = dmpThreshold / DMP_PRECISION; data[0] = dmpThreshold >> 8; data[1] = dmpThreshold & 0xFF; @@ -1083,8 +1390,8 @@ int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, /*Also set additional threshold for correcting the direction of taps that were very near the threshold. 
*/ - data[0] = (dmpThreshold*3/4) >> 8; - data[1] = (dmpThreshold*3/4) & 0xFF; + data[0] = (dmpThreshold * 3 / 4) >> 8; + data[1] = (dmpThreshold * 3 / 4) & 0xFF; result = mem_w_key(KEY_D_1_36, ARRAY_SIZE(data), data); if (result) return result; @@ -1093,8 +1400,8 @@ int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, result = mem_w_key(KEY_DMP_TAP_THR_Y, 2, data); if (result) return result; - data[0] = (dmpThreshold*3/4) >> 8; - data[1] = (dmpThreshold*3/4) & 0xFF; + data[0] = (dmpThreshold * 3 / 4) >> 8; + data[1] = (dmpThreshold * 3 / 4) & 0xFF; result = mem_w_key(KEY_D_1_40, ARRAY_SIZE(data), data); if (result) @@ -1104,8 +1411,8 @@ int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, result = mem_w_key(KEY_DMP_TAP_THR_Z, ARRAY_SIZE(data), data); if (result) return result; - data[0] = (dmpThreshold*3/4) >> 8; - data[1] = (dmpThreshold*3/4) & 0xFF; + data[0] = (dmpThreshold * 3 / 4) >> 8; + data[1] = (dmpThreshold * 3 / 4) & 0xFF; result = mem_w_key(KEY_D_1_44, ARRAY_SIZE(data), data); if (result) @@ -1114,47 +1421,48 @@ int inv_set_tap_threshold_dmp(struct inv_gyro_state_s *st, return 0; } -static int inv_set_tap_axes_dmp(struct inv_gyro_state_s *st, - unsigned int axes) +static int inv_set_tap_axes_dmp(struct inv_mpu_iio_s *st, + u32 axes) { /* Sets a mask in the DMP that indicates what tap events should result in an interrupt */ - unsigned char regs[4]; - unsigned char result; + u8 regs[4]; + u8 result; /* check if any spurious bit other the ones expected are set */ if (axes & (~(INV_TAP_ALL_DIRECTIONS))) return -EINVAL; - regs[0] = (unsigned char)axes; + regs[0] = (u8)axes; result = mem_w_key(KEY_D_1_72, 1, regs); return result; } -int inv_set_min_taps_dmp(struct inv_gyro_state_s *st, - unsigned int min_taps) { +int inv_set_min_taps_dmp(struct inv_mpu_iio_s *st, + u16 min_taps) { /*Indicates the minimum number of consecutive taps required before the DMP will generate an interrupt */ - unsigned char regs[1]; - unsigned char result; + u8 regs[1]; + u8 result; /* check if any spurious bit other the ones expected are set */ if ((min_taps > DMP_MAX_MIN_TAPS) || (min_taps < 1)) return -EINVAL; - regs[0] = (unsigned char)(min_taps-1); + regs[0] = (u8)(min_taps-1); result = mem_w_key(KEY_D_1_79, ARRAY_SIZE(regs), regs); return result; } -int inv_set_tap_time_dmp(struct inv_gyro_state_s *st, unsigned int time) + +int inv_set_tap_time_dmp(struct inv_mpu_iio_s *st, u16 time) { /* Determines how long after a tap the DMP requires before another tap can be registered*/ int result; /* DMP Algorithm */ - unsigned short dmpTime; - unsigned char data[2]; - unsigned char sampleDivider; + u16 dmpTime; + u8 data[2]; + u8 sampleDivider; sampleDivider = st->sample_divider; sampleDivider++; @@ -1165,19 +1473,20 @@ int inv_set_tap_time_dmp(struct inv_gyro_state_s *st, unsigned int time) data[1] = dmpTime & 0xFF; result = mem_w_key(KEY_DMP_TAPW_MIN, ARRAY_SIZE(data), data); + return result; } -static int inv_set_multiple_tap_time_dmp(struct inv_gyro_state_s *st, - unsigned int time) +static int inv_set_multiple_tap_time_dmp(struct inv_mpu_iio_s *st, + u32 time) { /*Determines how close together consecutive taps must occur to be considered double/triple taps*/ int result; /* DMP Algorithm */ - unsigned short dmpTime; - unsigned char data[2]; - unsigned char sampleDivider; + u16 dmpTime; + u8 data[2]; + u8 sampleDivider; sampleDivider = st->sample_divider; sampleDivider++; @@ -1186,21 +1495,25 @@ static int inv_set_multiple_tap_time_dmp(struct inv_gyro_state_s *st, dmpTime = ((time) / 
sampleDivider); data[0] = dmpTime >> 8; data[1] = dmpTime & 0xFF; - result = mem_w_key(KEY_D_1_218, ARRAY_SIZE(data), data); + return result; } -long inv_q30_mult(long a, long b) + +int inv_q30_mult(int a, int b) { - long long temp; - long result; - temp = (long long)a * b; - result = (long)(temp >> DMP_MULTI_SHIFT); + u64 temp; + int result; + + temp = (u64)a * b; + result = (int)(temp >> DMP_MULTI_SHIFT); + return result; } -static unsigned short inv_row_2_scale(const signed char *row) + +static u16 inv_row_2_scale(const s8 *row) { - unsigned short b; + u16 b; if (row[0] > 0) b = 0; @@ -1216,7 +1529,7 @@ static unsigned short inv_row_2_scale(const signed char *row) b = 6; else b = 7; - /* error */ + return b; } @@ -1234,10 +1547,10 @@ static unsigned short inv_row_2_scale(const signed char *row) * bit number 8 being the sign. In binary the identity matrix would therefor * be: 010_001_000 or 0x88 in hex. */ -static unsigned short inv_orientation_matrix_to_scaler(const signed char *mtx) +static u16 inv_orientation_matrix_to_scaler(const signed char *mtx) { - unsigned short scalar; + u16 scalar; scalar = inv_row_2_scale(mtx); scalar |= inv_row_2_scale(mtx + 3) << 3; scalar |= inv_row_2_scale(mtx + 6) << 6; @@ -1245,15 +1558,25 @@ static unsigned short inv_orientation_matrix_to_scaler(const signed char *mtx) return scalar; } -static int inv_gyro_dmp_cal(struct inv_gyro_state_s *st) +static int inv_disable_gyro_cal(struct inv_mpu_iio_s *st) +{ + const u8 regs[] = { + 0xb8, 0xaa, 0xaa, 0xaa, + 0xb0, 0x88, 0xc3, 0xc5, + 0xc7 + }; + return mem_w_key(KEY_CFG_MOTION_BIAS, ARRAY_SIZE(regs), regs); +} + +static int inv_gyro_dmp_cal(struct inv_mpu_iio_s *st) { int inv_gyro_orient; - unsigned char regs[3]; + u8 regs[3]; int result; - unsigned char tmpD = DINA4C; - unsigned char tmpE = DINACD; - unsigned char tmpF = DINA6C; + u8 tmpD = DINA4C; + u8 tmpE = DINACD; + u8 tmpF = DINA6C; inv_gyro_orient = inv_orientation_matrix_to_scaler(st->plat_data.orientation); @@ -1277,7 +1600,7 @@ static int inv_gyro_dmp_cal(struct inv_gyro_state_s *st) else if ((inv_gyro_orient & 0xc0) == 0x80) regs[2] = tmpF; - result = mem_w_key(KEY_FCFG_1, 3, regs); + result = mem_w_key(KEY_FCFG_1, ARRAY_SIZE(regs), regs); if (result) return result; @@ -1295,22 +1618,23 @@ static int inv_gyro_dmp_cal(struct inv_gyro_state_s *st) regs[2] = DINA76; result = mem_w_key(KEY_FCFG_3, ARRAY_SIZE(regs), regs); + return result; } -static int inv_accel_dmp_cal(struct inv_gyro_state_s *st) +static int inv_accel_dmp_cal(struct inv_mpu_iio_s *st) { int inv_accel_orient; int result; - unsigned char regs[3]; - const unsigned char tmp[3] = { DINA0C, DINAC9, DINA2C }; + u8 regs[3]; + const u8 tmp[3] = { DINA0C, DINAC9, DINA2C }; inv_accel_orient = inv_orientation_matrix_to_scaler(st->plat_data.orientation); regs[0] = tmp[inv_accel_orient & 3]; regs[1] = tmp[(inv_accel_orient >> 3) & 3]; regs[2] = tmp[(inv_accel_orient >> 6) & 3]; - result = mem_w_key(KEY_FCFG_2, 3, regs); + result = mem_w_key(KEY_FCFG_2, ARRAY_SIZE(regs), regs); if (result) return result; @@ -1324,62 +1648,110 @@ static int inv_accel_dmp_cal(struct inv_gyro_state_s *st) if (inv_accel_orient & 0x100) regs[2] |= 1; result = mem_w_key(KEY_FCFG_7, ARRAY_SIZE(regs), regs); + return result; } -#define gyro_sens (0x03e80000) -static int inv_set_gyro_sf_dmp(struct inv_gyro_state_s *st) +static u16 inv_orientation_matrix_to_scalar(const s8 *mtx) +{ + + u16 scalar; + + /* + XYZ 010_001_000 Identity Matrix + XZY 001_010_000 + YXZ 010_000_001 + YZX 000_010_001 + ZXY 001_000_010 + ZYX 
000_001_010 + */ + + scalar = inv_row_2_scale(mtx); + scalar |= inv_row_2_scale(mtx + 3) << 3; + scalar |= inv_row_2_scale(mtx + 6) << 6; + + return scalar; +} + +int inv_set_accel_bias_dmp(struct inv_mpu_iio_s *st) +{ + int inv_accel_orient, result, i, accel_bias_body[3], out[3]; + int tmp[] = {1, 1, 1}; + int mask[] = {4, 0x20, 0x100}; + int accel_sf = 0x20000000;/* 536870912 */ + u8 *regs; + + inv_accel_orient = + inv_orientation_matrix_to_scalar(st->plat_data.orientation); + + for (i = 0; i < 3; i++) + if (inv_accel_orient & mask[i]) + tmp[i] = -1; + + for (i = 0; i < 3; i++) + accel_bias_body[i] = st->input_accel_bias[(inv_accel_orient >> + (i * 3)) & 3] * tmp[i]; + for (i = 0; i < 3; i++) + accel_bias_body[i] = inv_q30_mult(accel_sf, + accel_bias_body[i]); + for (i = 0; i < 3; i++) + out[i] = cpu_to_be32p(&accel_bias_body[i]); + regs = (u8 *)out; + result = mem_w_key(KEY_D_ACCEL_BIAS, sizeof(out), regs); + + return result; +} + +static int inv_set_gyro_sf_dmp(struct inv_mpu_iio_s *st) { /*The gyro threshold, in dps, above which taps will be rejected*/ - int result, out; + int result; /* DMP Algorithm */ - unsigned char sampleDivider; - unsigned char *regs; - int gyro_sf; + u8 sampleDivider; + u32 gyro_sf; + const u32 gyro_sens = 0x03e80000; sampleDivider = st->sample_divider; gyro_sf = inv_q30_mult(gyro_sens, - (int)(DMP_TAP_SCALE * (sampleDivider+1))); + (int)(DMP_TAP_SCALE * (sampleDivider + 1))); + result = write_be32_key_to_mem(st, gyro_sf, KEY_D_0_104); - out = cpu_to_be32p(&gyro_sf); - regs = (unsigned char *)&out; - result = mem_w_key(KEY_D_0_104, sizeof(out), regs); return result; } -static int inv_set_shake_reject_thresh_dmp(struct inv_gyro_state_s *st, + +static int inv_set_shake_reject_thresh_dmp(struct inv_mpu_iio_s *st, int thresh) { /*THIS FUNCTION FAILS MEM_W*/ /*The gyro threshold, in dps, above which taps will be rejected */ - int result, out; + int result; /* DMP Algorithm */ - unsigned char sampleDivider; + u8 sampleDivider; int thresh_scaled; - unsigned char *regs; - long gyro_sf; + u32 gyro_sf; + const u32 gyro_sens = 0x03e80000; sampleDivider = st->sample_divider; gyro_sf = inv_q30_mult(gyro_sens, (int)(DMP_TAP_SCALE * - (sampleDivider+1))); + (sampleDivider + 1))); /* We're in units of DPS, convert it back to chip units*/ /*split the operation to aviod overflow of integer*/ - thresh_scaled = gyro_sens/(1L<<16); - thresh_scaled = thresh_scaled/thresh; + thresh_scaled = gyro_sens / (1L << 16); + thresh_scaled = thresh_scaled / thresh; thresh_scaled = gyro_sf / thresh_scaled; - out = cpu_to_be32p(&thresh_scaled); - regs = (unsigned char *)&out; + result = write_be32_key_to_mem(st, thresh_scaled, KEY_D_1_92); - result = mem_w_key(KEY_D_1_92, sizeof(out), regs); return result; } -static int inv_set_shake_reject_time_dmp(struct inv_gyro_state_s *st, - unsigned int time) + +static int inv_set_shake_reject_time_dmp(struct inv_mpu_iio_s *st, + u32 time) { /* How long a gyro axis must remain above its threshold before taps are rejected */ int result; /* DMP Algorithm */ - unsigned short dmpTime; - unsigned char data[2]; - unsigned char sampleDivider; + u16 dmpTime; + u8 data[2]; + u8 sampleDivider; sampleDivider = st->sample_divider; sampleDivider++; @@ -1393,16 +1765,16 @@ static int inv_set_shake_reject_time_dmp(struct inv_gyro_state_s *st, return result; } -static int inv_set_shake_reject_timeout_dmp(struct inv_gyro_state_s *st, - unsigned int time) +static int inv_set_shake_reject_timeout_dmp(struct inv_mpu_iio_s *st, + u32 time) { /*How long the gyros must remain 
below their threshold, after taps have been rejected, before taps can be detected again*/ int result; /* DMP Algorithm */ - unsigned short dmpTime; - unsigned char data[2]; - unsigned char sampleDivider; + u16 dmpTime; + u8 data[2]; + u8 sampleDivider; sampleDivider = st->sample_divider; sampleDivider++; @@ -1416,45 +1788,49 @@ static int inv_set_shake_reject_timeout_dmp(struct inv_gyro_state_s *st, return result; } -static int inv_set_interrupt_on_gesture_event(struct inv_gyro_state_s *st, - char on) +int inv_set_interrupt_on_gesture_event(struct inv_mpu_iio_s *st, bool on) { - unsigned char result; - const unsigned char regs_on[] = {DINADA, DINADA, DINAB1, DINAB9, - DINAF3, DINA8B, DINAA3, DINA91, - DINAB6, DINADA, DINAB4, DINADA}; - const unsigned char regs_off[] = {0xd8, 0xd8, 0xb1, 0xb9, 0xf3, 0x8b, - 0xa3, 0x91, 0xb6, 0x09, 0xb4, 0xd9}; + u8 result; + const u8 regs_on[] = {DINADA, DINAB1, DINAB9, + DINAF3, DINA8B, DINAA3, DINA91, + DINAB6, DINADA, DINAB4, DINADA}; + const u8 regs_off[] = {0xd8, 0xb1, 0xb9, 0xf3, 0x8b, + 0xa3, 0x91, 0xb6, 0x09, 0xb4, 0xd9}; /*For some reason DINAC4 is defined as 0xb8, but DINBC4 is not defined.*/ - const unsigned char regs_end[] = {DINAFE, DINAF2, DINAAB, 0xc4, - DINAAA, DINAF1, DINADF, DINADF}; - if (on) { + const u8 regs_end[] = {DINAFE, DINAF2, DINAAB, 0xc4, + DINAAA, DINAF1, DINADF, DINADF, + 0xbb, 0xaf, DINADF, DINADF}; + const u8 regs[] = {0, 0}; + /* reset fifo count to zero */ + result = mem_w_key(KEY_D_1_178, ARRAY_SIZE(regs), regs); + if (result) + return result; + + if (on) /*Sets the DMP to send an interrupt and put a FIFO packet in the FIFO if and only if a tap/orientation event just occurred*/ result = mem_w_key(KEY_CFG_FIFO_ON_EVENT, ARRAY_SIZE(regs_on), regs_on); - if (result) - return result; - } else { + else /*Sets the DMP to send an interrupt and put a FIFO packet in the FIFO at the rate specified by the FIFO div. see inv_set_fifo_div in hw_setup.c to set the FIFO div.*/ result = mem_w_key(KEY_CFG_FIFO_ON_EVENT, ARRAY_SIZE(regs_off), regs_off); - if (result) - return result; - } + if (result) + return result; result = mem_w_key(KEY_CFG_6, ARRAY_SIZE(regs_end), regs_end); + return result; } /** * inv_enable_tap_dmp() - calling this function will enable/disable tap function. 
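 * @st: device driver instance.
 * @on: true enables the DMP tap engine and programs the per-axis tap
 *      thresholds; false disables it.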
*/ -int inv_enable_tap_dmp(struct inv_gyro_state_s *st, unsigned char on) +int inv_enable_tap_dmp(struct inv_mpu_iio_s *st, bool on) { int result; result = inv_set_tap_interrupt_dmp(st, on); @@ -1462,15 +1838,15 @@ int inv_enable_tap_dmp(struct inv_gyro_state_s *st, unsigned char on) return result; if (on) { result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_X, - st->tap.thresh); + st->tap.thresh); if (result) return result; result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_Y, - st->tap.thresh); + st->tap.thresh); if (result) return result; result = inv_set_tap_threshold_dmp(st, INV_TAP_AXIS_Z, - st->tap.thresh); + st->tap.thresh); if (result) return result; } @@ -1503,103 +1879,17 @@ int inv_enable_tap_dmp(struct inv_gyro_state_s *st, unsigned char on) return result; result = inv_set_shake_reject_timeout_dmp(st, - DMP_SHAKE_REJECT_TIMEOUT); - if (result) - return result; - - result = inv_set_interrupt_on_gesture_event(st, 0); - return result; -} -static int inv_set_orientation_dmp(struct inv_gyro_state_s *st, - int orientation) -{ - /*Set a mask in the DMP determining what orientations - will trigger interrupts*/ - unsigned char regs[4]; - unsigned char result; - - /* check if any spurious bit other the ones expected are set */ - if (orientation & (~(INV_ORIENTATION_ALL | INV_ORIENTATION_FLIP))) - return -EINVAL; - - regs[0] = (unsigned char)orientation; - result = mem_w_key(KEY_D_1_74, 1, regs); - return result; -} -static int inv_set_orientation_thresh_dmp(struct inv_gyro_state_s *st, - int angle) -{ - /*Set an angle threshold in the DMP determining - when orientations change*/ - unsigned char *regs; - unsigned char result; - unsigned int out; - unsigned int d; - const unsigned int threshold[] = {138952416, 268435455, 379625062, - 464943848, 518577479, 536870912}; - /*threshold = (long)((1<<29) * sin((angle * M_PI) / 180.));*/ - d = angle/DMP_ANGLE_SCALE; - d -= 1; - if (d >= ARRAY_SIZE(threshold)) - return -EPERM; - out = cpu_to_be32p(&threshold[d]); - regs = (unsigned char *)&out; - - result = mem_w_key(KEY_D_1_232, sizeof(out), regs); - return result; -} -static int inv_set_orientation_time_dmp(struct inv_gyro_state_s *st, - unsigned int time) -{ - /*Determines the stability time required before a - new orientation can be adopted */ - unsigned short dmpTime; - unsigned char data[2]; - unsigned char sampleDivider; - unsigned char result; - /* First check if we are allowed to call this function here */ - sampleDivider = st->sample_divider; - sampleDivider++; - /* 60 ms minimum time added */ - dmpTime = ((time) / sampleDivider); - data[0] = dmpTime >> 8; - data[1] = dmpTime & 0xFF; - result = mem_w_key(KEY_D_1_250, 2, data); - - return result; -} - -/** - * inv_enable_orientation_dmp() - calling this function will - * enable/disable orientation function. 
- */ -int inv_enable_orientation_dmp(struct inv_gyro_state_s *st, int on) -{ - int result; - result = inv_set_orientation_interrupt_dmp(st, on); - if (result) - return result; - result = inv_set_orientation_dmp(st, 0x40 | INV_ORIENTATION_ALL); - if (result) - return result; - result = inv_set_gyro_sf_dmp(st); - if (result) - return result; - result = inv_set_orientation_thresh_dmp(st, DMP_ORIENTATION_ANGLE); - if (result) - return result; - result = inv_set_orientation_time_dmp(st, DMP_ORIENTATION_TIME); + DMP_SHAKE_REJECT_TIMEOUT); return result; } -static int inv_send_sensor_data(struct inv_gyro_state_s *st, - unsigned short elements) +int inv_send_sensor_data(struct inv_mpu_iio_s *st, u16 elements) { int result; - unsigned char regs[] = { DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, + u8 regs[] = {DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, - DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, DINAA0 + 3 - }; + DINAA0 + 3, DINAA0 + 3, DINAA0 + 3, + DINAA0 + 3}; if (elements & INV_ELEMENT_1) regs[0] = DINACA; @@ -1609,8 +1899,8 @@ static int inv_send_sensor_data(struct inv_gyro_state_s *st, regs[5] = DINACC; if (elements & INV_ELEMENT_4) regs[6] = DINBC6; - if ((elements & INV_ELEMENT_5) || (elements & INV_ELEMENT_6) - || (elements & INV_ELEMENT_7)) { + if ((elements & INV_ELEMENT_5) || (elements & INV_ELEMENT_6) || + (elements & INV_ELEMENT_7)) { regs[1] = DINBC0; regs[2] = DINAC8; regs[3] = DINBC2; @@ -1618,12 +1908,18 @@ static int inv_send_sensor_data(struct inv_gyro_state_s *st, result = mem_w_key(KEY_CFG_15, ARRAY_SIZE(regs), regs); return result; } -static int inv_send_interrupt_word(struct inv_gyro_state_s *st) + +int inv_send_interrupt_word(struct inv_mpu_iio_s *st, bool on) { - const unsigned char regs[] = { DINA20 }; - unsigned char result; + const u8 regs_on[] = { DINA20 }; + const u8 regs_off[] = { DINAA3 }; + u8 result; + + if (on) + result = mem_w_key(KEY_CFG_27, ARRAY_SIZE(regs_on), regs_on); + else + result = mem_w_key(KEY_CFG_27, ARRAY_SIZE(regs_off), regs_off); - result = mem_w_key(KEY_CFG_27, ARRAY_SIZE(regs), regs); return result; } @@ -1635,25 +1931,42 @@ ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t pos, size_t size) { - unsigned char *firmware; + u8 *firmware; int result; struct inv_reg_map_s *reg; struct iio_dev *indio_dev; - struct inv_gyro_state_s *st; + struct inv_mpu_iio_s *st; indio_dev = dev_get_drvdata(container_of(kobj, struct device, kobj)); st = iio_priv(indio_dev); - if (st->chip_config.is_asleep) - return -EPERM; - if (1 == st->chip_config.firmware_loaded) + if (st->chip_config.firmware_loaded) return -EINVAL; + reg = &st->reg; + if (DMP_IMAGE_SIZE != size) { + pr_err("wrong DMP image size\n"); + return -EINVAL; + } firmware = kmalloc(size, GFP_KERNEL); if (!firmware) return -ENOMEM; + + mutex_lock(&indio_dev->mlock); + memcpy(firmware, buf, size); + result = crc32(CRC_FIRMWARE_SEED, firmware, size); + if (DMP_IMAGE_CRC_VALUE != result) { + pr_err("firmware CRC error - 0x%08x vs 0x%08x\n", + result, DMP_IMAGE_CRC_VALUE); + result = -EINVAL; + goto firmware_write_fail; + } + + result = st->set_power_state(st, true); + if (result) + goto firmware_write_fail; result = inv_load_firmware(st, firmware, size); if (result) @@ -1672,16 +1985,7 @@ ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, if (result) goto firmware_write_fail; - result = inv_verify_firmware(st, firmware, size); - if (result) - goto firmware_write_fail; result = inv_set_fifo_rate(st, 
DMP_DEFAULT_FIFO_RATE); - if (result) - goto firmware_write_fail; - result = inv_send_sensor_data(st, INV_GYRO_ACC_MASK); - if (result) - goto firmware_write_fail; - result = inv_send_interrupt_word(st); if (result) goto firmware_write_fail; result = inv_gyro_dmp_cal(st); @@ -1690,28 +1994,42 @@ ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, result = inv_accel_dmp_cal(st); if (result) goto firmware_write_fail; + result = inv_disable_gyro_cal(st); if (result) goto firmware_write_fail; + st->chip_config.firmware_loaded = 1; - result = size; + firmware_write_fail: + result |= st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); kfree(firmware); - return result; + if (result) + return result; + return size; } + ssize_t inv_dmp_firmware_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { int bank, write_size, size, data, result; - unsigned short memaddr; + u16 memaddr; struct iio_dev *indio_dev; - struct inv_gyro_state_s *st; - size = count; + struct inv_mpu_iio_s *st; + size = count; indio_dev = dev_get_drvdata(container_of(kobj, struct device, kobj)); st = iio_priv(indio_dev); + data = 0; + mutex_lock(&indio_dev->mlock); + result = st->set_power_state(st, true); + if (result) { + mutex_unlock(&indio_dev->mlock); + return result; + } for (bank = 0; size > 0; bank++, size -= write_size, data += write_size) { if (size > MPU_MEM_BANK_SIZE) @@ -1719,15 +2037,21 @@ ssize_t inv_dmp_firmware_read(struct file *filp, else write_size = size; - memaddr = ((bank << 8) | 0x00); - result = mpu_memory_read(st->sl_handle, + memaddr = (bank << 8); + result = mpu_memory_read(st, st->i2c_addr, memaddr, write_size, &buf[data]); - if (result) + if (result) { + mutex_unlock(&indio_dev->mlock); return result; + } } + result = st->set_power_state(st, false); + mutex_unlock(&indio_dev->mlock); + if (result) + return result; + return count; } /** * @} */ - diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_ring.c b/drivers/staging/iio/imu/mpu/inv_mpu_ring.c index 880b0b8c790..29ce25078cf 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_ring.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_ring.c @@ -17,11 +17,13 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro_misc.c + * @file inv_mpu_ring.c * @brief A sysfs device driver for Invensense gyroscopes. - * @details This file is part of inv_gyro driver code + * @details This file is part of inv mpu iio driver code */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -35,12 +37,14 @@ #include #include #include -#include "inv_mpu_iio.h" + #include "../../iio.h" #include "../../kfifo_buf.h" #include "../../trigger_consumer.h" #include "../../sysfs.h" +#include "inv_mpu_iio.h" + /** * reset_fifo_mpu3050() - Reset FIFO related registers * @st: Device driver instance. 
@@ -49,27 +53,13 @@ static int reset_fifo_mpu3050(struct iio_dev *indio_dev) { struct inv_reg_map_s *reg; int result; - unsigned char val, user_ctrl; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - struct iio_buffer *ring = indio_dev->buffer; - + u8 val, user_ctrl; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); reg = &st->reg; - if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_X) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Y) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Z)) - st->chip_config.gyro_fifo_enable = 1; - else - st->chip_config.gyro_fifo_enable = 0; - - if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_X) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Y) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Z)) - st->chip_config.accl_fifo_enable = 1; - else - st->chip_config.accl_fifo_enable = 0; /* disable interrupt */ - result = inv_i2c_single_write(st, reg->int_enable, 0); + result = inv_i2c_single_write(st, reg->int_enable, + st->plat_data.int_config); if (result) return result; /* disable the sensor output to FIFO */ @@ -87,11 +77,11 @@ static int reset_fifo_mpu3050(struct iio_dev *indio_dev) result = inv_i2c_single_write(st, reg->user_ctrl, val); if (result) goto reset_fifo_fail; - st->last_isr_time = iio_get_time_ns(); + st->last_isr_time = get_time_ns(); if (st->chip_config.dmp_on) { /* enable interrupt when DMP is done */ result = inv_i2c_single_write(st, reg->int_enable, - BIT_DMP_INT_EN); + st->plat_data.int_config | BIT_DMP_INT_EN); if (result) return result; @@ -102,9 +92,9 @@ static int reset_fifo_mpu3050(struct iio_dev *indio_dev) } else { /* enable interrupt */ if (st->chip_config.accl_fifo_enable || - st->chip_config.gyro_fifo_enable){ + st->chip_config.gyro_fifo_enable) { result = inv_i2c_single_write(st, reg->int_enable, - BIT_DATA_RDY_EN); + st->plat_data.int_config | BIT_DATA_RDY_EN); if (result) return result; } @@ -130,48 +120,129 @@ static int reset_fifo_mpu3050(struct iio_dev *indio_dev) val = BIT_DMP_INT_EN; else val = BIT_DATA_RDY_EN; - inv_i2c_single_write(st, reg->int_enable, val); - pr_err("%s failed\n", __func__); + inv_i2c_single_write(st, reg->int_enable, + st->plat_data.int_config | val); + pr_err("reset fifo failed\n"); + return result; } + /** - * reset_fifo_itg() - Reset FIFO related registers. - * @st: Device driver instance. + * inv_set_lpf() - set low pass filer based on fifo rate. 
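+ * @st:   device driver instance.
+ * @rate: FIFO rate in Hz; the widest filter from the 188/98/42/20/10/5 Hz
+ *        table that does not exceed rate/2 is selected (5 Hz as the floor).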
*/ -static int reset_fifo_itg(struct iio_dev *indio_dev) +static int inv_set_lpf(struct inv_mpu_iio_s *st, int rate) { + const short hz[] = {188, 98, 42, 20, 10, 5}; + const int d[] = {INV_FILTER_188HZ, INV_FILTER_98HZ, + INV_FILTER_42HZ, INV_FILTER_20HZ, + INV_FILTER_10HZ, INV_FILTER_5HZ}; + int i, h, data, result; struct inv_reg_map_s *reg; + reg = &st->reg; + h = (rate >> 1); + i = 0; + while ((h < hz[i]) && (i < ARRAY_SIZE(d) - 1)) + i++; + data = d[i]; + if (INV_MPU3050 == st->chip_type) { + if (st->mpu_slave != NULL) { + result = st->mpu_slave->set_lpf(st, rate); + if (result) + return result; + } + result = inv_i2c_single_write(st, reg->lpf, data | + (st->chip_config.fsr << GYRO_CONFIG_FSR_SHIFT)); + } else { + result = inv_i2c_single_write(st, reg->lpf, data); + } + if (result) + return result; + st->chip_config.lpf = data; + + return 0; +} + +/** + * set_fifo_rate_reg() - Set fifo rate in hardware register + */ +static int set_fifo_rate_reg(struct inv_mpu_iio_s *st) +{ + u8 data; + u16 fifo_rate; int result; - unsigned char val; - struct inv_gyro_state_s *st = iio_priv(indio_dev); - struct iio_buffer *ring = indio_dev->buffer; + struct inv_reg_map_s *reg; reg = &st->reg; - if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_X) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Y) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Z)) - st->chip_config.gyro_fifo_enable = 1; - else - st->chip_config.gyro_fifo_enable = 0; + fifo_rate = st->chip_config.new_fifo_rate; + data = ONE_K_HZ / fifo_rate - 1; + result = inv_i2c_single_write(st, reg->sample_rate_div, data); + if (result) + return result; + result = inv_set_lpf(st, fifo_rate); + if (result) + return result; + st->chip_config.fifo_rate = fifo_rate; - if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_X) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Y) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Z)) - st->chip_config.accl_fifo_enable = 1; - else - st->chip_config.accl_fifo_enable = 0; + return 0; +} - if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_X) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_Y) || - iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_Z)) - st->chip_config.compass_fifo_enable = 1; +/** + * inv_lpa_mode() - store current low power mode settings + */ +static int inv_lpa_mode(struct inv_mpu_iio_s *st, int lpa_mode) +{ + unsigned long result; + u8 d; + struct inv_reg_map_s *reg; + + reg = &st->reg; + result = inv_i2c_read(st, reg->pwr_mgmt_1, 1, &d); + if (result) + return result; + if (lpa_mode) + d |= BIT_CYCLE; else - st->chip_config.compass_fifo_enable = 0; + d &= ~BIT_CYCLE; + + result = inv_i2c_single_write(st, reg->pwr_mgmt_1, d); + if (result) + return result; + if (INV_MPU6500 == st->chip_type) { + if (lpa_mode) + d = BIT_ACCEL_FCHOCIE_B; + else + d = 0; + result = inv_i2c_single_write(st, REG_6500_ACCEL_CONFIG2, d); + if (result) + return result; + } + + return 0; +} + +/** + * reset_fifo_itg() - Reset FIFO related registers. + * @st: Device driver instance. 
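+ *
+ * Returns 0 on success, or a negative error code when a register access
+ * fails.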
+ */ +static int reset_fifo_itg(struct iio_dev *indio_dev) +{ + struct inv_reg_map_s *reg; + int result, data; + u8 val, int_word; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + reg = &st->reg; + if (st->chip_config.lpa_mode) { + result = inv_lpa_mode(st, 0); + if (result) { + pr_err("reset lpa mode failed\n"); + return result; + } + } /* disable interrupt */ result = inv_i2c_single_write(st, reg->int_enable, 0); if (result) { - pr_err("%s failed\n", __func__); + pr_err("int_enable write failed\n"); return result; } /* disable the sensor output to FIFO */ @@ -182,41 +253,75 @@ static int reset_fifo_itg(struct iio_dev *indio_dev) result = inv_i2c_single_write(st, reg->user_ctrl, 0); if (result) goto reset_fifo_fail; + int_word = 0; + + /* MPU6500's BIT_6500_WOM_EN is the same as BIT_MOT_EN */ + if (st->mot_int.mot_on) + int_word |= BIT_MOT_EN; if (st->chip_config.dmp_on) { val = (BIT_FIFO_RST | BIT_DMP_RST); result = inv_i2c_single_write(st, reg->user_ctrl, val); if (result) goto reset_fifo_fail; - st->last_isr_time = iio_get_time_ns(); + st->last_isr_time = get_time_ns(); if (st->chip_config.dmp_int_on) { + int_word |= BIT_DMP_INT_EN; result = inv_i2c_single_write(st, reg->int_enable, - BIT_DMP_INT_EN); + int_word); if (result) return result; } val = (BIT_DMP_EN | BIT_FIFO_EN); - if (st->chip_config.compass_enable) + if (st->chip_config.compass_enable & + (!st->chip_config.dmp_event_int_on)) val |= BIT_I2C_MST_EN; result = inv_i2c_single_write(st, reg->user_ctrl, val); if (result) goto reset_fifo_fail; + + if (st->chip_config.compass_enable) { + /* I2C_MST_DLY is set according to sample rate, + slow down the power*/ + data = max(COMPASS_RATE_SCALE * + st->chip_config.new_fifo_rate / ONE_K_HZ, + st->chip_config.new_fifo_rate / + st->chip_config.dmp_output_rate); + if (data > 0) + data -= 1; + result = inv_i2c_single_write(st, REG_I2C_SLV4_CTRL, + data); + if (result) + return result; + } + val = 0; + if (st->chip_config.accl_fifo_enable) + val |= INV_ACCL_MASK; + if (st->chip_config.gyro_fifo_enable) + val |= INV_GYRO_MASK; + result = inv_send_sensor_data(st, val); + if (result) + return result; + if (st->chip_config.display_orient_on || st->chip_config.tap_on) + result = inv_send_interrupt_word(st, true); + else + result = inv_send_interrupt_word(st, false); } else { /* reset FIFO and possibly reset I2C*/ val = BIT_FIFO_RST; result = inv_i2c_single_write(st, reg->user_ctrl, val); if (result) goto reset_fifo_fail; - st->last_isr_time = iio_get_time_ns(); + st->last_isr_time = get_time_ns(); /* enable interrupt */ if (st->chip_config.accl_fifo_enable || - st->chip_config.gyro_fifo_enable || - st->chip_config.compass_enable){ - result = inv_i2c_single_write(st, reg->int_enable, - BIT_DATA_RDY_EN); - if (result) - return result; + st->chip_config.gyro_fifo_enable || + st->chip_config.compass_enable) { + int_word |= BIT_DATA_RDY_EN; } + result = inv_i2c_single_write(st, reg->int_enable, int_word); + if (result) + return result; /* enable FIFO reading and I2C master interface*/ val = BIT_FIFO_EN; if (st->chip_config.compass_enable) @@ -224,6 +329,18 @@ static int reset_fifo_itg(struct iio_dev *indio_dev) result = inv_i2c_single_write(st, reg->user_ctrl, val); if (result) goto reset_fifo_fail; + if (st->chip_config.compass_enable) { + /* I2C_MST_DLY is set according to sample rate, + slow down the power*/ + data = COMPASS_RATE_SCALE * + st->chip_config.new_fifo_rate / ONE_K_HZ; + if (data > 0) + data -= 1; + result = inv_i2c_single_write(st, REG_I2C_SLV4_CTRL, + data); + if (result) + return 
result; + } /* enable sensor output to FIFO */ val = 0; if (st->chip_config.gyro_fifo_enable) @@ -234,28 +351,65 @@ static int reset_fifo_itg(struct iio_dev *indio_dev) if (result) goto reset_fifo_fail; } + st->chip_config.normal_compass_measure = 0; + result = inv_lpa_mode(st, st->chip_config.lpa_mode); + if (result) + goto reset_fifo_fail; + return 0; + reset_fifo_fail: if (st->chip_config.dmp_on) val = BIT_DMP_INT_EN; else val = BIT_DATA_RDY_EN; inv_i2c_single_write(st, reg->int_enable, val); - pr_err("%s failed\n", __func__); + pr_err("reset fifo failed\n"); + return result; } + +/** + * inv_clear_kfifo() - clear time stamp fifo + * @st: Device driver instance. + */ +static void inv_clear_kfifo(struct inv_mpu_iio_s *st) +{ + unsigned long flags; + + spin_lock_irqsave(&st->time_stamp_lock, flags); + kfifo_reset(&st->timestamps); + spin_unlock_irqrestore(&st->time_stamp_lock, flags); +} + /** * inv_reset_fifo() - Reset FIFO related registers. * @st: Device driver instance. */ static int inv_reset_fifo(struct iio_dev *indio_dev) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + + inv_clear_kfifo(st); if (INV_MPU3050 == st->chip_type) return reset_fifo_mpu3050(indio_dev); else return reset_fifo_itg(indio_dev); } + +static int inv_set_dmp_sysfs(struct inv_mpu_iio_s *st) +{ + int result; + + result = inv_set_fifo_rate(st, st->chip_config.dmp_output_rate); + if (result) + return result; + result = inv_set_interrupt_on_gesture_event(st, + st->chip_config.dmp_event_int_on); + + return result; +} + /** * set_inv_enable() - Reset FIFO related registers. * This also powers on the chip if needed. @@ -263,47 +417,77 @@ static int inv_reset_fifo(struct iio_dev *indio_dev) * @fifo_enable: enable/disable */ int set_inv_enable(struct iio_dev *indio_dev, - unsigned long enable) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); + bool enable) { + struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct inv_reg_map_s *reg; int result; - if (st->chip_config.is_asleep) - return -EINVAL; reg = &st->reg; if (enable) { + if (st->chip_config.new_fifo_rate != + st->chip_config.fifo_rate) { + result = set_fifo_rate_reg(st); + if (result) + return result; + } + if (st->chip_config.dmp_on) { + result = inv_set_dmp_sysfs(st); + if (result) + return result; + } + + if (st->chip_config.gyro_enable) { + result = st->switch_gyro_engine(st, true); + if (result) + return result; + } + if (st->chip_config.accl_enable) { + result = st->switch_accl_engine(st, true); + if (result) + return result; + } + result = inv_reset_fifo(indio_dev); if (result) return result; - st->chip_config.enable = 1; } else { + if ((INV_MPU3050 != st->chip_type) + && st->chip_config.lpa_mode) { + /* if the chip is in low power mode, + register write/read could fail */ + result = inv_lpa_mode(st, 0); + if (result) + return result; + } result = inv_i2c_single_write(st, reg->fifo_en, 0); - if (result) - return result; - result = inv_i2c_single_write(st, reg->int_enable, 0); if (result) return result; /* disable fifo reading */ if (INV_MPU3050 != st->chip_type) { - result = inv_i2c_single_write(st, reg->user_ctrl, 0); + result = inv_i2c_single_write(st, reg->int_enable, 0); if (result) return result; + result = inv_i2c_single_write(st, reg->user_ctrl, 0); + } else { + result = inv_i2c_single_write(st, reg->int_enable, + st->plat_data.int_config); } - st->chip_config.enable = 0; + if (result) + return result; + /* turn off the gyro/accl engine during disable phase */ + result = 
st->switch_gyro_engine(st, false); + if (result) + return result; + result = st->switch_accl_engine(st, false); + if (result) + return result; + result = st->set_power_state(st, false); + if (result) + return result; } - return 0; -} + st->chip_config.enable = enable; -/** - * inv_clear_kfifo() - clear time stamp fifo - * @st: Device driver instance. - */ -void inv_clear_kfifo(struct inv_gyro_state_s *st) -{ - unsigned long flags; - spin_lock_irqsave(&st->time_stamp_lock, flags); - kfifo_reset(&st->timestamps); - spin_unlock_irqrestore(&st->time_stamp_lock, flags); + return 0; } /** @@ -311,35 +495,34 @@ void inv_clear_kfifo(struct inv_gyro_state_s *st) */ static irqreturn_t inv_irq_handler(int irq, void *dev_id) { - struct inv_gyro_state_s *st; - long long timestamp; - int result, catch_up; - unsigned int time_since_last_irq; - - st = (struct inv_gyro_state_s *)dev_id; - timestamp = iio_get_time_ns(); - time_since_last_irq = ((unsigned int)(timestamp - - st->last_isr_time))/ONE_K_HZ; + struct inv_mpu_iio_s *st; + u64 timestamp; + int catch_up; + u64 time_since_last_irq; + + st = (struct inv_mpu_iio_s *)dev_id; + timestamp = get_time_ns(); + time_since_last_irq = timestamp - st->last_isr_time; spin_lock(&st->time_stamp_lock); catch_up = 0; - while ((time_since_last_irq > st->irq_dur_us*2) - && (catch_up < MAX_CATCH_UP) - && (0 == st->chip_config.lpa_mode)) { - - st->last_isr_time += st->irq_dur_us*ONE_K_HZ; - result = kfifo_in(&st->timestamps, - &st->last_isr_time, 1); - time_since_last_irq = ((unsigned int)(timestamp - - st->last_isr_time))/ONE_K_HZ; + while ((time_since_last_irq > st->irq_dur_ns * 2) && + (catch_up < MAX_CATCH_UP) && + (!st->chip_config.lpa_mode) && + (!st->chip_config.dmp_on)) { + st->last_isr_time += st->irq_dur_ns; + kfifo_in(&st->timestamps, + &st->last_isr_time, 1); + time_since_last_irq = timestamp - st->last_isr_time; catch_up++; } - result = kfifo_in(&st->timestamps, ×tamp, 1); + kfifo_in(&st->timestamps, ×tamp, 1); st->last_isr_time = timestamp; spin_unlock(&st->time_stamp_lock); return IRQ_WAKE_THREAD; } -static int put_scan_to_buf(struct iio_dev *indio_dev, unsigned char *d, + +static int put_scan_to_buf(struct iio_dev *indio_dev, u8 *d, short *s, int scan_index, int d_ind) { struct iio_buffer *ring = indio_dev->buffer; int st; @@ -353,31 +536,17 @@ static int put_scan_to_buf(struct iio_dev *indio_dev, unsigned char *d, } return d_ind; } -static int put_scan_to_buf_q(struct iio_dev *indio_dev, unsigned char *d, - int *s, int scan_index, int d_ind) { - struct iio_buffer *ring = indio_dev->buffer; - int st; - int i; - for (i = 0; i < 4; i++) { - st = iio_scan_mask_query(indio_dev, ring, scan_index + i); - if (st) { - memcpy(&d[d_ind], &s[i], sizeof(s[i])); - d_ind += sizeof(s[i]); - } - } - return d_ind; -} static void inv_report_data_3050(struct iio_dev *indio_dev, s64 t, - int has_footer, unsigned char *data) + int has_footer, u8 *data) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct iio_buffer *ring = indio_dev->buffer; int ind, i, d_ind; struct inv_chip_config_s *conf; - short g[3], a[3]; + short g[THREE_AXIS], a[THREE_AXIS]; s64 buf[8]; - unsigned char *tmp; + u8 *tmp; int bytes_per_datum, scan_count; conf = &st->chip_config; @@ -388,41 +557,48 @@ static void inv_report_data_3050(struct iio_dev *indio_dev, s64 t, ind = 0; if (has_footer) ind += 2; - tmp = (unsigned char *)buf; + tmp = (u8 *)buf; d_ind = 0; + if (conf->gyro_fifo_enable) { - g[0] = be16_to_cpup((__be16 *)(&data[ind])); - g[1] = 
be16_to_cpup((__be16 *)(&data[ind+2])); - g[2] = be16_to_cpup((__be16 *)(&data[ind+4])); - ind += 6; + for (i = 0; i < ARRAY_SIZE(g); i++) { + g[i] = be16_to_cpup((__be16 *)(&data[ind + i * 2])); + st->raw_gyro[i] = g[i]; + } + ind += BYTES_PER_SENSOR; d_ind = put_scan_to_buf(indio_dev, tmp, g, INV_MPU_SCAN_GYRO_X, d_ind); } if (conf->accl_fifo_enable) { st->mpu_slave->combine_data(&data[ind], a); - ind += 6; + for (i = 0; i < ARRAY_SIZE(a); i++) + st->raw_accel[i] = a[i]; + + ind += BYTES_PER_SENSOR; d_ind = put_scan_to_buf(indio_dev, tmp, a, INV_MPU_SCAN_ACCL_X, d_ind); } - i = (bytes_per_datum + 7)/8; + i = (bytes_per_datum + 7) / 8; if (ring->scan_timestamp) buf[i] = t; - ring->access->store_to(indio_dev->buffer, (u8 *) buf, t); + ring->access->store_to(indio_dev->buffer, (u8 *)buf, t); } + /** - * inv_read_fifo_mpu3050() - Transfer data from FIFO to ring buffer for mpu3050. + * inv_read_fifo_mpu3050() - Transfer data from FIFO to ring buffer for + * mpu3050. */ irqreturn_t inv_read_fifo_mpu3050(int irq, void *dev_id) { - struct inv_gyro_state_s *st = (struct inv_gyro_state_s *)dev_id; + struct inv_mpu_iio_s *st = (struct inv_mpu_iio_s *)dev_id; struct iio_dev *indio_dev = iio_priv_to_dev(st); int bytes_per_datum; - unsigned char data[64]; + u8 data[64]; int result; short fifo_count, byte_read; - unsigned int copied; + u32 copied; s64 timestamp; struct inv_reg_map_s *reg; reg = &st->reg; @@ -435,7 +611,7 @@ irqreturn_t inv_read_fifo_mpu3050(int irq, void *dev_id) bytes_per_datum = (st->chip_config.accl_fifo_enable + st->chip_config.gyro_fifo_enable)*BYTES_PER_SENSOR; if (st->chip_config.has_footer) - byte_read = bytes_per_datum + 2; + byte_read = bytes_per_datum + MPU3050_FOOTER_SIZE; else byte_read = bytes_per_datum; @@ -445,10 +621,10 @@ irqreturn_t inv_read_fifo_mpu3050(int irq, void *dev_id) FIFO_COUNT_BYTE, data); if (result) goto end_session; - fifo_count = (data[0] << 8) + data[1]; + fifo_count = be16_to_cpup((__be16 *)(&data[0])); if (fifo_count < byte_read) goto end_session; - if (fifo_count%2) + if (fifo_count & 1) goto flush_fifo; if (fifo_count > FIFO_THRESHOLD) goto flush_fifo; @@ -463,8 +639,9 @@ irqreturn_t inv_read_fifo_mpu3050(int irq, void *dev_id) ×tamp, sizeof(timestamp), &copied); if (result) goto flush_fifo; - } else + } else { goto flush_fifo; + } } } while ((bytes_per_datum != 0) && (fifo_count >= byte_read)) { @@ -477,204 +654,261 @@ irqreturn_t inv_read_fifo_mpu3050(int irq, void *dev_id) if (result) goto flush_fifo; inv_report_data_3050(indio_dev, timestamp, - st->chip_config.has_footer, data); + st->chip_config.has_footer, data); fifo_count -= byte_read; if (st->chip_config.has_footer == 0) { st->chip_config.has_footer = 1; byte_read = bytes_per_datum + MPU3050_FOOTER_SIZE; } } + end_session: return IRQ_HANDLED; + flush_fifo: /* Flush HW and SW FIFOs. 
*/ inv_reset_fifo(indio_dev); inv_clear_kfifo(st); return IRQ_HANDLED; } + static int inv_report_gyro_accl_compass(struct iio_dev *indio_dev, - unsigned char *data, s64 t) + u8 *data, s64 t) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); - struct iio_buffer *ring = indio_dev->buffer; - short g[3], a[3], c[3]; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + short g[THREE_AXIS], a[THREE_AXIS], c[THREE_AXIS]; int q[4]; - int result, ind, d_ind; - s64 buf[8]; - unsigned int word; - unsigned char d[8]; - unsigned char *tmp; - int source; + int result, ind; + u32 word; + u8 d[8], compass_divider; + u8 buf[64]; + u64 *tmp; + int source, i; struct inv_chip_config_s *conf; -#define INT_SRC_TAP 0x01 -#define INT_SRC_ORIENT 0x02 -#define INT_SRC_DISPLAY_ORIENT 0x08 -#define INT_SRC_SHAKE 0x10 conf = &st->chip_config; ind = 0; + if (conf->quaternion_on & conf->dmp_on) { - q[0] = be32_to_cpup((__be32 *)(&data[ind])); - q[1] = be32_to_cpup((__be32 *)(&data[ind+4])); - q[2] = be32_to_cpup((__be32 *)(&data[ind+8])); - q[3] = be32_to_cpup((__be32 *)(&data[ind+12])); - ind += 16; + for (i = 0; i < ARRAY_SIZE(q); i++) { + q[i] = be32_to_cpup((__be32 *)(&data[ind + i * 4])); + st->raw_quaternion[i] = q[i]; + memcpy(&buf[ind + i * sizeof(q[i])], &q[i], + sizeof(q[i])); + } + ind += QUATERNION_BYTES; } - if (conf->accl_fifo_enable | conf->dmp_on) { - a[0] = be16_to_cpup((__be16 *)(&data[ind])); - a[1] = be16_to_cpup((__be16 *)(&data[ind+2])); - a[2] = be16_to_cpup((__be16 *)(&data[ind+4])); - - a[0] *= st->chip_info.multi; - a[1] *= st->chip_info.multi; - a[2] *= st->chip_info.multi; - st->raw_accel[0] = a[0]; - st->raw_accel[1] = a[1]; - st->raw_accel[2] = a[2]; - ind += 6; + + if (conf->accl_fifo_enable) { + for (i = 0; i < ARRAY_SIZE(a); i++) { + a[i] = be16_to_cpup((__be16 *)(&data[ind + i * 2])); + memcpy(&buf[ind + i * sizeof(a[i])], &a[i], + sizeof(a[i])); + } + ind += BYTES_PER_SENSOR; } - if (conf->gyro_fifo_enable | conf->dmp_on) { - g[0] = be16_to_cpup((__be16 *)(&data[ind])); - g[1] = be16_to_cpup((__be16 *)(&data[ind+2])); - g[2] = be16_to_cpup((__be16 *)(&data[ind+4])); - - st->raw_gyro[0] = g[0]; - st->raw_gyro[1] = g[1]; - st->raw_gyro[2] = g[2]; - ind += 6; + + if (conf->gyro_fifo_enable) { + for (i = 0; i < ARRAY_SIZE(g); i++) { + g[i] = be16_to_cpup((__be16 *)(&data[ind + i * 2])); + memcpy(&buf[ind + i * sizeof(g[i])], &g[i], + sizeof(g[i])); + } + ind += BYTES_PER_SENSOR; } - if (conf->dmp_on) { - word = (unsigned int)(be32_to_cpup((unsigned int *)&data[ind])); - source = (word/65536)%256; + + if (conf->dmp_on && (conf->tap_on || conf->display_orient_on)) { + word = (u32)(be32_to_cpup((u32 *)&data[ind])); + source = ((word >> 16) & 0xff); if (source) { - st->tap_data = 0x3f & (word%256); - st->orient_data = (word/256)%256; - st->display_orient_data = ((0xc0 & (word%256))>>6); + st->tap_data = (DMP_MASK_TAP & (word & 0xff)); + st->display_orient_data = + ((DMP_MASK_DIS_ORIEN & (word & 0xff)) >> + DMP_DIS_ORIEN_SHIFT); } /* report tap information */ if (source & INT_SRC_TAP) sysfs_notify(&indio_dev->dev.kobj, NULL, "event_tap"); /* report orientation information */ - if (source & INT_SRC_ORIENT) - sysfs_notify(&indio_dev->dev.kobj, NULL, - "event_orientation"); - /* report orientation information */ if (source & INT_SRC_DISPLAY_ORIENT) sysfs_notify(&indio_dev->dev.kobj, NULL, - "event_display_orientation"); + "event_display_orientation"); } /*divider and counter is used to decrease the speed of read in high frequency sample rate*/ if (conf->compass_fifo_enable) { - c[0] = c[1] = 
c[2] = 0; - if (st->compass_divider == st->compass_counter) { + c[0] = 0; + c[1] = 0; + c[2] = 0; + if (conf->dmp_on) + compass_divider = st->compass_dmp_divider; + else + compass_divider = st->compass_divider; + if (compass_divider <= st->compass_counter) { /*read from external sensor data register */ - result = inv_i2c_read(st, REG_EXT_SENS_DATA_00, 8, d); + result = inv_i2c_read(st, REG_EXT_SENS_DATA_00, + NUM_BYTES_COMPASS_SLAVE, d); /* d[7] is status 2 register */ /*for AKM8975, bit 2 and 3 should be all be zero*/ /* for AMK8963, bit 3 should be zero*/ - if ((DATA_AKM_DRDY == d[0]) - && (0 == (d[7] & DATA_AKM_STAT_MASK)) - && (!result)) { - unsigned char *sens; + if ((DATA_AKM_DRDY == d[0]) && + (0 == (d[7] & DATA_AKM_STAT_MASK)) && + (!result)) { + u8 *sens; sens = st->chip_info.compass_sens; c[0] = (short)((d[2] << 8) | d[1]); c[1] = (short)((d[4] << 8) | d[3]); c[2] = (short)((d[6] << 8) | d[5]); - c[0] = ((c[0] * (sens[0] + 128)) >> 8); - c[1] = ((c[1] * (sens[1] + 128)) >> 8); - c[2] = ((c[2] * (sens[2] + 128)) >> 8); + c[0] = (short)(((int)c[0] * + (sens[0] + 128)) >> 8); + c[1] = (short)(((int)c[1] * + (sens[1] + 128)) >> 8); + c[2] = (short)(((int)c[2] * + (sens[2] + 128)) >> 8); st->raw_compass[0] = c[0]; st->raw_compass[1] = c[1]; st->raw_compass[2] = c[2]; } st->compass_counter = 0; - } else if (st->compass_divider != 0) + } else if (compass_divider != 0) { st->compass_counter++; + } + if (!conf->normal_compass_measure) { + c[0] = 0; + c[1] = 0; + c[2] = 0; + conf->normal_compass_measure = 1; + } + for (i = 0; i < 3; i++) + memcpy(&buf[ind + i * sizeof(c[i])], &c[i], + sizeof(c[i])); + ind += BYTES_PER_SENSOR; } + tmp = (u64 *)buf; + tmp[DIV_ROUND_UP(ind, 8)] = t; - tmp = (unsigned char *)buf; - d_ind = 0; - if (conf->quaternion_on & conf->dmp_on) - d_ind = put_scan_to_buf_q(indio_dev, tmp, q, - INV_MPU_SCAN_QUAT_R, d_ind); - if (conf->gyro_fifo_enable) - d_ind = put_scan_to_buf(indio_dev, tmp, g, - INV_MPU_SCAN_GYRO_X, d_ind); - if (conf->accl_fifo_enable) - d_ind = put_scan_to_buf(indio_dev, tmp, a, - INV_MPU_SCAN_ACCL_X, d_ind); - if (conf->compass_fifo_enable) - d_ind = put_scan_to_buf(indio_dev, tmp, c, - INV_MPU_SCAN_MAGN_X, d_ind); - if (ring->scan_timestamp) - buf[(d_ind + 7)/8] = t; - ring->access->store_to(indio_dev->buffer, (u8 *) buf, t); + if (ind > 0) + iio_push_to_buffer(indio_dev->buffer, buf, t); return 0; } +static void inv_process_motion(struct inv_mpu_iio_s *st) +{ + struct iio_dev *indio_dev = iio_priv_to_dev(st); + s32 diff, true_motion; + s64 timestamp; + int result; + u8 data[1]; + + /* motion interrupt */ + result = inv_i2c_read(st, REG_INT_STATUS, 1, data); + if (result) + return; + + if (data[0] & BIT_MOT_INT) { + timestamp = get_time_ns(); + diff = (int)(((timestamp - st->mpu6500_last_motion_time) >> + NS_PER_MS_SHIFT)); + if (diff > st->mot_int.mot_dur) { + st->mpu6500_last_motion_time = timestamp; + true_motion = 1; + } else { + true_motion = 0; + } + if (true_motion) + sysfs_notify(&indio_dev->dev.kobj, NULL, + "event_accel_motion"); + } +} + +static int get_bytes_per_datum(struct inv_mpu_iio_s *st) +{ + int bytes_per_datum; + + bytes_per_datum = 0; + if (st->chip_config.dmp_on) { + if (st->chip_config.quaternion_on) + bytes_per_datum += QUATERNION_BYTES; + if (st->chip_config.tap_on || + st->chip_config.display_orient_on) + bytes_per_datum += BYTES_FOR_EVENTS; + } + if (st->chip_config.accl_fifo_enable) + bytes_per_datum += BYTES_PER_SENSOR; + if (st->chip_config.gyro_fifo_enable) + bytes_per_datum += BYTES_PER_SENSOR; + + return 
bytes_per_datum; +} + /** * inv_read_fifo() - Transfer data from FIFO to ring buffer. */ irqreturn_t inv_read_fifo(int irq, void *dev_id) { - struct inv_gyro_state_s *st = (struct inv_gyro_state_s *)dev_id; + struct inv_mpu_iio_s *st = (struct inv_mpu_iio_s *)dev_id; struct iio_dev *indio_dev = iio_priv_to_dev(st); size_t bytes_per_datum; int result; - unsigned char data[BYTES_FOR_DMP + QUATERNION_BYTES]; - unsigned short fifo_count; - unsigned int copied; + u8 data[BYTES_FOR_DMP + QUATERNION_BYTES]; + u16 fifo_count; + u32 copied; s64 timestamp; struct inv_reg_map_s *reg; s64 buf[8]; - unsigned char *tmp; + s8 *tmp; + + mutex_lock(&indio_dev->mlock); + if (!(iio_buffer_enabled(indio_dev))) + goto end_session; + reg = &st->reg; if (!(st->chip_config.accl_fifo_enable | st->chip_config.gyro_fifo_enable | st->chip_config.dmp_on | - st->chip_config.compass_fifo_enable)) + st->chip_config.compass_fifo_enable | + st->mot_int.mot_on)) goto end_session; - if (st->chip_config.dmp_on && st->chip_config.flick_int_on) { - /*dmp interrupt status */ - inv_i2c_read(st, REG_DMP_INT_STATUS, 1, data); - if (data[0] & 8) - sysfs_notify(&indio_dev->dev.kobj, NULL, "event_flick"); + if (st->mot_int.mot_on) + inv_process_motion(st); + if (st->chip_config.dmp_on && st->chip_config.smd_enable) { + /* dmp interrupt status */ + result = inv_i2c_read(st, REG_DMP_INT_STATUS, 1, data); + if (!result) + if (data[0] & SMD_INT_ON) { + sysfs_notify(&indio_dev->dev.kobj, NULL, + "event_smd"); + st->chip_config.smd_enable = 0; + } } if (st->chip_config.lpa_mode) { - result = inv_i2c_read(st, reg->raw_accl, 6, data); + result = inv_i2c_read(st, reg->raw_accl, + BYTES_PER_SENSOR, data); if (result) goto end_session; inv_report_gyro_accl_compass(indio_dev, data, - iio_get_time_ns()); + get_time_ns()); goto end_session; } - - if (st->chip_config.dmp_on) - if (st->chip_config.quaternion_on) - bytes_per_datum = BYTES_FOR_DMP + QUATERNION_BYTES; - else - bytes_per_datum = BYTES_FOR_DMP; - else - bytes_per_datum = (st->chip_config.accl_fifo_enable + - st->chip_config.gyro_fifo_enable)*BYTES_PER_SENSOR; + bytes_per_datum = get_bytes_per_datum(st); fifo_count = 0; if (bytes_per_datum != 0) { result = inv_i2c_read(st, reg->fifo_count_h, FIFO_COUNT_BYTE, data); if (result) goto end_session; - fifo_count = (data[0] << 8) + data[1]; + fifo_count = be16_to_cpup((__be16 *)(&data[0])); if (fifo_count < bytes_per_datum) goto end_session; - if (fifo_count%2) + /* fifo count can't be odd number */ + if (fifo_count & 1) goto flush_fifo; if (fifo_count > FIFO_THRESHOLD) goto flush_fifo; - /* Timestamp mismatch. */ + /* timestamp mismatch. 
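	   (i.e. fewer queued timestamps than complete packets in the hardware FIFO)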
*/ if (kfifo_len(&st->timestamps) < fifo_count / bytes_per_datum) goto flush_fifo; @@ -685,17 +919,17 @@ irqreturn_t inv_read_fifo(int irq, void *dev_id) ×tamp, sizeof(timestamp), &copied); if (result) goto flush_fifo; - } else + } else { goto flush_fifo; + } } - } - if (bytes_per_datum == 0) { + } else { result = kfifo_to_user(&st->timestamps, ×tamp, sizeof(timestamp), &copied); if (result) goto flush_fifo; } - tmp = (char *)buf; + tmp = (s8 *)buf; while ((bytes_per_datum != 0) && (fifo_count >= bytes_per_datum)) { result = inv_i2c_read(st, reg->fifo_r_w, bytes_per_datum, data); @@ -709,51 +943,178 @@ irqreturn_t inv_read_fifo(int irq, void *dev_id) inv_report_gyro_accl_compass(indio_dev, data, timestamp); fifo_count -= bytes_per_datum; } - if (bytes_per_datum == 0) + if (bytes_per_datum == 0 && st->chip_config.compass_fifo_enable) inv_report_gyro_accl_compass(indio_dev, data, timestamp); + end_session: + mutex_unlock(&indio_dev->mlock); + return IRQ_HANDLED; + flush_fifo: /* Flush HW and SW FIFOs. */ inv_reset_fifo(indio_dev); inv_clear_kfifo(st); + mutex_unlock(&indio_dev->mlock); + return IRQ_HANDLED; } void inv_mpu_unconfigure_ring(struct iio_dev *indio_dev) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); - free_irq(st->i2c->irq, st); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + free_irq(st->client->irq, st); iio_kfifo_free(indio_dev->buffer); }; -int inv_postenable(struct iio_dev *indio_dev) +static int inv_postenable(struct iio_dev *indio_dev) { - return set_inv_enable(indio_dev, 1); + return set_inv_enable(indio_dev, true); +} +static int inv_predisable(struct iio_dev *indio_dev) +{ + return set_inv_enable(indio_dev, false); } -int inv_predisable(struct iio_dev *indio_dev) + +static void inv_scan_query(struct iio_dev *indio_dev) { - return set_inv_enable(indio_dev, 0); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + int result; + + if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_X) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Y) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_GYRO_Z)) + st->chip_config.gyro_fifo_enable = 1; + else + st->chip_config.gyro_fifo_enable = 0; + + if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_X) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Y) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_ACCL_Z)) + st->chip_config.accl_fifo_enable = 1; + else + st->chip_config.accl_fifo_enable = 0; + + if (iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_X) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_Y) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_MAGN_Z)) + st->chip_config.compass_fifo_enable = 1; + else + st->chip_config.compass_fifo_enable = 0; + + /* check to make sure engine is turned on if fifo is turned on */ + if (st->chip_config.gyro_fifo_enable && + (!st->chip_config.gyro_enable)) { + result = st->switch_gyro_engine(st, true); + if (result) + return; + st->chip_config.gyro_enable = true; + } + if (st->chip_config.accl_fifo_enable && + (!st->chip_config.accl_enable)) { + result = st->switch_accl_engine(st, true); + if (result) + return; + st->chip_config.accl_enable = true; + } +} + +static int inv_check_quaternion(struct iio_dev *indio_dev) +{ + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + int result; + + if (st->chip_config.dmp_on) { + if ( + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_QUAT_R) || + iio_scan_mask_query(indio_dev, ring, 
INV_MPU_SCAN_QUAT_X) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_QUAT_Y) || + iio_scan_mask_query(indio_dev, ring, INV_MPU_SCAN_QUAT_Z)) + st->chip_config.quaternion_on = 1; + else + st->chip_config.quaternion_on = 0; + + result = inv_send_quaternion(st, + st->chip_config.quaternion_on); + if (result) + return result; + } else { + st->chip_config.quaternion_on = 0; + clear_bit(INV_MPU_SCAN_QUAT_R, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_X, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_Y, ring->scan_mask); + clear_bit(INV_MPU_SCAN_QUAT_Z, ring->scan_mask); + } + + return 0; +} + +static int inv_check_conflict_sysfs(struct iio_dev *indio_dev) +{ + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + int result; + + if (st->chip_config.lpa_mode) { + /* dmp cannot run with low power mode on */ + st->chip_config.dmp_on = 0; + result = st->gyro_en(st, ring, false); + if (result) + return result; + result = st->compass_en(st, ring, false); + if (result) + return result; + result = st->quaternion_en(st, ring, false); + if (result) + return result; + + result = st->accl_en(st, ring, true); + if (result) + return result; + } + result = inv_check_quaternion(indio_dev); + if (result) + return result; + + return result; +} + +static int inv_preenable(struct iio_dev *indio_dev) +{ + int result; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); + + result = st->set_power_state(st, true); + if (result) + return result; + + result = inv_check_conflict_sysfs(indio_dev); + if (result) + return result; + inv_scan_query(indio_dev); + result = iio_sw_buffer_preenable(indio_dev); + + return result; } static const struct iio_buffer_setup_ops inv_mpu_ring_setup_ops = { - .preenable = &iio_sw_buffer_preenable, + .preenable = &inv_preenable, .postenable = &inv_postenable, .predisable = &inv_predisable, }; int inv_mpu_configure_ring(struct iio_dev *indio_dev) { - int ret = 0; - struct inv_gyro_state_s *st = iio_priv(indio_dev); + int ret; + struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct iio_buffer *ring; ring = iio_kfifo_allocate(indio_dev); - if (!ring) { - ret = -ENOMEM; - return ret; - } + if (!ring) + return -ENOMEM; indio_dev->buffer = ring; /* setup ring buffer */ ring->scan_timestamp = true; @@ -761,11 +1122,11 @@ int inv_mpu_configure_ring(struct iio_dev *indio_dev) /*scan count double count timestamp. should subtract 1. but number of channels still includes timestamp*/ if (INV_MPU3050 == st->chip_type) - ret = request_threaded_irq(st->i2c->irq, inv_irq_handler, + ret = request_threaded_irq(st->client->irq, inv_irq_handler, inv_read_fifo_mpu3050, IRQF_TRIGGER_RISING | IRQF_SHARED, "inv_irq", st); else - ret = request_threaded_irq(st->i2c->irq, inv_irq_handler, + ret = request_threaded_irq(st->client->irq, inv_irq_handler, inv_read_fifo, IRQF_TRIGGER_RISING | IRQF_SHARED, "inv_irq", st); if (ret) @@ -777,6 +1138,7 @@ int inv_mpu_configure_ring(struct iio_dev *indio_dev) iio_kfifo_free(indio_dev->buffer); return ret; } + /** * @} */ diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c b/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c index a36c7947583..2dd8c0c4a0c 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c @@ -17,9 +17,9 @@ * @brief Hardware drivers. 
* * @{ - * @file inv_mpu3050.c + * @file inv_mpu_trigger.c * @brief A sysfs device driver for Invensense devices - * @details This file is part of inv_gyro driver code + * @details This file is part of inv mpu iio driver code */ #include @@ -40,10 +40,11 @@ #include "../../iio.h" #include "../../sysfs.h" #include "../../trigger.h" + #include "inv_mpu_iio.h" /** - * inv_mpu_data_rdy_trigger_set_state() set datardy interrupt state + * inv_mpu_data_rdy_trigger_set_state() set data ready interrupt state **/ static int inv_mpu_data_rdy_trigger_set_state(struct iio_trigger *trig, bool state) @@ -62,36 +63,30 @@ static const struct iio_trigger_ops inv_mpu_trigger_ops = { int inv_mpu_probe_trigger(struct iio_dev *indio_dev) { int ret; - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); st->trig = iio_allocate_trigger("%s-dev%d", indio_dev->name, indio_dev->id); - if (st->trig == NULL) { - ret = -ENOMEM; - goto error_ret; - } - - /* select default trigger */ - st->trig->dev.parent = &st->i2c->dev; + if (st->trig == NULL) + return -ENOMEM; + st->trig->dev.parent = &st->client->dev; st->trig->private_data = indio_dev; st->trig->ops = &inv_mpu_trigger_ops; ret = iio_trigger_register(st->trig); - /* select default trigger */ + if (ret) { + iio_free_trigger(st->trig); + return -EPERM; + } indio_dev->trig = st->trig; - if (ret) - goto error_ret; return 0; - -error_ret: - return ret; } void inv_mpu_remove_trigger(struct iio_dev *indio_dev) { - struct inv_gyro_state_s *st = iio_priv(indio_dev); + struct inv_mpu_iio_s *st = iio_priv(indio_dev); iio_trigger_unregister(st->trig); iio_free_trigger(st->trig); diff --git a/drivers/staging/iio/imu/mpu/inv_slave_bma250.c b/drivers/staging/iio/imu/mpu/inv_slave_bma250.c index 85c6cc9f2c4..bd84f637af5 100644 --- a/drivers/staging/iio/imu/mpu/inv_slave_bma250.c +++ b/drivers/staging/iio/imu/mpu/inv_slave_bma250.c @@ -19,7 +19,7 @@ * @{ * @file inv_slave_bma250.c * @brief A sysfs device driver for Invensense devices - * @details This file is part of inv_gyro driver code + * @details This file is part of invensense mpu driver code * */ @@ -39,44 +39,43 @@ #include #include "inv_mpu_iio.h" -#define BMA250_CHIP_ID (3) -#define BMA250_RANGE_SET (0) -#define BMA250_BW_SET (4) +#define BMA250_CHIP_ID 3 +#define BMA250_RANGE_SET 0 +#define BMA250_BW_SET 4 /* range and bandwidth */ +#define BMA250_RANGE_2G 3 +#define BMA250_RANGE_4G 5 +#define BMA250_RANGE_8G 8 +#define BMA250_RANGE_16G 12 +#define BMA250_RANGE_MAX 4 +#define BMA250_RANGE_MASK 0xF0 -#define BMA250_RANGE_2G (3) -#define BMA250_RANGE_4G (5) -#define BMA250_RANGE_8G (8) -#define BMA250_RANGE_16G (12) -#define BMA250_RANGE_MAX (4) -#define BMA250_RANGE_MASK (0xF0) - -#define BMA250_BW_7_81HZ (0x08) -#define BMA250_BW_15_63HZ (0x09) -#define BMA250_BW_31_25HZ (0x0A) -#define BMA250_BW_62_50HZ (0x0B) -#define BMA250_BW_125HZ (0x0C) -#define BMA250_BW_250HZ (0x0D) -#define BMA250_BW_500HZ (0x0E) -#define BMA250_BW_1000HZ (0x0F) -#define BMA250_MAX_BW_SIZE (8) -#define BMA250_BW_REG_MASK (0xE0) +#define BMA250_BW_7_81HZ 0x08 +#define BMA250_BW_15_63HZ 0x09 +#define BMA250_BW_31_25HZ 0x0A +#define BMA250_BW_62_50HZ 0x0B +#define BMA250_BW_125HZ 0x0C +#define BMA250_BW_250HZ 0x0D +#define BMA250_BW_500HZ 0x0E +#define BMA250_BW_1000HZ 0x0F +#define BMA250_MAX_BW_SIZE 8 +#define BMA250_BW_REG_MASK 0xE0 /* register definitions */ -#define BMA250_X_AXIS_LSB_REG (0x02) -#define BMA250_RANGE_SEL_REG (0x0F) -#define BMA250_BW_SEL_REG (0x10) -#define BMA250_MODE_CTRL_REG 
(0x11) +#define BMA250_X_AXIS_LSB_REG 0x02 +#define BMA250_RANGE_SEL_REG 0x0F +#define BMA250_BW_SEL_REG 0x10 +#define BMA250_MODE_CTRL_REG 0x11 /* mode settings */ -#define BMA250_MODE_NORMAL (0) -#define BMA250_MODE_LOWPOWER (1) -#define BMA250_MODE_SUSPEND (2) -#define BMA250_MODE_MAX (3) -#define BMA250_MODE_MASK (0x3F) -#define BMA250_BIT_SUSPEND (0x80) -#define BMA250_BIT_LP (0x40) +#define BMA250_MODE_NORMAL 0 +#define BMA250_MODE_LOWPOWER 1 +#define BMA250_MODE_SUSPEND 2 +#define BMA250_MODE_MAX 3 +#define BMA250_MODE_MASK 0x3F +#define BMA250_BIT_SUSPEND 0x80 +#define BMA250_BIT_LP 0x40 struct bma_property { int range; @@ -90,57 +89,53 @@ static struct bma_property bma_static_property = { .mode = BMA250_MODE_SUSPEND }; -static int bma250_set_bandwidth(struct inv_gyro_state_s *st, unsigned char BW) +static int bma250_set_bandwidth(struct inv_mpu_iio_s *st, u8 bw) { - int res = 0; - unsigned char data; - int Bandwidth = 0; - if (BW >= BMA250_MAX_BW_SIZE) - return -1; - switch (BW) { + int res; + u8 data; + int bandwidth; + switch (bw) { case 0: - Bandwidth = BMA250_BW_7_81HZ; + bandwidth = BMA250_BW_7_81HZ; break; case 1: - Bandwidth = BMA250_BW_15_63HZ; + bandwidth = BMA250_BW_15_63HZ; break; case 2: - Bandwidth = BMA250_BW_31_25HZ; + bandwidth = BMA250_BW_31_25HZ; break; case 3: - Bandwidth = BMA250_BW_62_50HZ; + bandwidth = BMA250_BW_62_50HZ; break; case 4: - Bandwidth = BMA250_BW_125HZ; + bandwidth = BMA250_BW_125HZ; break; case 5: - Bandwidth = BMA250_BW_250HZ; + bandwidth = BMA250_BW_250HZ; break; case 6: - Bandwidth = BMA250_BW_500HZ; + bandwidth = BMA250_BW_500HZ; break; case 7: - Bandwidth = BMA250_BW_1000HZ; + bandwidth = BMA250_BW_1000HZ; break; default: - break; + return -EINVAL; } res = inv_secondary_read(BMA250_BW_SEL_REG, 1, &data); if (res) return res; data &= BMA250_BW_REG_MASK; - data |= Bandwidth; + data |= bandwidth; res = inv_secondary_write(BMA250_BW_SEL_REG, data); return res; } -static int bma250_set_range(struct inv_gyro_state_s *st, unsigned char Range) +static int bma250_set_range(struct inv_mpu_iio_s *st, u8 range) { - int res = 0; - unsigned char orig, data = 0; - if (Range >= BMA250_RANGE_MAX) - return -1; - switch (Range) { + int res; + u8 orig, data; + switch (range) { case 0: data = BMA250_RANGE_2G; break; @@ -154,7 +149,7 @@ static int bma250_set_range(struct inv_gyro_state_s *st, unsigned char Range) data = BMA250_RANGE_16G; break; default: - break; + return -EINVAL; } res = inv_secondary_read(BMA250_RANGE_SEL_REG, 1, &orig); if (res) @@ -162,15 +157,18 @@ static int bma250_set_range(struct inv_gyro_state_s *st, unsigned char Range) orig &= BMA250_RANGE_MASK; data |= orig; res = inv_secondary_write(BMA250_RANGE_SEL_REG, data); - bma_static_property.range = Range; - return res; + if (res) + return res; + bma_static_property.range = range; + + return 0; } -static int setup_slave_bma250(struct inv_gyro_state_s *st) +static int setup_slave_bma250(struct inv_mpu_iio_s *st) { int result; - unsigned char data[2]; - result = set_3050_bypass(st, 1); + u8 data[2]; + result = set_3050_bypass(st, true); if (result) return result; /*read secondary i2c ID register */ @@ -178,29 +176,28 @@ static int setup_slave_bma250(struct inv_gyro_state_s *st) if (result) return result; if (BMA250_CHIP_ID != data[0]) - return result; - result = set_3050_bypass(st, 0); + return -EINVAL; + result = set_3050_bypass(st, false); if (result) return result; /*AUX(accel), slave address is set inside set_3050_bypass*/ /* bma250 x axis LSB register address is 2 */ result = 
inv_i2c_single_write(st, REG_3050_AUX_BST_ADDR, BMA250_X_AXIS_LSB_REG); + return result; } -static int bma250_set_mode(struct inv_gyro_state_s *st, unsigned char Mode) +static int bma250_set_mode(struct inv_mpu_iio_s *st, u8 mode) { - int res = 0; - unsigned char data = 0; + int res; + u8 data; - if (Mode >= BMA250_RANGE_MASK) - return -1; res = inv_secondary_read(BMA250_MODE_CTRL_REG, 1, &data); if (res) return res; data &= BMA250_MODE_MASK; - switch (Mode) { + switch (mode) { case BMA250_MODE_NORMAL: break; case BMA250_MODE_LOWPOWER: @@ -210,94 +207,105 @@ static int bma250_set_mode(struct inv_gyro_state_s *st, unsigned char Mode) data |= BMA250_BIT_SUSPEND; break; default: - break; + return -EINVAL; } res = inv_secondary_write(BMA250_MODE_CTRL_REG, data); - bma_static_property.mode = Mode; - return res; + if (res) + return res; + bma_static_property.mode = mode; + + return 0; } -static int suspend_slave_bma250(struct inv_gyro_state_s *st) + +static int suspend_slave_bma250(struct inv_mpu_iio_s *st) { int result; if (bma_static_property.mode == BMA250_MODE_SUSPEND) return 0; /*set to bypass mode */ - result = set_3050_bypass(st, 1); + result = set_3050_bypass(st, true); if (result) return result; bma250_set_mode(st, BMA250_MODE_SUSPEND); /* no need to recover to non-bypass mode because we need it now */ - return result; + + return 0; } -static int resume_slave_bma250(struct inv_gyro_state_s *st) + +static int resume_slave_bma250(struct inv_mpu_iio_s *st) { int result; if (bma_static_property.mode == BMA250_MODE_NORMAL) return 0; /*set to bypass mode */ - result = set_3050_bypass(st, 1); + result = set_3050_bypass(st, true); if (result) return result; - bma250_set_mode(st, BMA250_MODE_NORMAL); + result = bma250_set_mode(st, BMA250_MODE_NORMAL); /* recover bypass mode */ - result = set_3050_bypass(st, 0); - return result; + result |= set_3050_bypass(st, false); + + return result ? (-EINVAL) : 0; } -static int combine_data_slave_bma250(unsigned char *in, short *out) + +static int combine_data_slave_bma250(u8 *in, short *out) { out[0] = le16_to_cpup((__le16 *)(&in[0])); out[1] = le16_to_cpup((__le16 *)(&in[2])); out[2] = le16_to_cpup((__le16 *)(&in[4])); + return 0; } -static int get_mode_slave_bma250(struct inv_gyro_state_s *st) + +static int get_mode_slave_bma250(void) { - if (bma_static_property.mode == BMA250_MODE_SUSPEND) - return 0; - else if (bma_static_property.mode == BMA250_MODE_NORMAL) - return 1; - return -1; -}; + switch (bma_static_property.mode) { + case BMA250_MODE_SUSPEND: + return INV_MODE_SUSPEND; + case BMA250_MODE_NORMAL: + return INV_MODE_NORMAL; + default: + return -EINVAL; + } +} + /** * set_lpf_bma250() - set lpf value */ -static int set_lpf_bma250(struct inv_gyro_state_s *st, int rate) +static int set_lpf_bma250(struct inv_mpu_iio_s *st, int rate) { const short hz[] = {1000, 500, 250, 125, 62, 31, 15, 7}; const int d[] = {7, 6, 5, 4, 3, 2, 1, 0}; int i, h, data, result; h = (rate >> 1); i = 0; - while ((h < hz[i]) && (i < ARRAY_SIZE(hz))) + while ((h < hz[i]) && (i < ARRAY_SIZE(hz) - 1)) i++; - if (i == ARRAY_SIZE(hz)) - i -= 1; data = d[i]; - result = set_3050_bypass(st, 1); + result = set_3050_bypass(st, true); if (result) return result; - result = bma250_set_bandwidth(st, (unsigned char) data); - result |= set_3050_bypass(st, 0); + result = bma250_set_bandwidth(st, (u8) data); + result |= set_3050_bypass(st, false); - return result; + return result ? 
(-EINVAL) : 0; } /** * set_fs_bma250() - set range value */ -static int set_fs_bma250(struct inv_gyro_state_s *st, int fs) +static int set_fs_bma250(struct inv_mpu_iio_s *st, int fs) { int result; - result = set_3050_bypass(st, 1); + result = set_3050_bypass(st, true); if (result) return result; - result = bma250_set_range(st, (unsigned char) fs); - result |= set_3050_bypass(st, 0); - if (result) - return -EINVAL; - return result; + result = bma250_set_range(st, (u8) fs); + result |= set_3050_bypass(st, false); + + return result ? (-EINVAL) : 0; } static struct inv_mpu_slave slave_bma250 = { @@ -310,9 +318,10 @@ static struct inv_mpu_slave slave_bma250 = { .set_fs = set_fs_bma250 }; -int inv_register_bma250_slave(struct inv_gyro_state_s *st) +int inv_register_mpu3050_slave(struct inv_mpu_iio_s *st) { st->mpu_slave = &slave_bma250; + return 0; } /** diff --git a/drivers/staging/iio/inv_test/Kconfig b/drivers/staging/iio/inv_test/Kconfig new file mode 100644 index 00000000000..e96a514b28a --- /dev/null +++ b/drivers/staging/iio/inv_test/Kconfig @@ -0,0 +1,11 @@ +# +# Kconfig for Invensense IIO testing hooks +# + +config INV_TESTING + boolean "Invensense IIO testing hooks" + depends on INV_MPU_IIO || INV_AMI306_IIO || INV_YAS530 || INV_HUB_IIO + default n + help + This flag enables display of additional testing information from the + Invensense IIO drivers diff --git a/drivers/staging/iio/inv_test/Makefile b/drivers/staging/iio/inv_test/Makefile new file mode 100644 index 00000000000..4f0edd3de90 --- /dev/null +++ b/drivers/staging/iio/inv_test/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for Invensense IIO testing hooks. +# + +obj-$(CONFIG_INV_TESTING) += inv_counters.o + diff --git a/drivers/staging/iio/inv_test/inv_counters.c b/drivers/staging/iio/inv_test/inv_counters.c new file mode 100644 index 00000000000..3b26ca97284 --- /dev/null +++ b/drivers/staging/iio/inv_test/inv_counters.c @@ -0,0 +1,154 @@ +/* + * @file inv_counters.c + * @brief Exports i2c read write counts through sysfs + * + * @version 0.1 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "inv_counters.h" + +static int mpu_irq; +static int accel_irq; +static int compass_irq; + +struct inv_counters { + uint32_t i2c_tempreads; + uint32_t i2c_mpureads; + uint32_t i2c_mpuwrites; + uint32_t i2c_accelreads; + uint32_t i2c_accelwrites; + uint32_t i2c_compassreads; + uint32_t i2c_compasswrites; + uint32_t i2c_compassirq; + uint32_t i2c_accelirq; +}; + +static struct inv_counters Counters; + +static ssize_t i2c_counters_show(struct class *cls, + struct class_attribute *attr, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, + "%ld.%03ld %u %u %u %u %u %u %u %u %u %u\n", + jiffies / HZ, ((jiffies % HZ) * (1024 / HZ)), + mpu_irq ? kstat_irqs(mpu_irq) : 0, + Counters.i2c_tempreads, + Counters.i2c_mpureads, Counters.i2c_mpuwrites, + accel_irq ? kstat_irqs(accel_irq) : Counters.i2c_accelirq, + Counters.i2c_accelreads, Counters.i2c_accelwrites, + compass_irq ? 
kstat_irqs(compass_irq) : Counters.i2c_compassirq, + Counters.i2c_compassreads, Counters.i2c_compasswrites); +} + +void inv_iio_counters_set_i2cirq(enum irqtype type, int irq) +{ + switch (type) { + case IRQ_MPU: + mpu_irq = irq; + break; + case IRQ_ACCEL: + accel_irq = irq; + break; + case IRQ_COMPASS: + compass_irq = irq; + break; + } +} +EXPORT_SYMBOL_GPL(inv_iio_counters_set_i2cirq); + +void inv_iio_counters_tempread(int count) +{ + Counters.i2c_tempreads += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_tempread); + +void inv_iio_counters_mpuread(int count) +{ + Counters.i2c_mpureads += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_mpuread); + +void inv_iio_counters_mpuwrite(int count) +{ + Counters.i2c_mpuwrites += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_mpuwrite); + +void inv_iio_counters_accelread(int count) +{ + Counters.i2c_accelreads += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_accelread); + +void inv_iio_counters_accelwrite(int count) +{ + Counters.i2c_accelwrites += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_accelwrite); + +void inv_iio_counters_compassread(int count) +{ + Counters.i2c_compassreads += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_compassread); + +void inv_iio_counters_compasswrite(int count) +{ + Counters.i2c_compasswrites += count; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_compasswrite); + +void inv_iio_counters_compassirq(void) +{ + Counters.i2c_compassirq++; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_compassirq); + +void inv_iio_counters_accelirq(void) +{ + Counters.i2c_accelirq++; +} +EXPORT_SYMBOL_GPL(inv_iio_counters_accelirq); + +static struct class_attribute inv_class_attr[] = { + __ATTR(i2c_counter, S_IRUGO, i2c_counters_show, NULL), + __ATTR_NULL +}; + +static struct class inv_counters_class = { + .name = "inv_counters", + .owner = THIS_MODULE, + .class_attrs = (struct class_attribute *) &inv_class_attr +}; + +static int __init inv_counters_init(void) +{ + memset(&Counters, 0, sizeof(Counters)); + + return class_register(&inv_counters_class); +} + +static void __exit inv_counters_exit(void) +{ + class_unregister(&inv_counters_class); +} + +module_init(inv_counters_init); +module_exit(inv_counters_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("GESL"); +MODULE_DESCRIPTION("inv_counters debug support"); + diff --git a/drivers/staging/iio/inv_test/inv_counters.h b/drivers/staging/iio/inv_test/inv_counters.h new file mode 100644 index 00000000000..d60dac9d97b --- /dev/null +++ b/drivers/staging/iio/inv_test/inv_counters.h @@ -0,0 +1,72 @@ +/* + * @file inv_counters.h + * @brief Debug file to keep track of various counters for the InvenSense + * sensor drivers. 
+ * + * @version 0.1 + */ + +#ifndef _INV_COUNTERS_H_ +#define _INV_COUNTERS_H_ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_INV_TESTING + +enum irqtype { + IRQ_MPU, + IRQ_ACCEL, + IRQ_COMPASS +}; + +#define INV_I2C_INC_MPUREAD(x) inv_iio_counters_mpuread(x) +#define INV_I2C_INC_MPUWRITE(x) inv_iio_counters_mpuwrite(x) +#define INV_I2C_INC_ACCELREAD(x) inv_iio_counters_accelread(x) +#define INV_I2C_INC_ACCELWRITE(x) inv_iio_counters_accelwrite(x) +#define INV_I2C_INC_COMPASSREAD(x) inv_iio_counters_compassread(x) +#define INV_I2C_INC_COMPASSWRITE(x) inv_iio_counters_compasswrite(x) + +#define INV_I2C_INC_TEMPREAD(x) inv_iio_counters_tempread(x) + +#define INV_I2C_SETIRQ(type, irq) inv_iio_counters_set_i2cirq(type, irq) +#define INV_I2C_INC_COMPASSIRQ() inv_iio_counters_compassirq() +#define INV_I2C_INC_ACCELIRQ() inv_iio_counters_accelirq() + +void inv_iio_counters_mpuread(int count); +void inv_iio_counters_mpuwrite(int count); +void inv_iio_counters_accelread(int count); +void inv_iio_counters_accelwrite(int count); +void inv_iio_counters_compassread(int count); +void inv_iio_counters_compasswrite(int count); + +void inv_iio_counters_tempread(int count); + +void inv_iio_counters_set_i2cirq(enum irqtype type, int irq); +void inv_iio_counters_compassirq(void); +void inv_iio_counters_accelirq(void); + +#else + +#define INV_I2C_INC_MPUREAD(x) +#define INV_I2C_INC_MPUWRITE(x) +#define INV_I2C_INC_ACCELREAD(x) +#define INV_I2C_INC_ACCELWRITE(x) +#define INV_I2C_INC_COMPASSREAD(x) +#define INV_I2C_INC_COMPASSWRITE(x) + +#define INV_I2C_INC_TEMPREAD(x) + +#define INV_I2C_SETIRQ(type, irq) +#define INV_I2C_INC_COMPASSIRQ() +#define INV_I2C_INC_ACCELIRQ() + +#endif /* CONFIG_INV_TESTING */ + +#endif /* _INV_COUNTERS_H_ */ + diff --git a/drivers/staging/iio/magnetometer/Kconfig b/drivers/staging/iio/magnetometer/Kconfig index fe9ef564dd5..3d96ca31e9f 100644 --- a/drivers/staging/iio/magnetometer/Kconfig +++ b/drivers/staging/iio/magnetometer/Kconfig @@ -24,14 +24,5 @@ config SENSORS_HMC5843 To compile this driver as a module, choose M here: the module will be called hmc5843 -config AMI306 - tristate "invensense implementation of ami306" - depends on I2C && IIO_KFIFO_BUF && SYSFS && IIO && IIO_TRIGGER - default n - help - This driver supports the ami306. It is Invensense implementation - of ami306 compass device. - This driver can be built as a module. The module will be called - inv-ami306-iio. - +source "drivers/staging/iio/magnetometer/inv_compass/Kconfig" endmenu diff --git a/drivers/staging/iio/magnetometer/Makefile b/drivers/staging/iio/magnetometer/Makefile index 7330159a0da..71c4bd500c3 100644 --- a/drivers/staging/iio/magnetometer/Makefile +++ b/drivers/staging/iio/magnetometer/Makefile @@ -4,9 +4,5 @@ obj-$(CONFIG_SENSORS_AK8975) += ak8975.o obj-$(CONFIG_SENSORS_HMC5843) += hmc5843.o -obj-$(CONFIG_AMI306) += inv-ami306.o - -inv-ami306-objs := inv_ami306_core.o -inv-ami306-objs += inv_ami306_ring.o -inv-ami306-objs += inv_ami306_trigger.o +obj-$(CONFIG_INV_AMI306_IIO) += inv_compass/ diff --git a/drivers/staging/iio/magnetometer/inv_compass/Kconfig b/drivers/staging/iio/magnetometer/inv_compass/Kconfig new file mode 100644 index 00000000000..34e001ef829 --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/Kconfig @@ -0,0 +1,25 @@ +# +# Kconfig for Invensense IIO compass drivers of 3rd party compass devices. 
+# + +# Yamaha YAS530/YAS532/YAS533 +config INV_YAS53X_IIO + tristate "Invensense IIO driver for Yamaha YAS530/YAS532/YAS533 compass" + depends on I2C && SYSFS && IIO && IIO_KFIFO_BUF + default n + help + This driver supports the Yamaha YAS530/YAS532/YAS533. It is the Invensense + implementation of YAS53x series compass devices. + This driver can be built as a module. The module will be called + inv_yas53x_iio. + +# Aichi AMI306 +config INV_AMI306_IIO + tristate "Invensense IIO driver for Aichi AMI306 compass" + depends on I2C && SYSFS && IIO && IIO_KFIFO_BUF + default n + help + This driver supports the Aichi AMI306 compass. It is the Invensense + IIO implementation for the AMI306 compass device. + This driver can be built as a module. The module will be called + inv-ami306-iio. diff --git a/drivers/staging/iio/magnetometer/inv_compass/Makefile b/drivers/staging/iio/magnetometer/inv_compass/Makefile new file mode 100644 index 00000000000..adc7dd93e1d --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/Makefile @@ -0,0 +1,25 @@ +# +# Makefile for Invensense IIO compass drivers of 3rd party compass devices. +# + +# Yamaha YAS530/YAS532/YAS533 +obj-$(CONFIG_INV_YAS53X_IIO) += inv_yas53x.o + +inv_yas53x-objs := inv_yas53x_core.o +inv_yas53x-objs += inv_yas53x_ring.o +inv_yas53x-objs += inv_yas53x_trigger.o + +CFLAGS_inv_yas53x_core.o += -Idrivers/staging/iio +CFLAGS_inv_yas53x_ring.o += -Idrivers/staging/iio +CFLAGS_inv_yas53x_trigger.o += -Idrivers/staging/iio + +# Aichi AMI306 +obj-$(CONFIG_INV_AMI306_IIO) += inv-ami306-iio.o + +inv-ami306-iio-objs := inv_ami306_core.o +inv-ami306-iio-objs += inv_ami306_ring.o +inv-ami306-iio-objs += inv_ami306_trigger.o + +CFLAGS_inv_ami306_core.o += -Idrivers/staging/iio +CFLAGS_inv_ami306_ring.o += -Idrivers/staging/iio +CFLAGS_inv_ami306_trigger.o += -Idrivers/staging/iio diff --git a/drivers/staging/iio/magnetometer/inv_compass/README b/drivers/staging/iio/magnetometer/inv_compass/README new file mode 100644 index 00000000000..54f2bb8ded2 --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/README @@ -0,0 +1,176 @@ +Kernel driver +Author: Invensense + +Table of Contents: +================== +- Description +- Integrating the Driver in the Linux Kernel +- Board and Platform Data + > Platform Data +- Board File Modifications for compass + > AMI306 + > YAS530/532/533 +- IIO Subsystem + > Communicating with the Driver in Userspace +- Streaming Data to an Userspace Application +- Test Applications + > Running Test Applications with AMI306 or YAS53x + +Description +=========== +This document describes how to install the Invensense device driver for AMI306 +and YAS53x series compass chip into a Linux kernel. The Invensense driver +currently supports the following sensors: +- AMI306 +- YAS530 +- YAS532 +- YAS533 + +Please refer to the appropriate product specification +document for further information regarding the slave address. 
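+
+As a minimal configuration sketch (not part of the original README), both
+compass drivers can be built as loadable modules by selecting the Kconfig
+symbols introduced by this patch series; the dependencies listed in the
+Kconfig entries (I2C, SYSFS, IIO, IIO_KFIFO_BUF) must also be enabled:
+
+    CONFIG_INV_AMI306_IIO=m
+    CONFIG_INV_YAS53X_IIO=m
+    # optional: exports i2c transfer counters under /sys/class/inv_counters
+    CONFIG_INV_TESTING=y
+
+See the "Integrating the Driver in the Linux Kernel" section below for the
+Kconfig and Makefile hooks that make these symbols visible in menuconfig.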
+
+The following files are included in this package:
+- Kconfig
+- Makefile
+- inv_ami306_core.c
+- inv_ami306_ring.c
+- inv_ami306_trigger.c
+- inv_ami306_iio.h
+- inv_yas53x_core.c
+- inv_yas53x_ring.c
+- inv_yas53x_trigger.c
+- inv_yas53x_iio.h
+
+Integrating the Driver in the Linux Kernel
+==========================================
+Please add the files as follows:
+- Add all above files to drivers/staging/iio/magnetometer/inv_compass
+(another directory is acceptable, but this is the recommended destination)
+
+In order to see the driver in menuconfig when building the kernel, please
+make modifications as shown below:
+
+    modify "drivers/staging/iio/magnetometer/Kconfig" with:
+    >> source "drivers/staging/iio/magnetometer/inv_compass/Kconfig"
+
+    modify "drivers/staging/iio/magnetometer/Makefile" with:
+    >> obj-y += inv_compass/
+
+
+Board and Platform Data
+=======================
+In order to recognize the Invensense device on the I2C bus, the board file must
+be modified.
+The i2c_board_info instance must be defined as shown below.
+
+Platform Data
+-------------
+The platform data (orientation matrix and secondary bus configurations) must be
+modified as shown below, according to your particular platform configuration.
+
+Board File Modifications for Secondary I2C Configuration
+========================================================
+For the Panda Board, the board file can be found at
+arch/arm/mach-omap2/board-omap4panda.c.
+Please modify the pertinent board file in your system according to the examples
+shown below:
+
+AMI306
+-------------------------------------------------
+static struct mpu_platform_data compass_data = {
+    .orientation = { 0, 0, 1,
+                     0, 1, 0,
+                     1, 0, 0 },
+};
+
+static struct i2c_board_info __initdata chip_board_info[] = {
+    {
+        I2C_BOARD_INFO("ami306", 0x0E),
+        .platform_data = &compass_data,
+    },
+};
+
+YAS53x (using YAS532 as an example)
+-------------------------------------------------
+static struct mpu_platform_data compass_data = {
+    .orientation = { 0, -1, 0,
+                     1, 0, 0,
+                     0, 0, 1 },
+};
+
+static struct i2c_board_info __initdata compass_board_info[] = {
+    {
+        I2C_BOARD_INFO("yas532", 0x2E),
+        .platform_data = &compass_data,
+    },
+};
+
+IIO Subsystem
+=============
+A successful installation will create the following two new directories under
+/sys/bus/iio/devices:
+    - iio:device0
+    - trigger0
+
+Also, a new file, "iio:device0", will be created in the /dev/ directory.
+(If you have more than one IIO device, the file will be named "iio:deviceX",
+where X is a number.)
+
+
+Communicating with the Driver in Userspace
+------------------------------------------
+The driver generates several files in sysfs upon installation.
+These files are used to communicate with the driver. The files can be found
+at /sys/bus/iio/devices/iio:device0 (or ../iio:deviceX as shown above).
+
+A brief description of the pertinent files for each Invensense device is shown
+below:
+
+AMI306
+--------
+compass_matrix (read-only)
+--show the orientation matrix obtained from the board file.
+
+sampling_frequency (read and write)
+--show and change the sampling rate of the sensor.
+
+YAS53x
+---------------------
+YAS53x has all the attributes AMI306 has. It has one additional attribute:
+
+overunderflow (read and write)
+--a value of 1 indicates an overflow or underflow has occurred. Write 0 to
+ this attribute to clear it.
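+
+As a quick illustration (not part of the original driver package), these
+attributes can be exercised from a shell. The device index and the
+attribute-group sub-directory (named "yas53x" for the YAS53x driver) depend
+on your system, so treat the paths below as an example only:
+
+    # show the orientation matrix taken from the board file
+    cat /sys/bus/iio/devices/iio:device0/yas53x/compass_matrix
+    # request a 50 Hz sampling rate
+    echo 50 > /sys/bus/iio/devices/iio:device0/yas53x/sampling_frequency
+    # clear a reported overflow/underflow condition (YAS53x only)
+    echo 0 > /sys/bus/iio/devices/iio:device0/yas53x/overunderflow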
+ +Streaming Data to an Userspace Application +========================================== +When streaming data to an userspace application, we recommend that you access +compass data via /dev/iio:device0. + +Please follow the steps below to read data at a constant rate from the driver: + +1. Write the desired output rate to sampling_frequency. +2. Write 1 to enable to turn on the event. +3. Read /dev/iio:device0 to get a string of gyro/accel/compass data. +4. Parse this string to obtain each compass element. + +Test Applications +================= +A test application is located under software/simple_apps/mpu_iio. +This application is stand-alone in that it cannot be run concurrently with other +entities trying to access the device node(s) or sysfs entries; in particular, +the + +Running Test Applications +--------------------------------------------------------- +To run test applications with AMI306 or YAS53x devices, +please use the following commands: + +1. for ami306: + mpu_iio -n ami306 -c 10 -l 3 + +2. for yas532: + mpu_iio -n yas532 -c 10 -l 3 + +Please use mpu_iio.c and iio_utils.h as example code for your development +purposes. diff --git a/drivers/staging/iio/magnetometer/inv_ami306_core.c b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_core.c similarity index 85% rename from drivers/staging/iio/magnetometer/inv_ami306_core.c rename to drivers/staging/iio/magnetometer/inv_compass/inv_ami306_core.c index c14a9033983..c003a0ea9e5 100644 --- a/drivers/staging/iio/magnetometer/inv_ami306_core.c +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_core.c @@ -36,19 +36,36 @@ #include #include #include + #include "inv_ami306_iio.h" -#include "../sysfs.h" +#include "../../sysfs.h" +#include "../../inv_test/inv_counters.h" static unsigned char late_initialize = true; +s32 i2c_write(const struct i2c_client *client, + u8 command, u8 length, const u8 *values) +{ + INV_I2C_INC_COMPASSWRITE(3); + return i2c_smbus_write_i2c_block_data(client, command, length, values); +} + +s32 i2c_read(const struct i2c_client *client, + u8 command, u8 length, u8 *values) +{ + INV_I2C_INC_COMPASSWRITE(3); + INV_I2C_INC_COMPASSREAD(length); + return i2c_smbus_read_i2c_block_data(client, command, length, values); +} + static int ami306_read_param(struct inv_ami306_state_s *st) { int result = 0; unsigned char regs[AMI_PARAM_LEN]; struct ami_sensor_parametor *param = &st->param; - result = i2c_smbus_read_i2c_block_data(st->i2c, REG_AMI_SENX, - AMI_PARAM_LEN, regs); + result = i2c_read(st->i2c, REG_AMI_SENX, + AMI_PARAM_LEN, regs); if (result < 0) return result; @@ -82,13 +99,13 @@ static int ami306_write_offset(const struct i2c_client *client, unsigned char dat[3]; dat[0] = (0x7f & fine[0]); dat[1] = 0; - result = i2c_smbus_write_i2c_block_data(client, REG_AMI_OFFX, 2, dat); + result = i2c_write(client, REG_AMI_OFFX, 2, dat); dat[0] = (0x7f & fine[1]); dat[1] = 0; - result = i2c_smbus_write_i2c_block_data(client, REG_AMI_OFFY, 2, dat); + result = i2c_write(client, REG_AMI_OFFY, 2, dat); dat[0] = (0x7f & fine[2]); dat[1] = 0; - result = i2c_smbus_write_i2c_block_data(client, REG_AMI_OFFZ, 2, dat); + result = i2c_write(client, REG_AMI_OFFZ, 2, dat); return result; } @@ -101,8 +118,7 @@ static int ami306_wait_data_ready(struct inv_ami306_state_s *st, for (; 0 < times; --times) { udelay(usecs); - result = i2c_smbus_read_i2c_block_data(st->i2c, - REG_AMI_STA1, 1, &buf); + result = i2c_read(st->i2c, REG_AMI_STA1, 1, &buf); if (result < 0) return INV_ERROR_COMPASS_DATA_NOT_READY; if (buf & 
AMI_STA1_DRDY_BIT) @@ -110,6 +126,7 @@ static int ami306_wait_data_ready(struct inv_ami306_state_s *st, else if (buf & AMI_STA1_DOR_BIT) return INV_ERROR_COMPASS_DATA_OVERFLOW; } + return INV_ERROR_COMPASS_DATA_NOT_READY; } int ami306_read_raw_data(struct inv_ami306_state_s *st, @@ -117,13 +134,13 @@ int ami306_read_raw_data(struct inv_ami306_state_s *st, { int result; unsigned char buf[6]; - result = i2c_smbus_read_i2c_block_data(st->i2c, REG_AMI_DATAX, - sizeof(buf), buf); + result = i2c_read(st->i2c, REG_AMI_DATAX, sizeof(buf), buf); if (result < 0) return result; dat[0] = le16_to_cpup((__le16 *)(&buf[0])); dat[1] = le16_to_cpup((__le16 *)(&buf[2])); dat[2] = le16_to_cpup((__le16 *)(&buf[4])); + return 0; } @@ -136,19 +153,19 @@ static int ami306_force_measurement(struct inv_ami306_state_s *st, int status; char buf; buf = AMI_CTRL3_FORCE_BIT; - result = i2c_smbus_write_i2c_block_data(st->i2c, - REG_AMI_CTRL3, 1, &buf); + result = i2c_write(st->i2c, REG_AMI_CTRL3, 1, &buf); if (result < 0) return result; - result = ami306_wait_data_ready(st, - AMI_DRDYWAIT, AMI_WAIT_DATAREADY_RETRY); + result = ami306_wait_data_ready(st, + AMI_DRDYWAIT, AMI_WAIT_DATAREADY_RETRY); if (result && result != INV_ERROR_COMPASS_DATA_OVERFLOW) return result; /* READ DATA X,Y,Z */ status = ami306_read_raw_data(st, ver); if (status) return status; + return result; } @@ -163,15 +180,13 @@ static int ami306_initial_b0_adjust(struct inv_ami306_state_s *st) unsigned char buf[3]; buf[0] = AMI_CTRL2_DREN; - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL2, - 1, buf); + result = i2c_write(st->i2c, REG_AMI_CTRL2, 1, buf); if (result) return result; buf[0] = AMI_CTRL4_HS & 0xFF; buf[1] = (AMI_CTRL4_HS >> 8) & 0xFF; - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL4, - 2, buf); + result = i2c_write(st->i2c, REG_AMI_CTRL4, 2, buf); if (result < 0) return result; @@ -199,8 +214,7 @@ static int ami306_initial_b0_adjust(struct inv_ami306_state_s *st) /* Software Reset */ buf[0] = AMI_CTRL3_SRST_BIT; - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL3, 1, - buf); + result = i2c_write(st->i2c, REG_AMI_CTRL3, 1, buf); if (result < 0) return result; else @@ -214,27 +228,25 @@ static int ami306_start_sensor(struct inv_ami306_state_s *st) /* Step 1 */ buf[0] = (AMI_CTRL1_PC1 | AMI_CTRL1_FS1_FORCE); - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL1, 1, - buf); + result = i2c_write(st->i2c, REG_AMI_CTRL1, 1, buf); if (result < 0) return result; /* Step 2 */ buf[0] = AMI_CTRL2_DREN; - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL2, 1, - buf); + result = i2c_write(st->i2c, REG_AMI_CTRL2, 1, buf); if (result < 0) return result; /* Step 3 */ buf[0] = (AMI_CTRL4_HS & 0xFF); buf[1] = (AMI_CTRL4_HS >> 8) & 0xFF; - result = i2c_smbus_write_i2c_block_data(st->i2c, REG_AMI_CTRL4, 2, - buf); + result = i2c_write(st->i2c, REG_AMI_CTRL4, 2, buf); if (result < 0) return result; /* Step 4 */ result = ami306_write_offset(st->i2c, st->fine); + return result; } @@ -243,32 +255,30 @@ int set_ami306_enable(struct iio_dev *indio_dev, int state) struct inv_ami306_state_s *st = iio_priv(indio_dev); int result; char buf; - if (state) { - buf = (AMI_CTRL1_PC1 | AMI_CTRL1_FS1_FORCE); - result = i2c_smbus_write_i2c_block_data(st->i2c, - REG_AMI_CTRL1, 1, &buf); - if (result < 0) - return result; - result = ami306_read_param(st); - if (result) - return result; - if (late_initialize) { - result = ami306_initial_b0_adjust(st); - if (result) - return result; - late_initialize = false; - } - 
result = ami306_start_sensor(st); - if (result) - return result; - buf = AMI_CTRL3_FORCE_BIT; - st->timestamp = iio_get_time_ns(); - result = i2c_smbus_write_i2c_block_data(st->i2c, - REG_AMI_CTRL3, 1, &buf); + buf = (AMI_CTRL1_PC1 | AMI_CTRL1_FS1_FORCE); + result = i2c_write(st->i2c, REG_AMI_CTRL1, 1, &buf); + if (result < 0) + return result; + + result = ami306_read_param(st); + if (result) + return result; + if (late_initialize) { + result = ami306_initial_b0_adjust(st); if (result) return result; + late_initialize = false; } + result = ami306_start_sensor(st); + if (result) + return result; + buf = AMI_CTRL3_FORCE_BIT; + st->timestamp = iio_get_time_ns(); + result = i2c_write(st->i2c, REG_AMI_CTRL3, 1, &buf); + if (result) + return result; + return 0; } @@ -281,8 +291,11 @@ static int ami306_read_raw(struct iio_dev *indio_dev, int *val2, long mask) { struct inv_ami306_state_s *st = iio_priv(indio_dev); + switch (mask) { case 0: + if (!(iio_buffer_enabled(indio_dev))) + return -EINVAL; if (chan->type == IIO_MAGN) { *val = st->compass_data[chan->channel2 - IIO_MOD_X]; return IIO_VAL_INT; @@ -300,25 +313,6 @@ static int ami306_read_raw(struct iio_dev *indio_dev, } } -/** - * ami306_write_raw() - write raw method. - */ -static int ami306_write_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int val, - int val2, - long mask) { - int result; - switch (mask) { - case IIO_CHAN_INFO_SCALE: - result = -EINVAL; - return result; - default: - return -EINVAL; - } - return 0; -} - /** * inv_compass_matrix_show() - show orientation matrix */ @@ -389,11 +383,8 @@ static ssize_t compass_cali_test(struct device *dev, /* Check if raw data match the gain from calibration file */ for (ii = 0; ii < 3; ii++) { - val = (short)(st->data_chk.ori[ii]); - - if (st->data_chk.gain[ii] > 0) - val = (short)(st->data_chk.ori[ii] * - 100 / st->data_chk.gain[ii]); + val = (short)(st->data_chk.ori[ii] * + st->data_chk.gain[ii] / 100); if (val == st->data_chk.post[ii]) bufcnt += sprintf(tmpbuf, @@ -401,7 +392,7 @@ static ssize_t compass_cali_test(struct device *dev, ii); else bufcnt += sprintf(tmpbuf, - "[axis-%d] Compensation FAIL. %d != %d\n", + "[axis-%d] Compensation FAIL. %d != %d", ii, val, st->data_chk.post[ii]); strncat(buf, tmpbuf, strlen(tmpbuf)); @@ -428,9 +419,17 @@ static void ami306_work_func(struct work_struct *work) struct iio_dev *indio_dev = iio_priv_to_dev(st); unsigned long delay = msecs_to_jiffies(st->delay); - inv_read_ami306_fifo(indio_dev); + mutex_lock(&indio_dev->mlock); + if (!(iio_buffer_enabled(indio_dev))) + goto error_ret; + st->timestamp = iio_get_time_ns(); schedule_delayed_work(&st->work, delay); + inv_read_ami306_fifo(indio_dev); + INV_I2C_INC_COMPASSIRQ(); + +error_ret: + mutex_unlock(&indio_dev->mlock); } static const struct iio_chan_spec compass_channels[] = { @@ -478,7 +477,6 @@ static const struct attribute_group inv_attribute_group = { static const struct iio_info ami306_info = { .driver_module = THIS_MODULE, .read_raw = &ami306_read_raw, - .write_raw = &ami306_write_raw, .attrs = &inv_attribute_group, }; @@ -506,7 +504,6 @@ static int inv_ami306_probe(struct i2c_client *client, } st = iio_priv(indio_dev); st->i2c = client; - st->sl_handle = client->adapter; st->plat_data = *(struct mpu_platform_data *)dev_get_platdata(&client->dev); st->delay = 10; @@ -519,7 +516,7 @@ static int inv_ami306_probe(struct i2c_client *client, /* Make state variables available to all _show and _store functions. 
*/ i2c_set_clientdata(client, indio_dev); - result = i2c_smbus_read_i2c_block_data(st->i2c, REG_AMI_WIA, 1, &data); + result = i2c_read(st->i2c, REG_AMI_WIA, 1, &data); if (result < 0) goto out_free; if (data != DATA_WIA) diff --git a/drivers/staging/iio/magnetometer/inv_ami306_iio.h b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_iio.h similarity index 88% rename from drivers/staging/iio/magnetometer/inv_ami306_iio.h rename to drivers/staging/iio/magnetometer/inv_compass/inv_ami306_iio.h index 41c355eed97..b4e03b43d2a 100644 --- a/drivers/staging/iio/magnetometer/inv_ami306_iio.h +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_iio.h @@ -17,8 +17,9 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro.h - * @brief Struct definitions for the Invensense gyro driver. + * @file inv_ami306_iio.h + * @brief Struct definitions for the Invensense implementation + * of ami306 driver. */ #ifndef _INV_GYRO_H_ @@ -30,9 +31,11 @@ #include #include #include -#include "../iio.h" -#include "../buffer.h" -#include "../trigger.h" + +#include "../../iio.h" +#include "../../buffer.h" +#include "../../trigger.h" + /** axis sensitivity(gain) calibration parameter information */ struct ami_vector3d { signed short x; /**< X-axis */ @@ -79,21 +82,24 @@ struct cali_data_check { /** * struct inv_ami306_state_s - Driver state variables. + * @plat_data: board file platform data. * @i2c: i2c client handle. - * @sl_handle: Handle to I2C port. + * @trig: not used. for compatibility. + * @param: ami specific sensor data. + * @work: work data structure. + * @delay: delay between each scheduled work. + * @fine: fine tunign parameters. + * @compass_data: compass data store. + * @timestamp: time stamp. */ struct inv_ami306_state_s { struct mpu_platform_data plat_data; struct i2c_client *i2c; - struct inv_chip_chan_info *chan_info; struct iio_trigger *trig; struct ami_sensor_parametor param; - unsigned char i2c_addr; - void *sl_handle; struct delayed_work work; int delay; - char enable; - char fine[3]; + s8 fine[3]; short compass_data[3]; s64 timestamp; struct cali_data_check data_chk; diff --git a/drivers/staging/iio/magnetometer/inv_ami306_ring.c b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_ring.c similarity index 82% rename from drivers/staging/iio/magnetometer/inv_ami306_ring.c rename to drivers/staging/iio/magnetometer/inv_compass/inv_ami306_ring.c index 9cff293f86a..d304c49c7ab 100644 --- a/drivers/staging/iio/magnetometer/inv_ami306_ring.c +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_ring.c @@ -17,9 +17,9 @@ * @brief Hardware drivers. * * @{ - * @file inv_gyro_misc.c - * @brief A sysfs device driver for Invensense gyroscopes. 
- * @details This file is part of inv_gyro driver code + * @file inv_ami306_ring.c + * @brief Invensense implementation for AMI306 + * @details This driver currently works for the AMI306 */ #include @@ -35,11 +35,13 @@ #include #include #include + +#include "../../iio.h" +#include "../../kfifo_buf.h" +#include "../../trigger_consumer.h" +#include "../../sysfs.h" + #include "inv_ami306_iio.h" -#include "../iio.h" -#include "../kfifo_buf.h" -#include "../trigger_consumer.h" -#include "../sysfs.h" #define AMI30X_CALIBRATION_PATH "/data/sensors/AMI304_Config.ini" #define AMI306_CALIBRATION_PATH "/data/sensors/AMI306_Config.ini" @@ -59,7 +61,7 @@ static int access_cali_file(int *gain, int target) int data[23]; int ii; - oldfs=get_fs(); + oldfs = get_fs(); set_fs(get_ds()); memset(buf, 0, sizeof(u8)*256); @@ -92,20 +94,19 @@ static int access_cali_file(int *gain, int target) if ((data[19] > 150) || (data[19] < 50) || (data[20] > 150) || (data[20] < 50) || (data[21] > 150) || (data[21] < 50)) { - for(ii = 0; ii < 3; ii++) + for (ii = 0; ii < 3; ii++) gain[ii] = 100; - }else{ - for(ii = 0; ii < 3; ii++) + } else { + for (ii = 0; ii < 3; ii++) gain[ii] = data[ii + 19]; } pr_info("gain: %d %d %d\n", gain[0], gain[1], gain[2]); return 0; - } - else - { - pr_info("Compass compensation: No target File. (%d)\n", target); + } else { + pr_info("Compass compensation: No target File. (%d)\n", + target); set_fs(oldfs); return -1; } @@ -114,18 +115,6 @@ static int access_cali_file(int *gain, int target) return -1; } -/** - * inv_irq_handler() - Cache a timestamp at each data ready interrupt. - */ -static irqreturn_t inv_ami_irq_handler(int irq, void *p) -{ - struct iio_poll_func *pf = p; - struct iio_dev *indio_dev = pf->indio_dev; - struct inv_ami306_state_s *st = iio_priv(indio_dev); - st->timestamp = iio_get_time_ns(); - - return IRQ_WAKE_THREAD; -} static int put_scan_to_buf(struct iio_dev *indio_dev, unsigned char *d, short *s, int scan_index) { struct iio_buffer *ring = indio_dev->buffer; @@ -167,7 +156,8 @@ int inv_read_ami306_fifo(struct iio_dev *indio_dev) if (!st->data_chk.load_cali) { for (ii = 0; ii < AMICaliMax; ii++) { - result = access_cali_file(st->data_chk.gain, ii); + result = + access_cali_file(st->data_chk.gain, ii); if (!result) { st->data_chk.fexist = 0; break; @@ -209,18 +199,34 @@ void inv_ami306_unconfigure_ring(struct iio_dev *indio_dev) static int inv_ami306_postenable(struct iio_dev *indio_dev) { struct inv_ami306_state_s *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; int result; + /* when all the outputs are disabled, even though buffer/enable is on, + do nothing */ + if (!(iio_scan_mask_query(indio_dev, ring, INV_AMI306_SCAN_MAGN_X) || + iio_scan_mask_query(indio_dev, ring, INV_AMI306_SCAN_MAGN_Y) || + iio_scan_mask_query(indio_dev, ring, INV_AMI306_SCAN_MAGN_Z))) + return 0; + result = set_ami306_enable(indio_dev, true); - schedule_delayed_work(&st->work, - msecs_to_jiffies(st->delay)); + if (result) + return result; + schedule_delayed_work(&st->work, msecs_to_jiffies(st->delay)); + return 0; } static int inv_ami306_predisable(struct iio_dev *indio_dev) { + struct iio_buffer *ring = indio_dev->buffer; struct inv_ami306_state_s *st = iio_priv(indio_dev); + cancel_delayed_work_sync(&st->work); + clear_bit(INV_AMI306_SCAN_MAGN_X, ring->scan_mask); + clear_bit(INV_AMI306_SCAN_MAGN_Y, ring->scan_mask); + clear_bit(INV_AMI306_SCAN_MAGN_Z, ring->scan_mask); + return 0; } diff --git a/drivers/staging/iio/magnetometer/inv_ami306_trigger.c 
b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_trigger.c similarity index 91% rename from drivers/staging/iio/magnetometer/inv_ami306_trigger.c rename to drivers/staging/iio/magnetometer/inv_compass/inv_ami306_trigger.c index 95f82ede2f8..2159edeaafc 100644 --- a/drivers/staging/iio/magnetometer/inv_ami306_trigger.c +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_ami306_trigger.c @@ -18,8 +18,8 @@ * * @{ * @file inv_ami306_trigger.c - * @brief A sysfs device driver for Invensense devices - * @details This file is part of inv_gyro driver code + * @brief Invensense implementation for AMI306 + * @details This driver currently works for the AMI306 */ #include @@ -37,9 +37,9 @@ #include #include -#include "../iio.h" -#include "../sysfs.h" -#include "../trigger.h" +#include "../../iio.h" +#include "../../sysfs.h" +#include "../../trigger.h" #include "inv_ami306_iio.h" static const struct iio_trigger_ops inv_ami306_trigger_ops = { diff --git a/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_core.c b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_core.c new file mode 100644 index 00000000000..6af420bb5cf --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_core.c @@ -0,0 +1,969 @@ +/* +* Copyright (C) 2012 Invensense, Inc. +* +* This software is licensed under the terms of the GNU General Public +* License version 2, as published by the Free Software Foundation, and +* may be copied, distributed, and modified under those terms. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +*/ + +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. + * + * @{ + * @file inv_yas53x_core.c + * @brief Invensense implementation for yas530/yas532/yas533. + * @details This driver currently works for yas530/yas532/yas533. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "inv_yas53x_iio.h" +#include "sysfs.h" +#include "inv_test/inv_counters.h" + +/* -------------------------------------------------------------------------- */ +static int Cx, Cy1, Cy2; +static int /*a1, */ a2, a3, a4, a5, a6, a7, a8, a9; +static int k; + +static u8 dx, dy1, dy2; +static u8 d2, d3, d4, d5, d6, d7, d8, d9, d0; +static u8 dck, ver; + +/** + * inv_serial_read() - Read one or more bytes from the device registers. + * @st: Device driver instance. + * @reg: First device register to be read from. + * @length: Number of bytes to read. + * @data: Data read from device. + * NOTE: The slave register will not increment when reading from the FIFO. + */ +int inv_serial_read(struct inv_compass_state *st, u8 reg, u16 length, u8 *data) +{ + int result; + INV_I2C_INC_COMPASSWRITE(3); + INV_I2C_INC_COMPASSREAD(length); + result = i2c_smbus_read_i2c_block_data(st->client, reg, length, data); + if (result != length) { + if (result < 0) + return result; + else + return -EINVAL; + } else { + return 0; + } +} + +/** + * inv_serial_single_write() - Write a byte to a device register. + * @st: Device driver instance. + * @reg: Device register to be written to. + * @data: Byte to write to device. 
+ */ +int inv_serial_single_write(struct inv_compass_state *st, u8 reg, u8 data) +{ + u8 d[1]; + d[0] = data; + INV_I2C_INC_COMPASSWRITE(3); + + return i2c_smbus_write_i2c_block_data(st->client, reg, 1, d); +} + +static int set_hardware_offset(struct inv_compass_state *st, + char offset_x, char offset_y1, char offset_y2) +{ + char data; + int result = 0; + + data = offset_x & 0x3f; + result = inv_serial_single_write(st, YAS530_REGADDR_OFFSET_X, data); + if (result) + return result; + + data = offset_y1 & 0x3f; + result = inv_serial_single_write(st, YAS530_REGADDR_OFFSET_Y1, data); + if (result) + return result; + + data = offset_y2 & 0x3f; + result = inv_serial_single_write(st, YAS530_REGADDR_OFFSET_Y2, data); + return result; +} + +static int set_measure_command(struct inv_compass_state *st) +{ + int result = 0; + result = inv_serial_single_write(st, + YAS530_REGADDR_MEASURE_COMMAND, 0x01); + return result; +} + +static int measure_normal(struct inv_compass_state *st, + int *busy, unsigned short *t, + unsigned short *x, unsigned short *y1, + unsigned short *y2) +{ + int result; + ktime_t sleeptime; + result = set_measure_command(st); + sleeptime = ktime_set(0, 2 * NSEC_PER_MSEC); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_hrtimeout(&sleeptime, HRTIMER_MODE_REL); + + result = st->read_data(st, busy, t, x, y1, y2); + + return result; +} + +static int measure_int(struct inv_compass_state *st, + int *busy, unsigned short *t, + unsigned short *x, unsigned short *y1, + unsigned short *y2) +{ + int result; + if (st->first_read_after_reset) { + st->first_read_after_reset = 0; + result = 1; + } else { + result = st->read_data(st, busy, t, x, y1, y2); + } + result |= set_measure_command(st); + + return result; +} + +static int yas530_read_data(struct inv_compass_state *st, + int *busy, u16 *t, u16 *x, u16 *y1, u16 *y2) +{ + u8 data[8]; + u16 b, to, xo, y1o, y2o; + int result; + + result = inv_serial_read(st, + YAS530_REGADDR_MEASURE_DATA, 8, data); + if (result) + return result; + + b = (data[0] >> 7) & 0x01; + to = (s16)(((data[0] << 2) & 0x1fc) | ((data[1] >> 6) & 0x03)); + xo = (s16)(((data[2] << 5) & 0xfe0) | ((data[3] >> 3) & 0x1f)); + y1o = (s16)(((data[4] << 5) & 0xfe0) | ((data[5] >> 3) & 0x1f)); + y2o = (s16)(((data[6] << 5) & 0xfe0) | ((data[7] >> 3) & 0x1f)); + + *busy = b; + *t = to; + *x = xo; + *y1 = y1o; + *y2 = y2o; + + return 0; +} + +static int yas532_533_read_data(struct inv_compass_state *st, + int *busy, u16 *t, u16 *x, u16 *y1, u16 *y2) +{ + u8 data[8]; + u16 b, to, xo, y1o, y2o; + int result; + + result = inv_serial_read(st, + YAS530_REGADDR_MEASURE_DATA, 8, data); + if (result) + return result; + + b = (data[0] >> 7) & 0x01; + to = (s16)((((s32)data[0] << 3) & 0x3f8) | ((data[1] >> 5) & 0x07)); + xo = (s16)((((s32)data[2] << 6) & 0x1fc0) | ((data[3] >> 2) & 0x3f)); + y1o = (s16)((((s32)data[4] << 6) & 0x1fc0) | ((data[5] >> 2) & 0x3f)); + y2o = (s16)((((s32)data[6] << 6) & 0x1fc0) | ((data[7] >> 2) & 0x3f)); + + *busy = b; + *t = to; + *x = xo; + *y1 = y1o; + *y2 = y2o; + + return 0; +} + +static int check_offset(struct inv_compass_state *st, + char offset_x, char offset_y1, char offset_y2, + int *flag_x, int *flag_y1, int *flag_y2) +{ + int result; + int busy; + short t, x, y1, y2; + + result = set_hardware_offset(st, offset_x, offset_y1, offset_y2); + if (result) + return result; + result = measure_normal(st, &busy, &t, &x, &y1, &y2); + if (result) + return result; + *flag_x = 0; + *flag_y1 = 0; + *flag_y2 = 0; + + if (x > st->center) + *flag_x = 1; + if (y1 > 
st->center) + *flag_y1 = 1; + if (y2 > st->center) + *flag_y2 = 1; + if (x < st->center) + *flag_x = -1; + if (y1 < st->center) + *flag_y1 = -1; + if (y2 < st->center) + *flag_y2 = -1; + + return result; +} + +static int measure_and_set_offset(struct inv_compass_state *st, + char *offset) +{ + int i; + int result = 0; + char offset_x = 0, offset_y1 = 0, offset_y2 = 0; + int flag_x = 0, flag_y1 = 0, flag_y2 = 0; + static const int correct[5] = {16, 8, 4, 2, 1}; + + for (i = 0; i < 5; i++) { + result = check_offset(st, + offset_x, offset_y1, offset_y2, + &flag_x, &flag_y1, &flag_y2); + if (result) + return result; + if (flag_x) + offset_x += flag_x * correct[i]; + if (flag_y1) + offset_y1 += flag_y1 * correct[i]; + if (flag_y2) + offset_y2 += flag_y2 * correct[i]; + } + + result = set_hardware_offset(st, offset_x, offset_y1, offset_y2); + if (result) + return result; + offset[0] = offset_x; + offset[1] = offset_y1; + offset[2] = offset_y2; + + return result; +} + +static void coordinate_conversion(short x, short y1, short y2, short t, + int *xo, int *yo, int *zo) +{ + int sx, sy1, sy2, sy, sz; + int hx, hy, hz; + + sx = x - (Cx * t) / 100; + sy1 = y1 - (Cy1 * t) / 100; + sy2 = y2 - (Cy2 * t) / 100; + + sy = sy1 - sy2; + sz = -sy1 - sy2; + + hx = k * ((100 * sx + a2 * sy + a3 * sz) / 10); + hy = k * ((a4 * sx + a5 * sy + a6 * sz) / 10); + hz = k * ((a7 * sx + a8 * sy + a9 * sz) / 10); + + *xo = hx; + *yo = hy; + *zo = hz; +} + +static int get_cal_data_yas532_533(struct inv_compass_state *st) +{ + u8 data[YAS_YAS532_533_CAL_DATA_SIZE]; + int result; + + result = inv_serial_read(st, YAS530_REGADDR_CAL, + YAS_YAS532_533_CAL_DATA_SIZE, data); + if (result) + return result; + /* CAL data Second Read */ + result = inv_serial_read(st, YAS530_REGADDR_CAL, + YAS_YAS532_533_CAL_DATA_SIZE, data); + if (result) + return result; + + dx = data[0]; + dy1 = data[1]; + dy2 = data[2]; + d2 = (data[3] >> 2) & 0x03f; + d3 = (u8)(((data[3] << 2) & 0x0c) | ((data[4] >> 6) & 0x03)); + d4 = (u8)(data[4] & 0x3f); + d5 = (data[5] >> 2) & 0x3f; + d6 = (u8)(((data[5] << 4) & 0x30) | ((data[6] >> 4) & 0x0f)); + d7 = (u8)(((data[6] << 3) & 0x78) | ((data[7] >> 5) & 0x07)); + d8 = (u8)(((data[7] << 1) & 0x3e) | ((data[8] >> 7) & 0x01)); + d9 = (u8)(((data[8] << 1) & 0xfe) | ((data[9] >> 7) & 0x01)); + d0 = (u8)((data[9] >> 2) & 0x1f); + dck = (u8)(((data[9] << 1) & 0x06) | ((data[10] >> 7) & 0x01)); + ver = (u8)((data[13]) & 0x01); + + Cx = dx * 10 - 1280; + Cy1 = dy1 * 10 - 1280; + Cy2 = dy2 * 10 - 1280; + a2 = d2 - 32; + a3 = d3 - 8; + a4 = d4 - 32; + a5 = d5 + 38; + a6 = d6 - 32; + a7 = d7 - 64; + a8 = d8 - 32; + a9 = d9; + k = d0; + + return 0; +} + +static int get_cal_data_yas530(struct inv_compass_state *st) +{ + u8 data[YAS_YAS530_CAL_DATA_SIZE]; + int result; + /* CAL data read */ + result = inv_serial_read(st, YAS530_REGADDR_CAL, + YAS_YAS530_CAL_DATA_SIZE, data); + if (result) + return result; + /* CAL data Second Read */ + result = inv_serial_read(st, YAS530_REGADDR_CAL, + YAS_YAS530_CAL_DATA_SIZE, data); + if (result) + return result; + /*Cal data */ + dx = data[0]; + dy1 = data[1]; + dy2 = data[2]; + d2 = (data[3] >> 2) & 0x03f; + d3 = ((data[3] << 2) & 0x0c) | ((data[4] >> 6) & 0x03); + d4 = data[4] & 0x3f; + d5 = (data[5] >> 2) & 0x3f; + d6 = ((data[5] << 4) & 0x30) | ((data[6] >> 4) & 0x0f); + d7 = ((data[6] << 3) & 0x78) | ((data[7] >> 5) & 0x07); + d8 = ((data[7] << 1) & 0x3e) | ((data[8] >> 7) & 0x01); + d9 = ((data[8] << 1) & 0xfe) | ((data[9] >> 7) & 0x01); + d0 = (data[9] >> 2) & 0x1f; + dck = 
((data[9] << 1) & 0x06) | ((data[10] >> 7) & 0x01); + ver = (u8)((data[15]) & 0x03); + + /*Correction Data */ + Cx = (int)dx * 6 - 768; + Cy1 = (int)dy1 * 6 - 768; + Cy2 = (int)dy2 * 6 - 768; + a2 = (int)d2 - 32; + a3 = (int)d3 - 8; + a4 = (int)d4 - 32; + a5 = (int)d5 + 38; + a6 = (int)d6 - 32; + a7 = (int)d7 - 64; + a8 = (int)d8 - 32; + a9 = (int)d9; + k = (int)d0 + 10; + + return 0; +} + + +static void thresh_filter_init(struct yas_thresh_filter *thresh_filter, + int threshold) +{ + thresh_filter->threshold = threshold; + thresh_filter->last = 0; +} + +static void +adaptive_filter_init(struct yas_adaptive_filter *adap_filter, int len, + int noise) +{ + int i; + + adap_filter->num = 0; + adap_filter->index = 0; + adap_filter->filter_noise = noise; + adap_filter->filter_len = len; + + for (i = 0; i < adap_filter->filter_len; ++i) + adap_filter->sequence[i] = 0; +} + +static void yas_init_adap_filter(struct inv_compass_state *st) +{ + struct yas_filter *f; + int i; + int noise[] = {YAS_MAG_DEFAULT_FILTER_NOISE_X, + YAS_MAG_DEFAULT_FILTER_NOISE_Y, + YAS_MAG_DEFAULT_FILTER_NOISE_Z}; + + f = &st->filter; + f->filter_len = YAS_MAG_DEFAULT_FILTER_LEN; + for (i = 0; i < 3; i++) + f->filter_noise[i] = noise[i]; + + for (i = 0; i < 3; i++) { + adaptive_filter_init(&f->adap_filter[i], f->filter_len, + f->filter_noise[i]); + thresh_filter_init(&f->thresh_filter[i], f->filter_thresh); + } +} + +int yas53x_resume(struct inv_compass_state *st) +{ + int result = 0; + + unsigned char dummyData = 0x00; + unsigned char read_reg[1]; + + /* =============================================== */ + + /* Step 1 - Test register initialization */ + dummyData = 0x00; + result = inv_serial_single_write(st, + YAS530_REGADDR_TEST1, dummyData); + if (result) + return result; + result = + inv_serial_single_write(st, + YAS530_REGADDR_TEST2, dummyData); + if (result) + return result; + /* Device ID read */ + result = inv_serial_read(st, + YAS530_REGADDR_DEVICE_ID, 1, read_reg); + + /*Step 2 Read the CAL register */ + st->get_cal_data(st); + + /*Obtain the [49:47] bits */ + dck &= 0x07; + + /*Step 3 : Storing the CONFIG with the CLK value */ + dummyData = 0x00 | (dck << 2); + result = inv_serial_single_write(st, + YAS530_REGADDR_CONFIG, dummyData); + if (result) + return result; + /*Step 4 : Set Acquisition Interval Register */ + dummyData = 0x00; + result = inv_serial_single_write(st, + YAS530_REGADDR_MEASURE_INTERVAL, + dummyData); + if (result) + return result; + + /*Step 5 : Reset Coil */ + dummyData = 0x00; + result = inv_serial_single_write(st, + YAS530_REGADDR_ACTUATE_INIT_COIL, + dummyData); + if (result) + return result; + /* Offset Measurement and Set */ + result = measure_and_set_offset(st, st->offset); + if (result) + return result; + st->first_measure_after_reset = 1; + st->first_read_after_reset = 1; + st->reset_timer = 0; + + yas_init_adap_filter(st); + + return result; +} + +static int inv_check_range(struct inv_compass_state *st, s16 x, s16 y1, s16 y2) +{ + int result = 0; + + if (x == 0) + result |= 0x01; + if (x == st->overflow_bound) + result |= 0x02; + if (y1 == 0) + result |= 0x04; + if (y1 == st->overflow_bound) + result |= 0x08; + if (y2 == 0) + result |= 0x10; + if (y2 == st->overflow_bound) + result |= 0x20; + + return result; +} +static int square(int data) +{ + return data * data; +} + +static int +adaptive_filter_filter(struct yas_adaptive_filter *adap_filter, int in) +{ + int avg, sum; + int i; + + if (adap_filter->filter_len == 0) + return in; + if (adap_filter->num < adap_filter->filter_len) { + 
adap_filter->sequence[adap_filter->index++] = in / 100; + adap_filter->num++; + return in; + } + if (adap_filter->filter_len <= adap_filter->index) + adap_filter->index = 0; + adap_filter->sequence[adap_filter->index++] = in / 100; + + avg = 0; + for (i = 0; i < adap_filter->filter_len; i++) + avg += adap_filter->sequence[i]; + avg /= adap_filter->filter_len; + + sum = 0; + for (i = 0; i < adap_filter->filter_len; i++) + sum += square(avg - adap_filter->sequence[i]); + sum /= adap_filter->filter_len; + + if (sum <= adap_filter->filter_noise) + return avg * 100; + + return ((in/100 - avg) * (sum - adap_filter->filter_noise) / sum + avg) + * 100; +} + +static int +thresh_filter_filter(struct yas_thresh_filter *thresh_filter, int in) +{ + if (in < thresh_filter->last - thresh_filter->threshold + || thresh_filter->last + + thresh_filter->threshold < in) { + thresh_filter->last = in; + return in; + } else { + return thresh_filter->last; + } +} + +static void +filter_filter(struct yas_filter *d, int *orig, int *filtered) +{ + int i; + + for (i = 0; i < 3; i++) { + filtered[i] = adaptive_filter_filter(&d->adap_filter[i], + orig[i]); + filtered[i] = thresh_filter_filter(&d->thresh_filter[i], + filtered[i]); + } +} + +int yas53x_read(struct inv_compass_state *st, short rawfixed[3], + int *overunderflow) +{ + int result = 0; + + int busy, i, ov; + short t, x, y1, y2; + s32 xyz[3], disturb[3]; + + result = measure_int(st, &busy, &t, &x, &y1, &y2); + if (result) + return result; + if (busy) + return -1; + coordinate_conversion(x, y1, y2, t, &xyz[0], &xyz[1], &xyz[2]); + filter_filter(&st->filter, xyz, xyz); + for (i = 0; i < 3; i++) + rawfixed[i] = (short)(xyz[i] / 100); + + if (st->first_measure_after_reset) { + for (i = 0; i < 3; i++) + st->base_compass_data[i] = rawfixed[i]; + st->first_measure_after_reset = 0; + } + ov = 0; + for (i = 0; i < 3; i++) { + disturb[i] = abs(st->base_compass_data[i] - rawfixed[i]); + if (disturb[i] > YAS_MAG_DISTURBURNCE_THRESHOLD) + ov = 1; + } + if (ov) + st->reset_timer += st->delay; + else + st->reset_timer = 0; + + if (st->reset_timer > YAS_RESET_COIL_TIME_THRESHOLD) + *overunderflow = (1<<8); + else + *overunderflow = 0; + *overunderflow |= inv_check_range(st, x, y1, y2); + + return 0; +} + +/** + * yas53x_read_raw() - read raw method. 
+ */ +static int yas53x_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, + int *val2, + long mask) { + struct inv_compass_state *st = iio_priv(indio_dev); + + switch (mask) { + case 0: + if (!(iio_buffer_enabled(indio_dev))) + return -EINVAL; + if (chan->type == IIO_MAGN) { + *val = st->compass_data[chan->channel2 - IIO_MOD_X]; + return IIO_VAL_INT; + } + + return -EINVAL; + case IIO_CHAN_INFO_SCALE: + if (chan->type == IIO_MAGN) { + *val = YAS530_SCALE; + return IIO_VAL_INT; + } + return -EINVAL; + default: + return -EINVAL; + } +} + +/** + * inv_compass_matrix_show() - show orientation matrix + */ +static ssize_t inv_compass_matrix_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + signed char *m; + struct inv_compass_state *st = iio_priv(indio_dev); + m = st->plat_data.orientation; + return sprintf(buf, + "%d,%d,%d,%d,%d,%d,%d,%d,%d\n", + m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]); +} + +static ssize_t yas53x_rate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 data; + int error; + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct inv_compass_state *st = iio_priv(indio_dev); + + error = kstrtoint(buf, 10, &data); + if (error) + return error; + if (0 == data) + return -EINVAL; + /* transform rate to delay in ms */ + data = MSEC_PER_SEC / data; + + if (data > YAS530_MAX_DELAY) + data = YAS530_MAX_DELAY; + if (data < YAS530_MIN_DELAY) + data = YAS530_MIN_DELAY; + st->delay = data; + + return count; +} + +static ssize_t yas53x_rate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct inv_compass_state *st = iio_priv(indio_dev); + /* transform delay in ms to rate */ + return sprintf(buf, "%d\n", (int)MSEC_PER_SEC / st->delay); +} + +static ssize_t yas53x_overunderflow_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 data; + int error; + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct inv_compass_state *st = iio_priv(indio_dev); + + error = kstrtoint(buf, 10, &data); + if (error) + return error; + if (data) + return -EINVAL; + st->overunderflow = data; + + return count; +} + +static ssize_t yas53x_overunderflow_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct inv_compass_state *st = iio_priv(indio_dev); + + return sprintf(buf, "%d\n", st->overunderflow); +} + +void set_yas53x_enable(struct iio_dev *indio_dev, bool enable) +{ + struct inv_compass_state *st = iio_priv(indio_dev); + + yas_init_adap_filter(st); + st->first_measure_after_reset = 1; + st->first_read_after_reset = 1; + schedule_delayed_work(&st->work, msecs_to_jiffies(st->delay)); +} + +static void yas53x_work_func(struct work_struct *work) +{ + struct inv_compass_state *st = + container_of((struct delayed_work *)work, + struct inv_compass_state, work); + struct iio_dev *indio_dev = iio_priv_to_dev(st); + u32 delay = msecs_to_jiffies(st->delay); + + mutex_lock(&indio_dev->mlock); + if (!(iio_buffer_enabled(indio_dev))) + goto error_ret; + + schedule_delayed_work(&st->work, delay); + inv_read_yas53x_fifo(indio_dev); + INV_I2C_INC_COMPASSIRQ(); + +error_ret: + mutex_unlock(&indio_dev->mlock); +} + +static const struct iio_chan_spec compass_channels[] = { + { + .type = IIO_MAGN, + .modified = 1, + .channel2 = IIO_MOD_X, + .info_mask 
= IIO_CHAN_INFO_SCALE_SHARED_BIT, + .scan_index = INV_YAS53X_SCAN_MAGN_X, + .scan_type = IIO_ST('s', 16, 16, 0) + }, { + .type = IIO_MAGN, + .modified = 1, + .channel2 = IIO_MOD_Y, + .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, + .scan_index = INV_YAS53X_SCAN_MAGN_Y, + .scan_type = IIO_ST('s', 16, 16, 0) + }, { + .type = IIO_MAGN, + .modified = 1, + .channel2 = IIO_MOD_Z, + .info_mask = IIO_CHAN_INFO_SCALE_SHARED_BIT, + .scan_index = INV_YAS53X_SCAN_MAGN_Z, + .scan_type = IIO_ST('s', 16, 16, 0) + }, + IIO_CHAN_SOFT_TIMESTAMP(INV_YAS53X_SCAN_TIMESTAMP) +}; + +static DEVICE_ATTR(compass_matrix, S_IRUGO, inv_compass_matrix_show, NULL); +static DEVICE_ATTR(sampling_frequency, S_IRUGO | S_IWUSR, yas53x_rate_show, + yas53x_rate_store); +static DEVICE_ATTR(overunderflow, S_IRUGO | S_IWUSR, + yas53x_overunderflow_show, yas53x_overunderflow_store); + +static struct attribute *inv_yas53x_attributes[] = { + &dev_attr_compass_matrix.attr, + &dev_attr_sampling_frequency.attr, + &dev_attr_overunderflow.attr, + NULL, +}; +static const struct attribute_group inv_attribute_group = { + .name = "yas53x", + .attrs = inv_yas53x_attributes +}; + +static const struct iio_info yas53x_info = { + .driver_module = THIS_MODULE, + .read_raw = &yas53x_read_raw, + .attrs = &inv_attribute_group, +}; + +/*constant IIO attribute */ +/** + * inv_yas53x_probe() - probe function. + */ +static int inv_yas53x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct inv_compass_state *st; + struct iio_dev *indio_dev; + int result; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + result = -ENODEV; + goto out_no_free; + } + indio_dev = iio_allocate_device(sizeof(*st)); + if (indio_dev == NULL) { + result = -ENOMEM; + goto out_no_free; + } + st = iio_priv(indio_dev); + st->client = client; + st->plat_data = + *(struct mpu_platform_data *)dev_get_platdata(&client->dev); + st->delay = 10; + + i2c_set_clientdata(client, indio_dev); + + if (!strcmp(id->name, "yas530")) { + st->read_data = yas530_read_data; + st->get_cal_data = get_cal_data_yas530; + st->overflow_bound = YAS_YAS530_DATA_OVERFLOW; + st->center = YAS_YAS530_DATA_CENTER; + st->filter.filter_thresh = YAS530_MAG_DEFAULT_FILTER_THRESH; + } else { + st->read_data = yas532_533_read_data; + st->get_cal_data = get_cal_data_yas532_533; + st->overflow_bound = YAS_YAS532_533_DATA_OVERFLOW; + st->center = YAS_YAS532_533_DATA_CENTER; + st->filter.filter_thresh = YAS532_MAG_DEFAULT_FILTER_THRESH; + } + st->upper_bound = st->center + (st->center >> 1); + st->lower_bound = (st->center >> 1); + + result = yas53x_resume(st); + if (result) + goto out_free; + + indio_dev->dev.parent = &client->dev; + indio_dev->name = id->name; + indio_dev->channels = compass_channels; + indio_dev->num_channels = ARRAY_SIZE(compass_channels); + indio_dev->info = &yas53x_info; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->currentmode = INDIO_DIRECT_MODE; + + result = inv_yas53x_configure_ring(indio_dev); + if (result) + goto out_free; + result = iio_buffer_register(indio_dev, indio_dev->channels, + indio_dev->num_channels); + if (result) + goto out_unreg_ring; + result = inv_yas53x_probe_trigger(indio_dev); + if (result) + goto out_remove_ring; + + result = iio_device_register(indio_dev); + if (result) + goto out_remove_trigger; + INIT_DELAYED_WORK(&st->work, yas53x_work_func); + pr_info("%s: Probe name %s\n", __func__, id->name); + + return 0; +out_remove_trigger: + if (indio_dev->modes & INDIO_BUFFER_TRIGGERED) + inv_yas53x_remove_trigger(indio_dev); 
+out_remove_ring: + iio_buffer_unregister(indio_dev); +out_unreg_ring: + inv_yas53x_unconfigure_ring(indio_dev); +out_free: + iio_free_device(indio_dev); +out_no_free: + dev_err(&client->adapter->dev, "%s failed %d\n", __func__, result); + return -EIO; +} + +/** + * inv_yas53x_remove() - remove function. + */ +static int inv_yas53x_remove(struct i2c_client *client) +{ + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct inv_compass_state *st = iio_priv(indio_dev); + cancel_delayed_work_sync(&st->work); + iio_device_unregister(indio_dev); + inv_yas53x_remove_trigger(indio_dev); + iio_buffer_unregister(indio_dev); + inv_yas53x_unconfigure_ring(indio_dev); + iio_free_device(indio_dev); + + dev_info(&client->adapter->dev, "inv_yas53x_iio module removed.\n"); + return 0; +} +static const unsigned short normal_i2c[] = { I2C_CLIENT_END }; +/* device id table is used to identify what device can be + * supported by this driver + */ +static const struct i2c_device_id inv_yas53x_id[] = { + {"yas530", 0}, + {"yas532", 0}, + {"yas533", 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, inv_yas53x_id); + +static struct i2c_driver inv_yas53x_driver = { + .class = I2C_CLASS_HWMON, + .probe = inv_yas53x_probe, + .remove = inv_yas53x_remove, + .id_table = inv_yas53x_id, + .driver = { + .owner = THIS_MODULE, + .name = "inv_yas53x_iio", + }, + .address_list = normal_i2c, +}; + +static int __init inv_yas53x_init(void) +{ + int result = i2c_add_driver(&inv_yas53x_driver); + if (result) { + pr_err("%s failed\n", __func__); + return result; + } + return 0; +} + +static void __exit inv_yas53x_exit(void) +{ + i2c_del_driver(&inv_yas53x_driver); +} + +module_init(inv_yas53x_init); +module_exit(inv_yas53x_exit); + +MODULE_AUTHOR("Invensense Corporation"); +MODULE_DESCRIPTION("Invensense device driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("inv_yas53x_iio"); +/** + * @} + */ + diff --git a/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_iio.h b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_iio.h new file mode 100644 index 00000000000..92bf0af7ec7 --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_iio.h @@ -0,0 +1,172 @@ +/* +* Copyright (C) 2012 Invensense, Inc. +* +* This software is licensed under the terms of the GNU General Public +* License version 2, as published by the Free Software Foundation, and +* may be copied, distributed, and modified under those terms. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +*/ + +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. + * + * @{ + * @file inv_yas53x_iio.h + * @brief Struct definitions for the Invensense implementation + * of yas53x driver. + */ + +#ifndef _INV_GYRO_H_ +#define _INV_GYRO_H_ + +#include +#include +#include +#include +#include +#include + +#include "iio.h" +#include "buffer.h" +#include "trigger.h" + +#define YAS_MAG_MAX_FILTER_LEN 30 +struct yas_adaptive_filter { + int num; + int index; + int filter_len; + int filter_noise; + int sequence[YAS_MAG_MAX_FILTER_LEN]; +}; + +struct yas_thresh_filter { + int threshold; + int last; +}; + +struct yas_filter { + int filter_len; + int filter_thresh; + int filter_noise[3]; + struct yas_adaptive_filter adap_filter[3]; + struct yas_thresh_filter thresh_filter[3]; +}; + +/** + * struct inv_compass_state - Driver state variables. 
+ * @plat_data: mpu platform data from board file. + * @client: i2c client handle. + * @chan_info: channel information. + * @trig: IIO trigger. + * @work: work structure. + * @delay: delay to schedule the next work. + * @overflow_bound: bound to determine overflow. + * @center: center of the measurement. + * @compass_data[3]: compass data store. + * @offset[3]: yas530 specific data. + * @base_compass_data[3]: first measure data after reset. + * @first_measure_after_reset:1: flag for first measurement after reset. + * @first_read_after_reset:1: flag for first read after reset. + * @reset_timer: timer to accumulate overflow conditions. + * @overunderflow:1: overflow and underflow flag. + * @filter: filter data structure. + * @read_data: function pointer of reading data from device. + * @get_cal_data: function pointer of reading cal data. + */ +struct inv_compass_state { + struct mpu_platform_data plat_data; + struct i2c_client *client; + struct iio_trigger *trig; + struct delayed_work work; + s16 delay; + s16 overflow_bound; + s16 upper_bound; + s16 lower_bound; + s16 center; + s16 compass_data[3]; + s8 offset[3]; + s16 base_compass_data[3]; + u8 first_measure_after_reset:1; + u8 first_read_after_reset:1; + u8 overunderflow:1; + s32 reset_timer; + struct yas_filter filter; + int (*read_data)(struct inv_compass_state *st, + int *, u16 *, u16 *, u16 *, u16 *); + int (*get_cal_data)(struct inv_compass_state *); +}; + +/* scan element definition */ +enum inv_mpu_scan { + INV_YAS53X_SCAN_MAGN_X, + INV_YAS53X_SCAN_MAGN_Y, + INV_YAS53X_SCAN_MAGN_Z, + INV_YAS53X_SCAN_TIMESTAMP, +}; + +#define YAS530_REGADDR_DEVICE_ID 0x80 +#define YAS530_REGADDR_ACTUATE_INIT_COIL 0x81 +#define YAS530_REGADDR_MEASURE_COMMAND 0x82 +#define YAS530_REGADDR_CONFIG 0x83 +#define YAS530_REGADDR_MEASURE_INTERVAL 0x84 +#define YAS530_REGADDR_OFFSET_X 0x85 +#define YAS530_REGADDR_OFFSET_Y1 0x86 +#define YAS530_REGADDR_OFFSET_Y2 0x87 +#define YAS530_REGADDR_TEST1 0x88 +#define YAS530_REGADDR_TEST2 0x89 +#define YAS530_REGADDR_CAL 0x90 +#define YAS530_REGADDR_MEASURE_DATA 0xb0 + +#define YAS530_MAX_DELAY 200 +#define YAS530_MIN_DELAY 5 +#define YAS530_SCALE 107374182L + +#define YAS_YAS530_VERSION_A 0 /* YAS530 (MS-3E Aver) */ +#define YAS_YAS530_VERSION_B 1 /* YAS530B (MS-3E Bver) */ +#define YAS_YAS530_VERSION_A_COEF 380 +#define YAS_YAS530_VERSION_B_COEF 550 +#define YAS_YAS530_DATA_CENTER 2048 +#define YAS_YAS530_DATA_OVERFLOW 4095 +#define YAS_YAS530_CAL_DATA_SIZE 16 + +/*filter related defines */ +#define YAS_MAG_DEFAULT_FILTER_NOISE_X 144 /* sd: 1200 nT */ +#define YAS_MAG_DEFAULT_FILTER_NOISE_Y 144 /* sd: 1200 nT */ +#define YAS_MAG_DEFAULT_FILTER_NOISE_Z 144 /* sd: 1200 nT */ +#define YAS_MAG_DEFAULT_FILTER_LEN 20 + +#define YAS530_MAG_DEFAULT_FILTER_THRESH 100 +#define YAS532_MAG_DEFAULT_FILTER_THRESH 300 + +#define YAS_YAS532_533_VERSION_AB 0 /* YAS532_533AB (MS-3R/3F ABver) */ +#define YAS_YAS532_533_VERSION_AC 1 /* YAS532_533AC (MS-3R/3F ACver) */ +#define YAS_YAS532_533_VERSION_AB_COEF 1800 +#define YAS_YAS532_533_VERSION_AC_COEF 900 +#define YAS_YAS532_533_DATA_CENTER 4096 +#define YAS_YAS532_533_DATA_OVERFLOW 8190 +#define YAS_YAS532_533_CAL_DATA_SIZE 14 + +#define YAS_MAG_DISTURBURNCE_THRESHOLD 1600 +#define YAS_RESET_COIL_TIME_THRESHOLD 3000 + +#define INV_ERROR_COMPASS_DATA_OVERFLOW (-1) +#define INV_ERROR_COMPASS_DATA_NOT_READY (-2) + +int inv_yas53x_configure_ring(struct iio_dev *indio_dev); +void inv_yas53x_unconfigure_ring(struct iio_dev *indio_dev); +int inv_yas53x_probe_trigger(struct iio_dev 
*indio_dev); +void inv_yas53x_remove_trigger(struct iio_dev *indio_dev); +void set_yas53x_enable(struct iio_dev *indio_dev, bool enable); +void inv_read_yas53x_fifo(struct iio_dev *indio_dev); +int yas53x_read(struct inv_compass_state *st, short rawfixed[3], + s32 *overunderflow); +int yas53x_resume(struct inv_compass_state *st); + +#endif /* #ifndef _INV_GYRO_H_ */ + diff --git a/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_ring.c b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_ring.c new file mode 100644 index 00000000000..efcf49c6839 --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_ring.c @@ -0,0 +1,165 @@ +/* +* Copyright (C) 2012 Invensense, Inc. +* +* This software is licensed under the terms of the GNU General Public +* License version 2, as published by the Free Software Foundation, and +* may be copied, distributed, and modified under those terms. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +*/ + +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. + * + * @{ + * @file inv_yas53x_ring.c + * @brief Invensense implementation for yas530/yas532/yas533. + * @details This driver currently works for the yas530/yas532/yas533. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iio.h" +#include "kfifo_buf.h" +#include "trigger_consumer.h" +#include "sysfs.h" + +#include "inv_yas53x_iio.h" + +static s64 get_time_ns(void) +{ + struct timespec ts; + ktime_get_ts(&ts); + + return timespec_to_ns(&ts); +} + +static int put_scan_to_buf(struct iio_dev *indio_dev, unsigned char *d, + short *s, int scan_index) +{ + struct iio_buffer *ring = indio_dev->buffer; + int st; + int i, d_ind; + + d_ind = 0; + for (i = 0; i < 3; i++) { + st = iio_scan_mask_query(indio_dev, ring, scan_index + i); + if (st) { + memcpy(&d[d_ind], &s[i], sizeof(s[i])); + d_ind += sizeof(s[i]); + } + } + + return d_ind; +} + +/** + * inv_read_yas53x_fifo() - Transfer data from FIFO to ring buffer. 
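+ *
+ * (The body reads one sample via yas53x_read(), packs only the enabled
+ * X/Y/Z channels with put_scan_to_buf(), appends the timestamp when the
+ * scan requests one, stores the record into the kfifo and, if the part
+ * reported an over/underflow, re-runs yas53x_resume() and latches the
+ * overunderflow flag.)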
+ */ +void inv_read_yas53x_fifo(struct iio_dev *indio_dev) +{ + struct inv_compass_state *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + int d_ind; + s32 overunderflow; + s8 *tmp; + s64 tmp_buf[2]; + + if (!yas53x_read(st, st->compass_data, &overunderflow)) { + tmp = (u8 *)tmp_buf; + d_ind = put_scan_to_buf(indio_dev, tmp, st->compass_data, + INV_YAS53X_SCAN_MAGN_X); + if (ring->scan_timestamp) + tmp_buf[(d_ind + 7) / 8] = get_time_ns(); + ring->access->store_to(indio_dev->buffer, tmp, 0); + + if (overunderflow) { + yas53x_resume(st); + if (!st->overunderflow) + st->overunderflow = 1; + } + } +} + +void inv_yas53x_unconfigure_ring(struct iio_dev *indio_dev) +{ + iio_kfifo_free(indio_dev->buffer); +}; + +static int inv_yas53x_postenable(struct iio_dev *indio_dev) +{ + struct inv_compass_state *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + + /* when all the outputs are disabled, even though buffer/enable is on, + do nothing */ + if (!(iio_scan_mask_query(indio_dev, ring, INV_YAS53X_SCAN_MAGN_X) || + iio_scan_mask_query(indio_dev, ring, INV_YAS53X_SCAN_MAGN_Y) || + iio_scan_mask_query(indio_dev, ring, INV_YAS53X_SCAN_MAGN_Z))) + return 0; + + set_yas53x_enable(indio_dev, true); + schedule_delayed_work(&st->work, + msecs_to_jiffies(st->delay)); + + return 0; +} + +static int inv_yas53x_predisable(struct iio_dev *indio_dev) +{ + struct inv_compass_state *st = iio_priv(indio_dev); + struct iio_buffer *ring = indio_dev->buffer; + + cancel_delayed_work_sync(&st->work); + clear_bit(INV_YAS53X_SCAN_MAGN_X, ring->scan_mask); + clear_bit(INV_YAS53X_SCAN_MAGN_Y, ring->scan_mask); + clear_bit(INV_YAS53X_SCAN_MAGN_Z, ring->scan_mask); + + return 0; +} + +static const struct iio_buffer_setup_ops inv_yas53x_ring_setup_ops = { + .preenable = &iio_sw_buffer_preenable, + .postenable = &inv_yas53x_postenable, + .predisable = &inv_yas53x_predisable, +}; + +int inv_yas53x_configure_ring(struct iio_dev *indio_dev) +{ + int ret = 0; + struct iio_buffer *ring; + + ring = iio_kfifo_allocate(indio_dev); + if (!ring) { + ret = -ENOMEM; + return ret; + } + indio_dev->buffer = ring; + /* setup ring buffer */ + ring->scan_timestamp = true; + indio_dev->setup_ops = &inv_yas53x_ring_setup_ops; + + indio_dev->modes |= INDIO_BUFFER_TRIGGERED; + return 0; +} +/** + * @} + */ + diff --git a/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_trigger.c b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_trigger.c new file mode 100644 index 00000000000..a20ce2baa7e --- /dev/null +++ b/drivers/staging/iio/magnetometer/inv_compass/inv_yas53x_trigger.c @@ -0,0 +1,91 @@ +/* +* Copyright (C) 2012 Invensense, Inc. +* +* This software is licensed under the terms of the GNU General Public +* License version 2, as published by the Free Software Foundation, and +* may be copied, distributed, and modified under those terms. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +*/ + +/** + * @addtogroup DRIVERS + * @brief Hardware drivers. 
+ * + * @{ + * @file inv_yas53x_trigger.c + * @brief Invensense implementation for yas530/yas532/yas533 + * @details This driver currently works for the yas530/yas532/yas533 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iio.h" +#include "sysfs.h" +#include "trigger.h" + +#include "inv_yas53x_iio.h" + +static const struct iio_trigger_ops inv_yas53x_trigger_ops = { + .owner = THIS_MODULE, +}; + +int inv_yas53x_probe_trigger(struct iio_dev *indio_dev) +{ + int ret; + struct inv_compass_state *st = iio_priv(indio_dev); + + st->trig = iio_allocate_trigger("%s-dev%d", + indio_dev->name, + indio_dev->id); + if (st->trig == NULL) { + ret = -ENOMEM; + goto error_ret; + } + /* select default trigger */ + st->trig->dev.parent = &st->client->dev; + st->trig->private_data = indio_dev; + st->trig->ops = &inv_yas53x_trigger_ops; + ret = iio_trigger_register(st->trig); + + /* select default trigger */ + indio_dev->trig = st->trig; + if (ret) + goto error_free_trig; + + return 0; + +error_free_trig: + iio_free_trigger(st->trig); +error_ret: + return ret; +} + +void inv_yas53x_remove_trigger(struct iio_dev *indio_dev) +{ + struct inv_compass_state *st = iio_priv(indio_dev); + + iio_trigger_unregister(st->trig); + iio_free_trigger(st->trig); +} +/** + * @} + */ + diff --git a/include/linux/mpu.h b/include/linux/mpu.h index 5105fb201a7..4391226152c 100644 --- a/include/linux/mpu.h +++ b/include/linux/mpu.h @@ -86,6 +86,7 @@ enum ext_slave_id { * @sec_slave_id: id of the secondary slave device * @secondary_i2c_address: secondary device's i2c address * @secondary_orientation: secondary device's orientation matrix + * @key: key for MPL library. * * Contains platform specific information on how to configure the MPU3050 to * work on this platform. The orientation matricies are 3x3 rotation matricies From 820e5b60837b3a7d0f72a0f5977e7926c04378c4 Mon Sep 17 00:00:00 2001 From: Michael Wright Date: Wed, 27 Feb 2013 14:53:59 -0800 Subject: [PATCH 512/678] ARM: tegra: grouper: Enable HIDDEV Change-Id: If0f66e41917ab63ab2adf912e91122129fa1ecb3 Signed-off-by: Michael Wright --- arch/arm/configs/tegra3_android_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig index 11eb7e67a29..eb2684feac6 100644 --- a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -354,6 +354,7 @@ CONFIG_SND_SOC_TEGRA=y CONFIG_SND_SOC_TEGRA_RT5640=y CONFIG_HEADSET_FUNCTION=y CONFIG_UHID=y +CONFIG_USB_HIDDEV=y CONFIG_HID_A4TECH=y CONFIG_HID_ACRUX=y CONFIG_HID_ACRUX_FF=y From 08f70c43ce7d052dba1b01d59cd7cc97f422d7b0 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 6 Mar 2013 13:14:38 -0800 Subject: [PATCH 513/678] net: ipv6: Don't purge default router if accept_ra=2 Setting net.ipv6.conf..accept_ra=2 causes the kernel to accept RAs even when forwarding is enabled. However, enabling forwarding purges all default routes on the system, breaking connectivity until the next RA is received. Fix this by not purging default routes on interfaces that have accept_ra=2. Signed-off-by: Lorenzo Colitti Acked-by: YOSHIFUJI Hideaki Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Iliyan Malchev --- net/ipv6/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f02fe523bd3..6f60d8b64a1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1910,7 +1910,8 @@ void rt6_purge_dflt_routers(struct net *net) restart: read_lock_bh(&table->tb6_lock); for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt); From 5c5c2e36d3a4d76aa77550ba029e3abb1e296bf1 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Tue, 9 Oct 2012 20:38:21 -0700 Subject: [PATCH 514/678] netfilter: xt_qtaguid: fix error exit that would keep a spinlock. qtudev_open() could return with a uid_tag_data_tree_lock held when an kzalloc(..., GFP_ATOMIC) would fail. Very unlikely to get triggered AND survive the mayhem of running out of mem. Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index ea716b31e2a..7efbab9b4be 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -2752,7 +2752,7 @@ static int qtudev_open(struct inode *inode, struct file *file) utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); if (IS_ERR_OR_NULL(utd_entry)) { res = PTR_ERR(utd_entry); - goto err; + goto err_unlock; } /* Look for existing PID based proc_data */ @@ -2794,8 +2794,8 @@ static int qtudev_open(struct inode *inode, struct file *file) rb_erase(&utd_entry->node, &uid_tag_data_tree); kfree(utd_entry); } +err_unlock: spin_unlock_bh(&uid_tag_data_tree_lock); -err: return res; } From a0f6a580bc03508201b150c88fd25d6ff42e1f2a Mon Sep 17 00:00:00 2001 From: Pontus Fuchs Date: Mon, 19 Nov 2012 11:44:51 -0800 Subject: [PATCH 515/678] netfilter: qtaguid: Don't BUG_ON if create_if_tag_stat fails If create_if_tag_stat fails to allocate memory (GFP_ATOMIC) the following will happen: qtaguid: iface_stat: tag stat alloc failed ... kernel BUG at xt_qtaguid.c:1482! Signed-off-by: Pontus Fuchs --- net/netfilter/xt_qtaguid.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 7efbab9b4be..223fa8be8e8 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1461,6 +1461,8 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats. */ new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); + if (!new_tag_stat) + goto unlock; uid_tag_counters = &new_tag_stat->counters; } else { uid_tag_counters = &tag_stat_entry->counters; @@ -1469,6 +1471,8 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, if (acct_tag) { /* Create the child {acct_tag, uid_tag} and hook up parent. 
*/ new_tag_stat = create_if_tag_stat(iface_entry, tag); + if (!new_tag_stat) + goto unlock; new_tag_stat->parent_counters = uid_tag_counters; } else { /* @@ -1482,6 +1486,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, BUG_ON(!new_tag_stat); } tag_stat_update(new_tag_stat, direction, proto, bytes); +unlock: spin_unlock_bh(&iface_entry->tag_stat_list_lock); } From 17955b40d9f8dc6818f15c2085ef34f69b451387 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Fri, 4 Jan 2013 18:18:36 -0800 Subject: [PATCH 516/678] netfilter: xt_qtaguid: remove AID_* dependency for access control qtaguid limits what can be done with /ctrl and /stats based on group membership. This changes removes AID_NET_BW_STATS and AID_NET_BW_ACCT, and picks up the groups from the gid of the matching proc entry files. Signed-off-by: JP Abgrall Change-Id: I42e477adde78a12ed5eb58fbc0b277cdaadb6f94 --- net/netfilter/xt_qtaguid.c | 51 +++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 223fa8be8e8..5b6b4efb32d 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -53,25 +53,22 @@ static unsigned int proc_stats_perms = S_IRUGO; module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR); static struct proc_dir_entry *xt_qtaguid_ctrl_file; -#ifdef CONFIG_ANDROID_PARANOID_NETWORK + +/* Everybody can write. But proc_ctrl_write_limited is true by default which + * limits what can be controlled. See the can_*() functions. + */ static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO; -#else -static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR; -#endif module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR); -#ifdef CONFIG_ANDROID_PARANOID_NETWORK -#include -static gid_t proc_stats_readall_gid = AID_NET_BW_STATS; -static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT; -#else -/* 0 means, don't limit anybody */ -static gid_t proc_stats_readall_gid; -static gid_t proc_ctrl_write_gid; -#endif -module_param_named(stats_readall_gid, proc_stats_readall_gid, uint, +/* Limited by default, so the gid of the ctrl and stats proc entries + * will limit what can be done. See the can_*() functions. 
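+ *
+ * When limited, the can_*() checks grant access to root, to the uid that
+ * owns the corresponding proc entry, and to members of that entry's gid;
+ * clearing a _limited flag opens the corresponding file to everybody.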
+ */ +static bool proc_stats_readall_limited = true; +static bool proc_ctrl_write_limited = true; + +module_param_named(stats_readall_limited, proc_stats_readall_limited, bool, S_IRUGO | S_IWUSR); -module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint, +module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool, S_IRUGO | S_IWUSR); /* @@ -242,8 +239,9 @@ static struct qtaguid_event_counts qtu_events; static bool can_manipulate_uids(void) { /* root pwnd */ - return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid) - || in_egroup_p(proc_ctrl_write_gid); + return in_egroup_p(xt_qtaguid_ctrl_file->gid) + || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited) + || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid); } static bool can_impersonate_uid(uid_t uid) @@ -254,9 +252,10 @@ static bool can_impersonate_uid(uid_t uid) static bool can_read_other_uid_stats(uid_t uid) { /* root pwnd */ - return unlikely(!current_fsuid()) || uid == current_fsuid() - || unlikely(!proc_stats_readall_gid) - || in_egroup_p(proc_stats_readall_gid); + return in_egroup_p(xt_qtaguid_stats_file->gid) + || unlikely(!current_fsuid()) || uid == current_fsuid() + || unlikely(!proc_stats_readall_limited) + || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid); } static inline void dc_add_byte_packets(struct data_counters *counters, int set, @@ -2302,11 +2301,12 @@ static int ctrl_cmd_tag(const char *input) } CT_DEBUG("qtaguid: ctrl_tag(%s): " "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " - "in_group=%d in_egroup=%d\n", + "ctrl.gid=%u in_group()=%d in_egroup()=%d\n", input, current->pid, current->tgid, current_uid(), current_euid(), current_fsuid(), - in_group_p(proc_ctrl_write_gid), - in_egroup_p(proc_ctrl_write_gid)); + xt_qtaguid_ctrl_file->gid, + in_group_p(xt_qtaguid_ctrl_file->gid), + in_egroup_p(xt_qtaguid_ctrl_file->gid)); if (argc < 4) { uid = current_fsuid(); } else if (!can_impersonate_uid(uid)) { @@ -2598,10 +2598,11 @@ static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) && !can_read_other_uid_stats(stat_uid)) { CT_DEBUG("qtaguid: stats line: " "%s 0x%llx %u: insufficient priv " - "from pid=%u tgid=%u uid=%u\n", + "from pid=%u tgid=%u uid=%u stats.gid=%u\n", ppi->iface_entry->ifname, get_atag_from_tag(tag), stat_uid, - current->pid, current->tgid, current_fsuid()); + current->pid, current->tgid, current_fsuid(), + xt_qtaguid_stats_file->gid); return 0; } if (ppi->item_index++ < ppi->items_to_skip) From b7e119cf040ce12c7cf1dde9c960e733c581cb86 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Tue, 29 Jan 2013 19:29:35 -0800 Subject: [PATCH 517/678] netfilter: xt_qtaguid: extend iface stat to report protocols In the past the iface_stat_fmt would only show global bytes/packets for the skb-based numbers. For stall detection in userspace, distinguishing tcp vs other protocols makes it easier. 
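As a rough illustration of how the new per-protocol output can be consumed from userspace, the sketch below parses one interface entry. The field order follows the pp_iface_stat_line() header added by this patch (and listed again just below); the proc path is an assumption, since this excerpt does not name the file that selects fmt2.

/* Hypothetical reader for the fmt2 iface_stat output; path is assumed. */
#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	FILE *f = fopen("/proc/net/xt_qtaguid/iface_stat_fmt", "r"); /* assumed */
	char line[512], ifname[32];
	uint64_t v[16];

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		/* ifname + 16 counters; the header line fails the n == 17 test */
		int n = sscanf(line,
			       "%31s"
			       " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
			       " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
			       " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
			       " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64,
			       ifname,
			       &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6],
			       &v[7], &v[8], &v[9], &v[10], &v[11], &v[12],
			       &v[13], &v[14], &v[15]);
		if (n == 17)
			printf("%s: rx_tcp_bytes=%" PRIu64 " tx_tcp_bytes=%" PRIu64 "\n",
			       ifname, v[4], v[10]);
	}
	fclose(f);
	return 0;
}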
Now we report ifname total_skb_rx_bytes total_skb_rx_packets total_skb_tx_bytes total_skb_tx_packets {rx,tx}_{tcp,udp,ohter}_{bytes,packets} Bug: 6818637 Signed-off-by: JP Abgrall Change-Id: I179c5ebf2fe822acec0bce4973b4bbb5e7d5076d --- net/netfilter/xt_qtaguid.c | 90 +++++++++++++++++------------ net/netfilter/xt_qtaguid_internal.h | 21 ++++++- net/netfilter/xt_qtaguid_print.c | 14 +++-- 3 files changed, 82 insertions(+), 43 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 5b6b4efb32d..25f2cee64de 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -268,24 +268,6 @@ static inline void dc_add_byte_packets(struct data_counters *counters, int set, counters->bpc[set][direction][ifs_proto].packets += packets; } -static inline uint64_t dc_sum_bytes(struct data_counters *counters, - int set, - enum ifs_tx_rx direction) -{ - return counters->bpc[set][direction][IFS_TCP].bytes - + counters->bpc[set][direction][IFS_UDP].bytes - + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; -} - -static inline uint64_t dc_sum_packets(struct data_counters *counters, - int set, - enum ifs_tx_rx direction) -{ - return counters->bpc[set][direction][IFS_TCP].packets - + counters->bpc[set][direction][IFS_UDP].packets - + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; -} - static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) { struct rb_node *node = root->rb_node; @@ -787,6 +769,53 @@ static struct iface_stat *get_iface_entry(const char *ifname) return iface_entry; } +/* This is for fmt2 only */ +static int pp_iface_stat_line(bool header, char *outp, + int char_count, struct iface_stat *iface_entry) +{ + int len; + if (header) { + len = snprintf(outp, char_count, + "ifname " + "total_skb_rx_bytes total_skb_rx_packets " + "total_skb_tx_bytes total_skb_tx_packets " + "rx_tcp_bytes rx_tcp_packets " + "rx_udp_bytes rx_udp_packets " + "rx_other_bytes rx_other_packets " + "tx_tcp_bytes tx_tcp_packets " + "tx_udp_bytes tx_udp_packets " + "tx_other_bytes tx_other_packets\n" + ); + } else { + struct data_counters *cnts; + int cnt_set = 0; /* We only use one set for the device */ + cnts = &iface_entry->totals_via_skb; + len = snprintf( + outp, char_count, + "%s " + "%llu %llu %llu %llu %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + iface_entry->ifname, + dc_sum_bytes(cnts, cnt_set, IFS_RX), + dc_sum_packets(cnts, cnt_set, IFS_RX), + dc_sum_bytes(cnts, cnt_set, IFS_TX), + dc_sum_packets(cnts, cnt_set, IFS_TX), + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); + } + return len; +} + static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, off_t items_to_skip, int char_count, int *eof, void *data) @@ -816,11 +845,7 @@ static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, return 0; if (fmt == 2 && item_index++ >= items_to_skip) { - len = snprintf(outp, char_count, - "ifname " - "total_skb_rx_bytes total_skb_rx_packets " - "total_skb_tx_bytes 
total_skb_tx_packets\n" - ); + len = pp_iface_stat_line(true, outp, char_count, NULL); if (len >= char_count) { *outp = '\0'; return outp - page; @@ -865,16 +890,8 @@ static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, stats->tx_bytes, stats->tx_packets ); } else { - len = snprintf( - outp, char_count, - "%s " - "%llu %llu %llu %llu\n", - iface_entry->ifname, - iface_entry->totals_via_skb[IFS_RX].bytes, - iface_entry->totals_via_skb[IFS_RX].packets, - iface_entry->totals_via_skb[IFS_TX].bytes, - iface_entry->totals_via_skb[IFS_TX].packets - ); + len = pp_iface_stat_line(false, outp, char_count, + iface_entry); } if (len >= char_count) { spin_unlock_bh(&iface_stat_list_lock); @@ -1304,6 +1321,7 @@ static void iface_stat_update_from_skb(const struct sk_buff *skb, const struct net_device *el_dev; enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX; int bytes = skb->len; + int proto; if (!skb->dev) { MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); @@ -1329,7 +1347,7 @@ static void iface_stat_update_from_skb(const struct sk_buff *skb, par->hooknum, __func__); BUG(); } else { - int proto = ipx_proto(skb, par); + proto = ipx_proto(skb, par); MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", par->hooknum, el_dev->name, el_dev->type, par->family, proto); @@ -1347,8 +1365,8 @@ static void iface_stat_update_from_skb(const struct sk_buff *skb, IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, el_dev->name, entry); - entry->totals_via_skb[direction].bytes += bytes; - entry->totals_via_skb[direction].packets++; + data_counters_update(&entry->totals_via_skb, 0, direction, proto, + bytes); spin_unlock_bh(&iface_stat_list_lock); } diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h index d79f8383abf..6dc14a9c688 100644 --- a/net/netfilter/xt_qtaguid_internal.h +++ b/net/netfilter/xt_qtaguid_internal.h @@ -179,6 +179,25 @@ struct data_counters { struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; }; +static inline uint64_t dc_sum_bytes(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return counters->bpc[set][direction][IFS_TCP].bytes + + counters->bpc[set][direction][IFS_UDP].bytes + + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; +} + +static inline uint64_t dc_sum_packets(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return counters->bpc[set][direction][IFS_TCP].packets + + counters->bpc[set][direction][IFS_UDP].packets + + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; +} + + /* Generic X based nodes used as a base for rb_tree ops */ struct tag_node { struct rb_node node; @@ -203,7 +222,7 @@ struct iface_stat { struct net_device *net_dev; struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS]; - struct byte_packet_counters totals_via_skb[IFS_MAX_DIRECTIONS]; + struct data_counters totals_via_skb; /* * We keep the last_known, because some devices reset their counters * just before NETDEV_UP, while some will reset just before diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c index 8cbd8e42bcc..f6a00a3520e 100644 --- a/net/netfilter/xt_qtaguid_print.c +++ b/net/netfilter/xt_qtaguid_print.c @@ -177,9 +177,10 @@ char *pp_tag_stat(struct tag_stat *ts) char *pp_iface_stat(struct iface_stat *is) { char *res; - if (!is) + if (!is) { res = kasprintf(GFP_ATOMIC, "iface_stat@null{}"); - else + } else { + struct data_counters *cnts = &is->totals_via_skb; res = kasprintf(GFP_ATOMIC, 
"iface_stat@%p{" "list=list_head{...}, " "ifname=%s, " @@ -206,10 +207,10 @@ char *pp_iface_stat(struct iface_stat *is) is->totals_via_dev[IFS_RX].packets, is->totals_via_dev[IFS_TX].bytes, is->totals_via_dev[IFS_TX].packets, - is->totals_via_skb[IFS_RX].bytes, - is->totals_via_skb[IFS_RX].packets, - is->totals_via_skb[IFS_TX].bytes, - is->totals_via_skb[IFS_TX].packets, + dc_sum_bytes(cnts, 0, IFS_RX), + dc_sum_packets(cnts, 0, IFS_RX), + dc_sum_bytes(cnts, 0, IFS_TX), + dc_sum_packets(cnts, 0, IFS_TX), is->last_known_valid, is->last_known[IFS_RX].bytes, is->last_known[IFS_RX].packets, @@ -218,6 +219,7 @@ char *pp_iface_stat(struct iface_stat *is) is->active, is->net_dev, is->proc_ptr); + } _bug_on_err_or_null(res); return res; } From 005fe95fedfee54f09236054ccff992c4c0a5c78 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 6 Feb 2013 17:40:07 -0800 Subject: [PATCH 518/678] netfilter: xt_qtaguid: Allow tracking loopback In the past it would always ignore interfaces with loopback addresses. Now we just treat them like any other. This also helps with writing tests that check for the presence of the qtaguid module. Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 25f2cee64de..495b62ea0b6 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1108,18 +1108,13 @@ static void iface_stat_create(struct net_device *net_dev, spin_lock_bh(&iface_stat_list_lock); entry = get_iface_entry(ifname); if (entry != NULL) { - bool activate = !ipv4_is_loopback(ipaddr); IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", ifname, entry); iface_check_stats_reset_and_adjust(net_dev, entry); - _iface_stat_set_active(entry, net_dev, activate); + _iface_stat_set_active(entry, net_dev, true); IF_DEBUG("qtaguid: %s(%s): " "tracking now %d on ip=%pI4\n", __func__, - entry->ifname, activate, &ipaddr); - goto done_unlock_put; - } else if (ipv4_is_loopback(ipaddr)) { - IF_DEBUG("qtaguid: iface_stat: create(%s): " - "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr); + entry->ifname, true, &ipaddr); goto done_unlock_put; } @@ -1170,19 +1165,13 @@ static void iface_stat_create_ipv6(struct net_device *net_dev, spin_lock_bh(&iface_stat_list_lock); entry = get_iface_entry(ifname); if (entry != NULL) { - bool activate = !(addr_type & IPV6_ADDR_LOOPBACK); IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, ifname, entry); iface_check_stats_reset_and_adjust(net_dev, entry); - _iface_stat_set_active(entry, net_dev, activate); + _iface_stat_set_active(entry, net_dev, true); IF_DEBUG("qtaguid: %s(%s): " "tracking now %d on ip=%pI6c\n", __func__, - entry->ifname, activate, &ifa->addr); - goto done_unlock_put; - } else if (addr_type & IPV6_ADDR_LOOPBACK) { - IF_DEBUG("qtaguid: %s(%s): " - "ignore loopback dev. 
ip=%pI6c\n", __func__, - ifname, &ifa->addr); + entry->ifname, true, &ifa->addr); goto done_unlock_put; } From 682b261047b023929d3420aba78519f6b6aa9075 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Mon, 18 Mar 2013 10:54:41 -0700 Subject: [PATCH 519/678] net: wireless: bcmdhd: Fix p2p "linear" IE parsing Change-Id: Id9e358897529940eaaa6654f67297e1b77f52d15 Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/wl_cfgp2p.c | 40 +++++++++++++++++++++---- drivers/net/wireless/bcmdhd/wl_cfgp2p.h | 3 ++ 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c index 38c81cf94f4..8fcc13c4d30 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfgp2p.c +++ b/drivers/net/wireless/bcmdhd/wl_cfgp2p.c @@ -1830,6 +1830,38 @@ wl_cfgp2p_retreive_p2pattrib(void *buf, u8 element_id) } #define P2P_GROUP_CAPAB_GO_BIT 0x01 + +u8* +wl_cfgp2p_find_attrib_in_all_p2p_Ies(u8 *parse, u32 len, u32 attrib) +{ + bcm_tlv_t *ie; + u8* pAttrib; + + CFGP2P_INFO(("Starting parsing parse %p attrib %d remaining len %d ", parse, attrib, len)); + while ((ie = bcm_parse_tlvs(parse, (int)len, DOT11_MNG_VS_ID))) { + if (wl_cfgp2p_is_p2p_ie((uint8*)ie, &parse, &len) == TRUE) { + /* Have the P2p ie. Now check for attribute */ + if ((pAttrib = wl_cfgp2p_retreive_p2pattrib(parse, attrib)) != NULL) { + CFGP2P_INFO(("P2P attribute %d was found at parse %p", + attrib, parse)); + return pAttrib; + } + else { + parse += (ie->len + TLV_HDR_LEN); + len -= (ie->len + TLV_HDR_LEN); + CFGP2P_INFO(("P2P Attribute %d not found Moving parse" + " to %p len to %d", attrib, parse, len)); + } + } + else { + /* It was not p2p IE. parse will get updated automatically to next TLV */ + CFGP2P_INFO(("IT was NOT P2P IE parse %p len %d", parse, len)); + } + } + CFGP2P_ERR(("P2P attribute %d was NOT found", attrib)); + return NULL; +} + u8 * wl_cfgp2p_retreive_p2p_dev_addr(wl_bss_info_t *bi, u32 bi_length) { @@ -1838,12 +1870,8 @@ wl_cfgp2p_retreive_p2p_dev_addr(wl_bss_info_t *bi, u32 bi_length) bool p2p_go = 0; u8 *ptr = NULL; - if (!(p2p_ie = wl_cfgp2p_find_p2pie(((u8 *) bi) + bi->ie_offset, bi->ie_length))) { - WL_ERR(("P2P IE not found")); - return NULL; - } - - if (!(capability = wl_cfgp2p_retreive_p2pattrib(p2p_ie, P2P_SEID_P2P_INFO))) { + if ((capability = wl_cfgp2p_find_attrib_in_all_p2p_Ies(((u8 *) bi) + bi->ie_offset, + bi->ie_length, P2P_SEID_P2P_INFO)) == NULL) { WL_ERR(("P2P Capability attribute not found")); return NULL; } diff --git a/drivers/net/wireless/bcmdhd/wl_cfgp2p.h b/drivers/net/wireless/bcmdhd/wl_cfgp2p.h index 03a645aea31..be5ddba73a4 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfgp2p.h +++ b/drivers/net/wireless/bcmdhd/wl_cfgp2p.h @@ -254,6 +254,9 @@ wl_cfgp2p_set_p2p_ps(struct wl_priv *wl, struct net_device *ndev, char* buf, int extern u8 * wl_cfgp2p_retreive_p2pattrib(void *buf, u8 element_id); +extern u8* +wl_cfgp2p_find_attrib_in_all_p2p_Ies(u8 *parse, u32 len, u32 attrib); + extern u8 * wl_cfgp2p_retreive_p2p_dev_addr(wl_bss_info_t *bi, u32 bi_length); From 8caa77982caa8d7d73949c7780837a76b8bc38ec Mon Sep 17 00:00:00 2001 From: Ed Tam Date: Fri, 29 Mar 2013 11:17:06 -0700 Subject: [PATCH 520/678] Disable access to .config through /proc/config.gz BUG: 8445234 Signed-off-by: Ed Tam --- arch/arm/configs/tegra3_android_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig index eb2684feac6..2f460110e6f 100644 --- 
a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -1,6 +1,6 @@ CONFIG_EXPERIMENTAL=y CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y +CONFIG_IKCONFIG_PROC=n CONFIG_CGROUPS=y CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_FREEZER=y From b708038edc0b7a2043ac433805b689eea58c8aaa Mon Sep 17 00:00:00 2001 From: Ed Tam Date: Fri, 29 Mar 2013 11:18:56 -0700 Subject: [PATCH 521/678] Disable loadable module support BUG: 8445904 Signed-off-by: Ed Tam --- arch/arm/configs/tegra3_android_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig index 2f460110e6f..7843a3c5b86 100644 --- a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -17,7 +17,7 @@ CONFIG_EMBEDDED=y CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y -CONFIG_MODULES=y +CONFIG_MODULES=n CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set From 69f58b34d592cc95071a7979d65a4953fc66aca9 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 19 Dec 2011 16:44:10 -0500 Subject: [PATCH 522/678] Add permission checking for binder IPC. Signed-off-by: Ed Tam --- drivers/staging/android/binder.c | 21 +++++++++ include/linux/security.h | 29 ++++++++++++ security/capability.c | 24 ++++++++++ security/security.c | 20 +++++++++ security/selinux/hooks.c | 68 +++++++++++++++++++++++++++++ security/selinux/include/classmap.h | 1 + 6 files changed, 163 insertions(+) diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index e13b4c48340..5c1b41b51bb 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "binder.h" @@ -1467,6 +1468,10 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_DEAD_REPLY; goto err_dead_binder; } + if (security_binder_transaction(proc->tsk, target_proc->tsk) < 0) { + return_error = BR_FAILED_REPLY; + goto err_invalid_target_handle; + } if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) { struct binder_transaction *tmp; tmp = thread->transaction_stack; @@ -1612,6 +1617,10 @@ static void binder_transaction(struct binder_proc *proc, fp->cookie, node->cookie); goto err_binder_get_ref_for_node_failed; } + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk, node->proc->tsk)) { + return_error = BR_FAILED_REPLY; + goto err_binder_get_ref_for_node_failed; + } ref = binder_get_ref_for_node(target_proc, node); if (ref == NULL) { return_error = BR_FAILED_REPLY; @@ -1641,6 +1650,10 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_FAILED_REPLY; goto err_binder_get_ref_failed; } + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk, ref->node->proc->tsk)) { + return_error = BR_FAILED_REPLY; + goto err_binder_get_ref_failed; + } if (ref->node->proc == target_proc) { if (fp->type == BINDER_TYPE_HANDLE) fp->type = BINDER_TYPE_BINDER; @@ -1694,6 +1707,11 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_FAILED_REPLY; goto err_fget_failed; } + if (security_binder_transfer_file(proc->tsk, target_proc->tsk, file) < 0) { + fput(file); + return_error = BR_FAILED_REPLY; + goto err_get_unused_fd_failed; + } target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC); if (target_fd < 0) { fput(file); @@ -2699,6 +2717,9 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ret = -EBUSY; 
goto err; } + ret = security_binder_set_context_mgr(proc->tsk); + if (ret < 0) + goto err; if (binder_context_mgr_uid != -1) { if (binder_context_mgr_uid != current->cred->euid) { printk(KERN_ERR "binder: BINDER_SET_" diff --git a/include/linux/security.h b/include/linux/security.h index ebd2a53a3d0..42c92d3ad79 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1375,6 +1375,11 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) struct security_operations { char name[SECURITY_NAME_MAX + 1]; + int (*binder_set_context_mgr) (struct task_struct *mgr); + int (*binder_transaction) (struct task_struct *from, struct task_struct *to); + int (*binder_transfer_binder) (struct task_struct *from, struct task_struct *to, struct task_struct *owner); + int (*binder_transfer_file) (struct task_struct *from, struct task_struct *to, struct file *file); + int (*ptrace_access_check) (struct task_struct *child, unsigned int mode); int (*ptrace_traceme) (struct task_struct *parent); int (*capget) (struct task_struct *target, @@ -1657,6 +1662,10 @@ extern int security_module_enable(struct security_operations *ops); extern int register_security(struct security_operations *ops); /* Security operations */ +int security_binder_set_context_mgr(struct task_struct *mgr); +int security_binder_transaction(struct task_struct *from, struct task_struct *to); +int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to, struct task_struct *owner); +int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, @@ -1836,6 +1845,26 @@ static inline int security_init(void) return 0; } +static inline int security_binder_set_context_mgr(struct task_struct *mgr) +{ + return 0; +} + +static inline int security_binder_transaction(struct task_struct *from, struct task_struct *to) +{ + return 0; +} + +static inline int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to, struct task_struct *owner) +{ + return 0; +} + +static inline int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) +{ + return 0; +} + static inline int security_ptrace_access_check(struct task_struct *child, unsigned int mode) { diff --git a/security/capability.c b/security/capability.c index 2984ea4f776..27cc40b4922 100644 --- a/security/capability.c +++ b/security/capability.c @@ -12,6 +12,26 @@ #include +static int cap_binder_set_context_mgr(struct task_struct *mgr) +{ + return 0; +} + +static int cap_binder_transaction(struct task_struct *from, struct task_struct *to) +{ + return 0; +} + +static int cap_binder_transfer_binder(struct task_struct *from, struct task_struct *to, struct task_struct *owner) +{ + return 0; +} + +static int cap_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) +{ + return 0; +} + static int cap_syslog(int type) { return 0; @@ -874,6 +894,10 @@ static void cap_audit_rule_free(void *lsmrule) void __init security_fixup_ops(struct security_operations *ops) { + set_to_cap_if_null(ops, binder_set_context_mgr); + set_to_cap_if_null(ops, binder_transaction); + set_to_cap_if_null(ops, binder_transfer_binder); + set_to_cap_if_null(ops, binder_transfer_file); set_to_cap_if_null(ops, ptrace_access_check); set_to_cap_if_null(ops, 
ptrace_traceme); set_to_cap_if_null(ops, capget); diff --git a/security/security.c b/security/security.c index d9e15339092..aa61e9eba44 100644 --- a/security/security.c +++ b/security/security.c @@ -127,6 +127,26 @@ int __init register_security(struct security_operations *ops) /* Security operations */ +int security_binder_set_context_mgr(struct task_struct *mgr) +{ + return security_ops->binder_set_context_mgr(mgr); +} + +int security_binder_transaction(struct task_struct *from, struct task_struct *to) +{ + return security_ops->binder_transaction(from, to); +} + +int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to, struct task_struct *owner) +{ + return security_ops->binder_transfer_binder(from, to, owner); +} + +int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) +{ + return security_ops->binder_transfer_file(from, to, file); +} + int security_ptrace_access_check(struct task_struct *child, unsigned int mode) { return security_ops->ptrace_access_check(child, mode); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 266a2292451..94243f2fb81 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1805,6 +1805,69 @@ static inline u32 open_file_to_av(struct file *file) /* Hook functions begin here. */ +static int selinux_binder_set_context_mgr(struct task_struct *mgr) +{ + u32 mysid = current_sid(); + u32 mgrsid = task_sid(mgr); + + return avc_has_perm(mysid, mgrsid, SECCLASS_BINDER, BINDER__SET_CONTEXT_MGR, NULL); +} + +static int selinux_binder_transaction(struct task_struct *from, struct task_struct *to) +{ + u32 mysid = current_sid(); + u32 fromsid = task_sid(from); + u32 tosid = task_sid(to); + int rc; + + if (mysid != fromsid) { + rc = avc_has_perm(mysid, fromsid, SECCLASS_BINDER, BINDER__IMPERSONATE, NULL); + if (rc) + return rc; + } + + return avc_has_perm(fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, NULL); +} + +static int selinux_binder_transfer_binder(struct task_struct *from, struct task_struct *to, struct task_struct *owner) +{ + u32 fromsid = task_sid(from); + u32 tosid = task_sid(to); + u32 ownersid = task_sid(owner); + int rc; + + rc = avc_has_perm(fromsid, ownersid, SECCLASS_BINDER, BINDER__TRANSFER, NULL); + if (rc) + return rc; + + return avc_has_perm(tosid, ownersid, SECCLASS_BINDER, BINDER__RECEIVE, NULL); +} + +static int selinux_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) +{ + u32 sid = task_sid(to); + struct file_security_struct *fsec = file->f_security; + struct inode *inode = file->f_path.dentry->d_inode; + struct inode_security_struct *isec = inode->i_security; + struct common_audit_data ad; + int rc; + + COMMON_AUDIT_DATA_INIT(&ad, PATH); + ad.u.path = file->f_path; + + if (sid != fsec->sid) { + rc = avc_has_perm(sid, fsec->sid, + SECCLASS_FD, + FD__USE, + &ad); + if (rc) + return rc; + } + + return avc_has_perm(sid, isec->sid, isec->sclass, file_to_av(file), + &ad); +} + static int selinux_ptrace_access_check(struct task_struct *child, unsigned int mode) { @@ -5458,6 +5521,11 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer) static struct security_operations selinux_ops = { .name = "selinux", + .binder_set_context_mgr = selinux_binder_set_context_mgr, + .binder_transaction = selinux_binder_transaction, + .binder_transfer_binder = selinux_binder_transfer_binder, + .binder_transfer_file = selinux_binder_transfer_file, + .ptrace_access_check = selinux_ptrace_access_check, 
.ptrace_traceme = selinux_ptrace_traceme, .capget = selinux_capget, diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index b8c53723e09..e1051603631 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -149,5 +149,6 @@ struct security_class_mapping secclass_map[] = { { "kernel_service", { "use_as_override", "create_files_as", NULL } }, { "tun_socket", { COMMON_SOCK_PERMS, NULL } }, + { "binder", { "impersonate", "call", "set_context_mgr", "transfer", "receive", NULL } }, { NULL } }; From 3b40cf76363a59397aef97310962abfef73ea819 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 15 Nov 2012 14:01:44 -0500 Subject: [PATCH 523/678] Fix security_binder_transfer_binder hook. Drop the owning task argument to security_binder_transfer_binder since ref->node->proc can be NULL (dead owner?). Revise the SELinux checking to apply a single transfer check between the source and destination tasks. Owning task is no longer relevant. Drop the receive permission definition as it is no longer used. This makes the transfer permission similar to the call permission; it is only useful if you want to allow a binder IPC between two tasks (call permission) but deny passing of binder references between them (transfer permission). Signed-off-by: Ed Tam --- drivers/staging/android/binder.c | 4 ++-- include/linux/security.h | 6 +++--- security/capability.c | 2 +- security/security.c | 4 ++-- security/selinux/hooks.c | 11 ++--------- security/selinux/include/classmap.h | 2 +- 6 files changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 5c1b41b51bb..618ad8a443f 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -1617,7 +1617,7 @@ static void binder_transaction(struct binder_proc *proc, fp->cookie, node->cookie); goto err_binder_get_ref_for_node_failed; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk, node->proc->tsk)) { + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { return_error = BR_FAILED_REPLY; goto err_binder_get_ref_for_node_failed; } @@ -1650,7 +1650,7 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_FAILED_REPLY; goto err_binder_get_ref_failed; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk, ref->node->proc->tsk)) { + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { return_error = BR_FAILED_REPLY; goto err_binder_get_ref_failed; } diff --git a/include/linux/security.h b/include/linux/security.h index 42c92d3ad79..95a6d8e24df 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1377,7 +1377,7 @@ struct security_operations { int (*binder_set_context_mgr) (struct task_struct *mgr); int (*binder_transaction) (struct task_struct *from, struct task_struct *to); - int (*binder_transfer_binder) (struct task_struct *from, struct task_struct *to, struct task_struct *owner); + int (*binder_transfer_binder) (struct task_struct *from, struct task_struct *to); int (*binder_transfer_file) (struct task_struct *from, struct task_struct *to, struct file *file); int (*ptrace_access_check) (struct task_struct *child, unsigned int mode); @@ -1664,7 +1664,7 @@ extern int register_security(struct security_operations *ops); /* Security operations */ int security_binder_set_context_mgr(struct task_struct *mgr); int security_binder_transaction(struct task_struct *from, struct task_struct *to); -int 
security_binder_transfer_binder(struct task_struct *from, struct task_struct *to, struct task_struct *owner); +int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to); int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); @@ -1855,7 +1855,7 @@ static inline int security_binder_transaction(struct task_struct *from, struct t return 0; } -static inline int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to, struct task_struct *owner) +static inline int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to) { return 0; } diff --git a/security/capability.c b/security/capability.c index 27cc40b4922..4da1d8c5a5d 100644 --- a/security/capability.c +++ b/security/capability.c @@ -22,7 +22,7 @@ static int cap_binder_transaction(struct task_struct *from, struct task_struct * return 0; } -static int cap_binder_transfer_binder(struct task_struct *from, struct task_struct *to, struct task_struct *owner) +static int cap_binder_transfer_binder(struct task_struct *from, struct task_struct *to) { return 0; } diff --git a/security/security.c b/security/security.c index aa61e9eba44..420198e5f32 100644 --- a/security/security.c +++ b/security/security.c @@ -137,9 +137,9 @@ int security_binder_transaction(struct task_struct *from, struct task_struct *to return security_ops->binder_transaction(from, to); } -int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to, struct task_struct *owner) +int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to) { - return security_ops->binder_transfer_binder(from, to, owner); + return security_ops->binder_transfer_binder(from, to); } int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 94243f2fb81..a3e651d4bdb 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1829,18 +1829,11 @@ static int selinux_binder_transaction(struct task_struct *from, struct task_stru return avc_has_perm(fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, NULL); } -static int selinux_binder_transfer_binder(struct task_struct *from, struct task_struct *to, struct task_struct *owner) +static int selinux_binder_transfer_binder(struct task_struct *from, struct task_struct *to) { u32 fromsid = task_sid(from); u32 tosid = task_sid(to); - u32 ownersid = task_sid(owner); - int rc; - - rc = avc_has_perm(fromsid, ownersid, SECCLASS_BINDER, BINDER__TRANSFER, NULL); - if (rc) - return rc; - - return avc_has_perm(tosid, ownersid, SECCLASS_BINDER, BINDER__RECEIVE, NULL); + return avc_has_perm(fromsid, tosid, SECCLASS_BINDER, BINDER__TRANSFER, NULL); } static int selinux_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index e1051603631..4a4a9aebca9 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -149,6 +149,6 @@ struct security_class_mapping secclass_map[] = { { "kernel_service", { "use_as_override", "create_files_as", NULL } }, { "tun_socket", { COMMON_SOCK_PERMS, NULL } }, - { "binder", { "impersonate", "call", "set_context_mgr", "transfer", "receive", NULL } }, + { "binder", { 
"impersonate", "call", "set_context_mgr", "transfer", NULL } }, { NULL } }; From 882c32a14b58313214cee35cbc3a177a018a0891 Mon Sep 17 00:00:00 2001 From: Ed Tam Date: Fri, 29 Mar 2013 22:11:13 -0700 Subject: [PATCH 524/678] Enable SELinux and its dependencies in the tegra3_android_defconfig Signed-off-by: Ed Tam --- arch/arm/configs/tegra3_android_defconfig | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig index 7843a3c5b86..4b859b32d14 100644 --- a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -1,6 +1,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=n +CONFIG_AUDIT=y CONFIG_CGROUPS=y CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_FREEZER=y @@ -97,6 +98,7 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CT_PROTO_DCCP=y CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y @@ -112,10 +114,12 @@ CONFIG_NF_CT_NETLINK=y CONFIG_NETFILTER_TPROXY=y CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y CONFIG_NETFILTER_XT_TARGET_MARK=y CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y CONFIG_NETFILTER_XT_TARGET_TPROXY=y CONFIG_NETFILTER_XT_TARGET_TRACE=y CONFIG_NETFILTER_XT_MATCH_COMMENT=y @@ -156,6 +160,7 @@ CONFIG_IP_NF_TARGET_NETMAP=y CONFIG_IP_NF_TARGET_REDIRECT=y CONFIG_IP_NF_MANGLE=y CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y CONFIG_IP_NF_ARPTABLES=y CONFIG_IP_NF_ARPFILTER=y CONFIG_IP_NF_ARP_MANGLE=y @@ -496,6 +501,10 @@ CONFIG_DEBUG_VM=y CONFIG_ENABLE_DEFAULT_TRACERS=y CONFIG_DYNAMIC_DEBUG=y CONFIG_TRUSTED_FOUNDATIONS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_LSM_MMAP_MIN_ADDR=4096 +CONFIG_SECURITY_SELINUX=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_TWOFISH=y # CONFIG_CRYPTO_ANSI_CPRNG is not set From 89bad9bbd9aa1ba4f1800a77b05cabc32a4810ff Mon Sep 17 00:00:00 2001 From: Joseph Wu Date: Wed, 27 Mar 2013 22:23:26 +0800 Subject: [PATCH 525/678] Sensors: Some updates for Invensense v5.1.5 IIO driver release. 
Change-Id: Idfc8df88847c05ef358026d47bd0c2e8304a5621 Signed-off-by: Joseph Wu --- drivers/staging/iio/imu/mpu/inv_mpu_iio.h | 2 +- drivers/staging/iio/imu/mpu/inv_mpu_misc.c | 19 +++++++++++++------ drivers/staging/iio/imu/mpu/inv_mpu_ring.c | 2 +- drivers/staging/iio/imu/mpu/inv_mpu_trigger.c | 3 ++- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_iio.h b/drivers/staging/iio/imu/mpu/inv_mpu_iio.h index b26d27b1548..9d99d9bac1c 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_iio.h +++ b/drivers/staging/iio/imu/mpu/inv_mpu_iio.h @@ -594,6 +594,7 @@ struct inv_mpu_slave { #define MAX_GYRO_FS_PARAM 3 #define MAX_ACCL_FS_PARAM 3 #define MAX_LPA_FREQ_PARAM 3 +#define MPU6XXX_MAX_MPU_MEM (256 * 12) #define INIT_MOT_DUR 128 #define INIT_MOT_THR 128 @@ -842,7 +843,6 @@ void inv_setup_reg_mpu3050(struct inv_reg_map_s *reg); int inv_switch_3050_gyro_engine(struct inv_mpu_iio_s *st, bool en); int inv_switch_3050_accl_engine(struct inv_mpu_iio_s *st, bool en); int set_power_mpu3050(struct inv_mpu_iio_s *st, bool power_on); -int set_inv_enable(struct iio_dev *indio_dev, bool enable); int inv_set_interrupt_on_gesture_event(struct inv_mpu_iio_s *st, bool on); int inv_send_quaternion(struct inv_mpu_iio_s *st, bool on); int inv_set_display_orient_interrupt_dmp(struct inv_mpu_iio_s *st, bool on); diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c index ec90fc82428..d672fa482a8 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c @@ -447,13 +447,15 @@ int mpu_memory_read(struct inv_mpu_iio_s *st, u8 mpu_addr, u16 mem_addr, int mpu_memory_write_unaligned(struct inv_mpu_iio_s *st, u16 key, int len, u8 const *d) { - int addr; + u32 addr; int start, end; int len1, len2; int result = 0; if (len > MPU_MEM_BANK_SIZE) return -EINVAL; addr = inv_dmp_get_address(key); + if (addr > MPU6XXX_MAX_MPU_MEM) + return -EINVAL; start = (addr >> 8); end = ((addr + len - 1) >> 8); if (start == end) { @@ -1942,6 +1944,8 @@ ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, if (st->chip_config.firmware_loaded) return -EINVAL; + if (st->chip_config.enable) + return -EBUSY; reg = &st->reg; if (DMP_IMAGE_SIZE != size) { @@ -2025,10 +2029,12 @@ ssize_t inv_dmp_firmware_read(struct file *filp, data = 0; mutex_lock(&indio_dev->mlock); - result = st->set_power_state(st, true); - if (result) { - mutex_unlock(&indio_dev->mlock); - return result; + if (!st->chip_config.enable) { + result = st->set_power_state(st, true); + if (result) { + mutex_unlock(&indio_dev->mlock); + return result; + } } for (bank = 0; size > 0; bank++, size -= write_size, data += write_size) { @@ -2045,7 +2051,8 @@ ssize_t inv_dmp_firmware_read(struct file *filp, return result; } } - result = st->set_power_state(st, false); + if (!st->chip_config.enable) + result = st->set_power_state(st, false); mutex_unlock(&indio_dev->mlock); if (result) return result; diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_ring.c b/drivers/staging/iio/imu/mpu/inv_mpu_ring.c index 29ce25078cf..fcebf833569 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_ring.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_ring.c @@ -416,7 +416,7 @@ static int inv_set_dmp_sysfs(struct inv_mpu_iio_s *st) * @st: Device driver instance. 
* @fifo_enable: enable/disable */ -int set_inv_enable(struct iio_dev *indio_dev, +static int set_inv_enable(struct iio_dev *indio_dev, bool enable) { struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct inv_reg_map_s *reg; diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c b/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c index 2dd8c0c4a0c..45cd33932e6 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c @@ -52,7 +52,8 @@ static int inv_mpu_data_rdy_trigger_set_state(struct iio_trigger *trig, struct iio_dev *indio_dev = trig->private_data; dev_dbg(&indio_dev->dev, "%s (%d)\n", __func__, state); - return set_inv_enable(indio_dev, state); + + return 0; } static const struct iio_trigger_ops inv_mpu_trigger_ops = { From 62aa9d8414052da9822a26d71b052b6406e2e6f7 Mon Sep 17 00:00:00 2001 From: jim1_lin Date: Mon, 1 Apr 2013 15:25:22 +0800 Subject: [PATCH 526/678] ARM: tegra: grouper: Add region mapping for IR. There is no region domain mapping for IR in firmware, add mapping for IR from driver side. Change-Id: Iffba01b63f55fcc951ac036ba20679a500663de9 Signed-off-by: jim1_lin --- arch/arm/mach-tegra/board-grouper-sdhci.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-sdhci.c b/arch/arm/mach-tegra/board-grouper-sdhci.c index 7428c18a3b9..b2b639baa9b 100644 --- a/arch/arm/mach-tegra/board-grouper-sdhci.c +++ b/arch/arm/mach-tegra/board-grouper-sdhci.c @@ -40,7 +40,7 @@ static void (*wifi_status_cb)(int card_present, void *dev_id); static void *wifi_status_cb_devid; -static int grouper_wifi_status_register(void (*callback)(int , void *),void *); +static int grouper_wifi_status_register(void (*callback)(int, void *), void *); static int grouper_wifi_reset(int on); static int grouper_wifi_power(int on); @@ -56,7 +56,8 @@ typedef struct cntry_locales_custom { static cntry_locales_custom_t grouper_wifi_translate_custom_table[] = { /* Table should be filled out based on custom platform regulatory requirement */ - {"RU", "XY", 4} + {"RU", "XY", 4}, + {"IR", "XY", 4} }; static void *grouper_wifi_get_country_code(char *ccode) From 10d39839a27194d4d946f5ed15a480f357a8d9a5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 19 Feb 2013 14:56:51 +0100 Subject: [PATCH 527/678] ptrace: introduce signal_wake_up_state() and ptrace_signal_wake_up() Cleanup and preparation for the next change. signal_wake_up(resume => true) is overused. None of ptrace/jctl callers actually want to wakeup a TASK_WAKEKILL task, but they can't specify the necessary mask. Turn signal_wake_up() into signal_wake_up_state(state), reintroduce signal_wake_up() as a trivial helper, and add ptrace_signal_wake_up() which adds __TASK_TRACED. This way ptrace_signal_wake_up() can work "inside" ptrace_request() even if the tracee doesn't have the TASK_WAKEKILL bit set. 
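[Editor's illustrative sketch, not part of the original mbox.] A minimal, self-contained example of the wake-mask point made above. The TASK_* values are assumed to mirror include/linux/sched.h of this kernel generation, and would_wake() merely stands in for wake_up_state(), which wakes a task only if its state intersects the supplied mask; the authoritative helper definitions are in the include/linux/sched.h hunk below.

/*
 * Editorial sketch only, not from the patch.  Constants are assumed to
 * match include/linux/sched.h of this era; would_wake() models the
 * "state intersects mask" test done by wake_up_state().
 */
#include <stdio.h>

#define TASK_INTERRUPTIBLE	0x0001
#define TASK_UNINTERRUPTIBLE	0x0002
#define __TASK_STOPPED		0x0004
#define __TASK_TRACED		0x0008
#define TASK_WAKEKILL		0x0080
#define TASK_TRACED		(TASK_WAKEKILL | __TASK_TRACED)
#define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)

static int would_wake(unsigned int task_state, unsigned int wake_mask)
{
	/* wake_up_state() only wakes a task whose state intersects the mask */
	return (task_state & wake_mask) != 0;
}

int main(void)
{
	/* old signal_wake_up(t, 1) effectively used this mask */
	unsigned int old_mask = TASK_WAKEKILL | TASK_INTERRUPTIBLE;
	/* new ptrace_signal_wake_up(t, true) uses this one */
	unsigned int new_mask = __TASK_TRACED | TASK_INTERRUPTIBLE;

	printf("TASK_KILLABLE sleeper: old=%d new=%d\n",
	       would_wake(TASK_KILLABLE, old_mask),   /* 1: the unwanted wakeup */
	       would_wake(TASK_KILLABLE, new_mask));  /* 0: left alone */
	printf("TASK_TRACED tracee:    old=%d new=%d\n",
	       would_wake(TASK_TRACED, old_mask),     /* 1 */
	       would_wake(TASK_TRACED, new_mask));    /* 1: still woken */
	return 0;
}

The next patch in the series relies on exactly this property: a tracee parked in bare __TASK_TRACED, with TASK_WAKEKILL cleared, cannot be reached by a TASK_WAKEKILL-masked wakeup while the debugger inspects it.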
Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Michal Hocko Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ed Tam Conflicts: kernel/ptrace.c --- include/linux/sched.h | 11 ++++++++++- kernel/ptrace.c | 4 ++-- kernel/signal.c | 12 +++--------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 3c5e6c5b036..2a5e0244a24 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2598,7 +2598,16 @@ static inline void thread_group_cputime_init(struct signal_struct *sig) extern void recalc_sigpending_and_wake(struct task_struct *t); extern void recalc_sigpending(void); -extern void signal_wake_up(struct task_struct *t, int resume_stopped); +extern void signal_wake_up_state(struct task_struct *t, unsigned int state); + +static inline void signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); +} +static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? __TASK_TRACED : 0); +} /* * Wrappers for p->thread_info->cpu access. No-op on UP. diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 67d1fdd3c55..6a4ecb08da8 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -117,7 +117,7 @@ void __ptrace_unlink(struct task_struct *child) * TASK_KILLABLE sleeps. */ if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child)) - signal_wake_up(child, task_is_traced(child)); + ptrace_signal_wake_up(child, true); spin_unlock(&child->sighand->siglock); } @@ -307,7 +307,7 @@ static int ptrace_attach(struct task_struct *task, long request, */ if (task_is_stopped(task) && task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) - signal_wake_up(task, 1); + signal_wake_up_state(task, __TASK_STOPPED); spin_unlock(&task->sighand->siglock); diff --git a/kernel/signal.c b/kernel/signal.c index 195331c56ad..13f95ed24eb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -676,23 +676,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) * No need to set need_resched since signal event passing * goes through ->blocked */ -void signal_wake_up(struct task_struct *t, int resume) +void signal_wake_up_state(struct task_struct *t, unsigned int state) { - unsigned int mask; - set_tsk_thread_flag(t, TIF_SIGPENDING); - /* - * For SIGKILL, we want to wake it up in the stopped/traced/killable + * TASK_WAKEKILL also means wake it up in the stopped/traced/killable * case. We don't check t->state here because there is a race with it * executing another processor and just now entering stopped state. * By using wake_up_state, we ensure the process will wake up and * handle its death signal. */ - mask = TASK_INTERRUPTIBLE; - if (resume) - mask |= TASK_WAKEKILL; - if (!wake_up_state(t, mask)) + if (!wake_up_state(t, state | TASK_INTERRUPTIBLE)) kick_process(t); } From 889b65c064f7e687221ea77d12921ed2dc228afc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 19 Feb 2013 14:56:52 +0100 Subject: [PATCH 528/678] ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL putreg() assumes that the tracee is not running and pt_regs_access() can safely play with its stack. However a killed tracee can return from ptrace_stop() to the low-level asm code and do RESTORE_REST, this means that debugger can actually read/modify the kernel stack until the tracee does SAVE_REST again. 
set_task_blockstep() can race with SIGKILL too and in some sense this race is even worse, the very fact the tracee can be woken up breaks the logic. As Linus suggested we can clear TASK_WAKEKILL around the arch_ptrace() call, this ensures that nobody can ever wakeup the tracee while the debugger looks at it. Not only this fixes the mentioned problems, we can do some cleanups/simplifications in arch_ptrace() paths. Probably ptrace_unfreeze_traced() needs more callers, for example it makes sense to make the tracee killable for oom-killer before access_process_vm(). While at it, add the comment into may_ptrace_stop() to explain why ptrace_stop() still can't rely on SIGKILL and signal_pending_state(). Reported-by: Salman Qazi Reported-by: Suleiman Souhlal Suggested-by: Linus Torvalds Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Michal Hocko Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ed Tam Conflicts: kernel/ptrace.c --- kernel/ptrace.c | 59 +++++++++++++++++++++++++++++++++++++++++-------- kernel/signal.c | 5 +++++ 2 files changed, 55 insertions(+), 9 deletions(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 6a4ecb08da8..822f701e6d7 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -45,6 +45,36 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) child->parent = new_parent; } +/* Ensure that nothing can wake it up, even SIGKILL */ +static bool ptrace_freeze_traced(struct task_struct *task) +{ + bool ret = false; + + spin_lock_irq(&task->sighand->siglock); + if (task_is_traced(task) && !__fatal_signal_pending(task)) { + task->state = __TASK_TRACED; + ret = true; + } + spin_unlock_irq(&task->sighand->siglock); + + return ret; +} + +static void ptrace_unfreeze_traced(struct task_struct *task) +{ + if (task->state != __TASK_TRACED) + return; + + WARN_ON(!task->ptrace || task->parent != current); + + spin_lock_irq(&task->sighand->siglock); + if (__fatal_signal_pending(task)) + wake_up_state(task, __TASK_TRACED); + else + task->state = TASK_TRACED; + spin_unlock_irq(&task->sighand->siglock); +} + /** * __ptrace_unlink - unlink ptracee and restore its execution state * @child: ptracee to be unlinked @@ -151,24 +181,30 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state) * be changed by us so it's not changing right after this. */ read_lock(&tasklist_lock); - if ((child->ptrace & PT_PTRACED) && child->parent == current) { + if (child->ptrace && child->parent == current) { + WARN_ON(child->state == __TASK_TRACED); /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). */ - spin_lock_irq(&child->sighand->siglock); - WARN_ON_ONCE(task_is_stopped(child)); - if (ignore_state || (task_is_traced(child) && + if (ignore_state || (ptrace_freeze_traced(child) && !(child->jobctl & JOBCTL_LISTENING))) ret = 0; - spin_unlock_irq(&child->sighand->siglock); } read_unlock(&tasklist_lock); - if (!ret && !ignore_state) - ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; + if (!ret && !ignore_state) { + if (!wait_task_inactive(child, __TASK_TRACED)) { + /* + * This can only happen if may_ptrace_stop() fails and + * ptrace_stop() changes ->state back to TASK_RUNNING, + * so we should not worry about leaking __TASK_TRACED. + */ + WARN_ON(child->state == __TASK_TRACED); + ret = -ESRCH; + } + } - /* All systems go.. 
*/ return ret; } @@ -899,6 +935,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, goto out_put_task_struct; ret = arch_ptrace(child, request, addr, data); + if (ret || request != PTRACE_DETACH) + ptrace_unfreeze_traced(child); out_put_task_struct: put_task_struct(child); @@ -1038,8 +1076,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, ret = ptrace_check_attach(child, request == PTRACE_KILL || request == PTRACE_INTERRUPT); - if (!ret) + if (!ret) { ret = compat_arch_ptrace(child, request, addr, data); + if (ret || request != PTRACE_DETACH) + ptrace_unfreeze_traced(child); + } out_put_task_struct: put_task_struct(child); diff --git a/kernel/signal.c b/kernel/signal.c index 13f95ed24eb..4ff63de4e03 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1742,6 +1742,10 @@ static inline int may_ptrace_stop(void) * If SIGKILL was already sent before the caller unlocked * ->siglock we must see ->core_state != NULL. Otherwise it * is safe to enter schedule(). + * + * This is almost outdated, a task with the pending SIGKILL can't + * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported + * after SIGKILL was already dequeued. */ if (unlikely(current->mm->core_state) && unlikely(current->mm == current->parent->mm)) @@ -1867,6 +1871,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) if (gstop_done) do_notify_parent_cldstop(current, false, why); + /* tasklist protects us from ptrace_freeze_traced() */ __set_current_state(TASK_RUNNING); if (clear_code) current->exit_code = 0; From 3e3cc7dec2644c7df7fb57084474645ba59c0a9e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 19 Feb 2013 14:56:53 +0100 Subject: [PATCH 529/678] wake_up_process() should be never used to wakeup a TASK_STOPPED/TRACED task wake_up_process() should never wakeup a TASK_STOPPED/TRACED task. Change it to use TASK_NORMAL and add the WARN_ON(). TASK_ALL has no other users, probably can be killed. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds Cc: Michal Hocko Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ed Tam --- kernel/sched.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched.c b/kernel/sched.c index 5f7dc6e53f5..5377f277072 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2968,7 +2968,8 @@ static void try_to_wake_up_local(struct task_struct *p) */ int wake_up_process(struct task_struct *p) { - return try_to_wake_up(p, TASK_ALL, 0); + WARN_ON(task_is_stopped_or_traced(p)); + return try_to_wake_up(p, TASK_NORMAL, 0); } EXPORT_SYMBOL(wake_up_process); From c3ad258d34aa6738daaa993ce9f19df7b38b7471 Mon Sep 17 00:00:00 2001 From: Ed Tam Date: Thu, 4 Apr 2013 00:54:19 +0000 Subject: [PATCH 530/678] Revert "Sensors: Some updates for Invensense v5.1.5 IIO driver release." 
This reverts commit 28ec66dec02955f0c62bcf6829c61b96ed9821c4 Change-Id: If4be3c26ef1ca7f057ad1bda2755da4878b4dcd4 --- drivers/staging/iio/imu/mpu/inv_mpu_iio.h | 2 +- drivers/staging/iio/imu/mpu/inv_mpu_misc.c | 19 ++++++------------- drivers/staging/iio/imu/mpu/inv_mpu_ring.c | 2 +- drivers/staging/iio/imu/mpu/inv_mpu_trigger.c | 3 +-- 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_iio.h b/drivers/staging/iio/imu/mpu/inv_mpu_iio.h index 9d99d9bac1c..b26d27b1548 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_iio.h +++ b/drivers/staging/iio/imu/mpu/inv_mpu_iio.h @@ -594,7 +594,6 @@ struct inv_mpu_slave { #define MAX_GYRO_FS_PARAM 3 #define MAX_ACCL_FS_PARAM 3 #define MAX_LPA_FREQ_PARAM 3 -#define MPU6XXX_MAX_MPU_MEM (256 * 12) #define INIT_MOT_DUR 128 #define INIT_MOT_THR 128 @@ -843,6 +842,7 @@ void inv_setup_reg_mpu3050(struct inv_reg_map_s *reg); int inv_switch_3050_gyro_engine(struct inv_mpu_iio_s *st, bool en); int inv_switch_3050_accl_engine(struct inv_mpu_iio_s *st, bool en); int set_power_mpu3050(struct inv_mpu_iio_s *st, bool power_on); +int set_inv_enable(struct iio_dev *indio_dev, bool enable); int inv_set_interrupt_on_gesture_event(struct inv_mpu_iio_s *st, bool on); int inv_send_quaternion(struct inv_mpu_iio_s *st, bool on); int inv_set_display_orient_interrupt_dmp(struct inv_mpu_iio_s *st, bool on); diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c index d672fa482a8..ec90fc82428 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c @@ -447,15 +447,13 @@ int mpu_memory_read(struct inv_mpu_iio_s *st, u8 mpu_addr, u16 mem_addr, int mpu_memory_write_unaligned(struct inv_mpu_iio_s *st, u16 key, int len, u8 const *d) { - u32 addr; + int addr; int start, end; int len1, len2; int result = 0; if (len > MPU_MEM_BANK_SIZE) return -EINVAL; addr = inv_dmp_get_address(key); - if (addr > MPU6XXX_MAX_MPU_MEM) - return -EINVAL; start = (addr >> 8); end = ((addr + len - 1) >> 8); if (start == end) { @@ -1944,8 +1942,6 @@ ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, if (st->chip_config.firmware_loaded) return -EINVAL; - if (st->chip_config.enable) - return -EBUSY; reg = &st->reg; if (DMP_IMAGE_SIZE != size) { @@ -2029,12 +2025,10 @@ ssize_t inv_dmp_firmware_read(struct file *filp, data = 0; mutex_lock(&indio_dev->mlock); - if (!st->chip_config.enable) { - result = st->set_power_state(st, true); - if (result) { - mutex_unlock(&indio_dev->mlock); - return result; - } + result = st->set_power_state(st, true); + if (result) { + mutex_unlock(&indio_dev->mlock); + return result; } for (bank = 0; size > 0; bank++, size -= write_size, data += write_size) { @@ -2051,8 +2045,7 @@ ssize_t inv_dmp_firmware_read(struct file *filp, return result; } } - if (!st->chip_config.enable) - result = st->set_power_state(st, false); + result = st->set_power_state(st, false); mutex_unlock(&indio_dev->mlock); if (result) return result; diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_ring.c b/drivers/staging/iio/imu/mpu/inv_mpu_ring.c index fcebf833569..29ce25078cf 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_ring.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_ring.c @@ -416,7 +416,7 @@ static int inv_set_dmp_sysfs(struct inv_mpu_iio_s *st) * @st: Device driver instance. 
* @fifo_enable: enable/disable */ -static int set_inv_enable(struct iio_dev *indio_dev, +int set_inv_enable(struct iio_dev *indio_dev, bool enable) { struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct inv_reg_map_s *reg; diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c b/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c index 45cd33932e6..2dd8c0c4a0c 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c @@ -52,8 +52,7 @@ static int inv_mpu_data_rdy_trigger_set_state(struct iio_trigger *trig, struct iio_dev *indio_dev = trig->private_data; dev_dbg(&indio_dev->dev, "%s (%d)\n", __func__, state); - - return 0; + return set_inv_enable(indio_dev, state); } static const struct iio_trigger_ops inv_mpu_trigger_ops = { From 45337493979e4ac3949301bba6d78ffdf026e406 Mon Sep 17 00:00:00 2001 From: Ed Tam Date: Fri, 5 Apr 2013 23:50:33 +0000 Subject: [PATCH 531/678] Revert "Revert "Sensors: Some updates for Invensense v5.1.5 IIO driver release."" Sensors added back with fix. This reverts commit d90e3d9e757b6d776a535b8f6fb85ed78f5b6fd8 Change-Id: Ic732f3603f6d8aac8591d960f915ef6f06ab152a --- drivers/staging/iio/imu/mpu/inv_mpu_iio.h | 2 +- drivers/staging/iio/imu/mpu/inv_mpu_misc.c | 19 +++++++++++++------ drivers/staging/iio/imu/mpu/inv_mpu_ring.c | 2 +- drivers/staging/iio/imu/mpu/inv_mpu_trigger.c | 3 ++- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_iio.h b/drivers/staging/iio/imu/mpu/inv_mpu_iio.h index b26d27b1548..9d99d9bac1c 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_iio.h +++ b/drivers/staging/iio/imu/mpu/inv_mpu_iio.h @@ -594,6 +594,7 @@ struct inv_mpu_slave { #define MAX_GYRO_FS_PARAM 3 #define MAX_ACCL_FS_PARAM 3 #define MAX_LPA_FREQ_PARAM 3 +#define MPU6XXX_MAX_MPU_MEM (256 * 12) #define INIT_MOT_DUR 128 #define INIT_MOT_THR 128 @@ -842,7 +843,6 @@ void inv_setup_reg_mpu3050(struct inv_reg_map_s *reg); int inv_switch_3050_gyro_engine(struct inv_mpu_iio_s *st, bool en); int inv_switch_3050_accl_engine(struct inv_mpu_iio_s *st, bool en); int set_power_mpu3050(struct inv_mpu_iio_s *st, bool power_on); -int set_inv_enable(struct iio_dev *indio_dev, bool enable); int inv_set_interrupt_on_gesture_event(struct inv_mpu_iio_s *st, bool on); int inv_send_quaternion(struct inv_mpu_iio_s *st, bool on); int inv_set_display_orient_interrupt_dmp(struct inv_mpu_iio_s *st, bool on); diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c index ec90fc82428..d672fa482a8 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c @@ -447,13 +447,15 @@ int mpu_memory_read(struct inv_mpu_iio_s *st, u8 mpu_addr, u16 mem_addr, int mpu_memory_write_unaligned(struct inv_mpu_iio_s *st, u16 key, int len, u8 const *d) { - int addr; + u32 addr; int start, end; int len1, len2; int result = 0; if (len > MPU_MEM_BANK_SIZE) return -EINVAL; addr = inv_dmp_get_address(key); + if (addr > MPU6XXX_MAX_MPU_MEM) + return -EINVAL; start = (addr >> 8); end = ((addr + len - 1) >> 8); if (start == end) { @@ -1942,6 +1944,8 @@ ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, if (st->chip_config.firmware_loaded) return -EINVAL; + if (st->chip_config.enable) + return -EBUSY; reg = &st->reg; if (DMP_IMAGE_SIZE != size) { @@ -2025,10 +2029,12 @@ ssize_t inv_dmp_firmware_read(struct file *filp, data = 0; mutex_lock(&indio_dev->mlock); - result = st->set_power_state(st, true); - if (result) { - 
mutex_unlock(&indio_dev->mlock); - return result; + if (!st->chip_config.enable) { + result = st->set_power_state(st, true); + if (result) { + mutex_unlock(&indio_dev->mlock); + return result; + } } for (bank = 0; size > 0; bank++, size -= write_size, data += write_size) { @@ -2045,7 +2051,8 @@ ssize_t inv_dmp_firmware_read(struct file *filp, return result; } } - result = st->set_power_state(st, false); + if (!st->chip_config.enable) + result = st->set_power_state(st, false); mutex_unlock(&indio_dev->mlock); if (result) return result; diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_ring.c b/drivers/staging/iio/imu/mpu/inv_mpu_ring.c index 29ce25078cf..fcebf833569 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_ring.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_ring.c @@ -416,7 +416,7 @@ static int inv_set_dmp_sysfs(struct inv_mpu_iio_s *st) * @st: Device driver instance. * @fifo_enable: enable/disable */ -int set_inv_enable(struct iio_dev *indio_dev, +static int set_inv_enable(struct iio_dev *indio_dev, bool enable) { struct inv_mpu_iio_s *st = iio_priv(indio_dev); struct inv_reg_map_s *reg; diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c b/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c index 2dd8c0c4a0c..45cd33932e6 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_trigger.c @@ -52,7 +52,8 @@ static int inv_mpu_data_rdy_trigger_set_state(struct iio_trigger *trig, struct iio_dev *indio_dev = trig->private_data; dev_dbg(&indio_dev->dev, "%s (%d)\n", __func__, state); - return set_inv_enable(indio_dev, state); + + return 0; } static const struct iio_trigger_ops inv_mpu_trigger_ops = { From 4bcd9c2ea31436688be5327e875e66ec77e19393 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 13 Mar 2013 14:59:33 -0700 Subject: [PATCH 532/678] signal: always clear sa_restorer on execve When the new signal handlers are set up, the location of sa_restorer is not cleared, leaking a parent process's address space location to children. This allows for a potential bypass of the parent's ASLR by examining the sa_restorer value returned when calling sigaction(). Based on what should be considered "secret" about addresses, it only matters across the exec not the fork (since the VMAs haven't changed until the exec). But since exec sets SIG_DFL and keeps sa_restorer, this is where it should be fixed. Given the few uses of sa_restorer, a "set" function was not written since this would be the only use. Instead, we use __ARCH_HAS_SA_RESTORER, as already done in other places. Example of the leak before applying this patch: $ cat /proc/$$/maps ... 7fb9f3083000-7fb9f3238000 r-xp 00000000 fd:01 404469 .../libc-2.15.so ... $ ./leak ... 7f278bc74000-7f278be29000 r-xp 00000000 fd:01 404469 .../libc-2.15.so ... 1 0 (nil) 0x7fb9f30b94a0 2 4000000 (nil) 0x7f278bcaa4a0 3 4000000 (nil) 0x7f278bcaa4a0 4 0 (nil) 0x7fb9f30b94a0 ... [akpm@linux-foundation.org: use SA_RESTORER for backportability] Signed-off-by: Kees Cook Reported-by: Emese Revfy Cc: Emese Revfy Cc: PaX Team Cc: Al Viro Cc: Oleg Nesterov Cc: "Eric W. 
Biederman" Cc: Serge Hallyn Cc: Julien Tinnes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Ed Tam --- kernel/signal.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index 4ff63de4e03..cc243da3acc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -481,6 +481,9 @@ flush_signal_handlers(struct task_struct *t, int force_default) if (force_default || ka->sa.sa_handler != SIG_IGN) ka->sa.sa_handler = SIG_DFL; ka->sa.sa_flags = 0; +#ifdef SA_RESTORER + ka->sa.sa_restorer = NULL; +#endif sigemptyset(&ka->sa.sa_mask); ka++; } From 39259f946e71a29e2fc89301e541f57c46f6c2aa Mon Sep 17 00:00:00 2001 From: Haley Teng Date: Thu, 11 Apr 2013 21:00:58 +0800 Subject: [PATCH 533/678] arm: tegra: apbio: move init call to subsys_initcall Signed-off-by: Haley Teng --- arch/arm/mach-tegra/apbio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/apbio.c b/arch/arm/mach-tegra/apbio.c index e227331c2f0..6b6e07fd5d3 100644 --- a/arch/arm/mach-tegra/apbio.c +++ b/arch/arm/mach-tegra/apbio.c @@ -164,4 +164,4 @@ static int tegra_init_apb_dma(void) #endif return 0; } -arch_initcall(tegra_init_apb_dma); +subsys_initcall(tegra_init_apb_dma); From 662f3369215088bd17bd843ac3094866d63a57af Mon Sep 17 00:00:00 2001 From: Joseph_Wu Date: Mon, 29 Apr 2013 20:31:58 -0700 Subject: [PATCH 534/678] Sensors: Fix a drift when running at lower rate. Change-Id: I308940ae68dc4d6ce1fa4e4879bc17aefd5121b5 Signed-off-by: Joseph_Wu --- .../staging/iio/imu/mpu/dmpDefaultMPU6050.c | 169 ++++++++---------- drivers/staging/iio/imu/mpu/dmpKey.h | 37 +++- drivers/staging/iio/imu/mpu/inv_mpu_misc.c | 8 +- 3 files changed, 114 insertions(+), 100 deletions(-) diff --git a/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c b/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c index 22b7ee4922c..0242e10d201 100644 --- a/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c +++ b/drivers/staging/iio/imu/mpu/dmpDefaultMPU6050.c @@ -9,81 +9,65 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - */ -/** - * @addtogroup DRIVERS - * @brief Hardware drivers. 
- * - * @{ - * @file dmpDefaultMPU6050.c - * @brief dmp Default data - * @details This file is part of invensense mpu driver code - * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include "inv_mpu_iio.h" #include "dmpKey.h" #include "dmpmap.h" -#define CFG_LP_QUAT (2914) -#define END_ORIENT_TEMP (2068) -#define CFG_27 (2944) -#define CFG_20 (2426) -#define CFG_23 (2947) -#define CFG_DISPLAY_ORIENT_INT (2055) -#define CFG_FIFO_ON_EVENT (2892) -#define END_PREDICTION_UPDATE (1963) -#define CGNOTICE_INTR (2822) -#define X_GRT_Y_TMP (1560) +#define CFG_LP_QUAT (2500) +#define END_ORIENT_TEMP (2063) +#define CFG_27 (2530) +#define CFG_23 (2533) +#define CFG_PED_ENABLE (2620) +#define CFG_FIFO_ON_EVENT (2475) +#define CFG_PED_INT (2873) +#define END_PREDICTION_UPDATE (1958) +#define X_GRT_Y_TMP (1555) #define CFG_DR_INT (1029) #define CFG_AUTH (1035) -#define SKIP_SWING_END_1 (1753) -#define SKIP_SWING_END_2 (1768) -#define FCFG_1 (1062) -#define SKIP_X_GRT_Y_TMP (1561) -#define SKIP_END_COMPARE (1637) -#define FCFG_3 (1088) +#define UPDATE_PROP_ROT (2032) +#define END_COMPARE_Y_X_TMP2 (1652) +#define SKIP_X_GRT_Y_TMP (1556) +#define SKIP_END_COMPARE (1632) +#define FCFG_3 (1087) #define FCFG_2 (1066) -#define STATE2_T (1348) -#define END_COMPARE_Y_X_TMP3 (1636) +#define FCFG_1 (1062) +#define END_COMPARE_Y_X_TMP3 (1631) #define FCFG_7 (1073) -#define FCFG_6 (1106) -#define FLAT_STATE_END (1915) -#define SWING_END_4 (1818) -#define EXIT_SIGMOTDET (1408) -#define SWING_END_2 (1767) -#define SWING_END_3 (1789) -#define SWING_END_1 (1752) -#define CFG_8 (2920) -#define CFG_15 (2929) -#define CFG_16 (2948) -#define UPDATE_PROP_ROT (2037) -#define CFG_EXT_GYRO_BIAS (1189) -#define END_COMPARE_Y_X_TMP (1609) -#define DO_NOT_UPDATE_PROP_ROT (2041) -#define CFG_7 (1408) -#define FLAT_STATE_END_TEMP (1885) -#define END_ORIENT (2086) -#define END_COMPARE_Y_X (1686) -#define END_COMPARE_Y_X_TMP2 (1657) -#define SMD_TP2 (1371) -#define CFG_FLICK_IN (2775) -#define SKIP_SWING_END_3 (1790) -#define SMD_TP1 (1346) -#define TILTG75_START (1874) -#define CFG_6 (2955) -#define TILTL75_END (1871) -#define END_SIGMOTDET (1401) -#define EXIT1 (1347) -#define EXIT0 (1330) -#define EXIT3 (1382) -#define EXIT2 (1372) -#define TILTL75_START (1845) -#define CFG_MOTION_BIAS (1410) -#define X_GRT_Y (1610) -#define TEMPLABEL (2526) -#define CFG_GYRO_RAW_DATA (2924) -#define X_GRT_Y_TMP2 (1581) +#define FCFG_6 (1105) +#define FLAT_STATE_END (1910) +#define SWING_END_4 (1813) +#define SWING_END_2 (1762) +#define SWING_END_3 (1784) +#define SWING_END_1 (1747) +#define CFG_8 (2506) +#define CFG_15 (2515) +#define CFG_16 (2534) +#define CFG_EXT_GYRO_BIAS (1184) +#define END_COMPARE_Y_X_TMP (1604) +#define DO_NOT_UPDATE_PROP_ROT (2036) +#define CFG_7 (1403) +#define FLAT_STATE_END_TEMP (1880) +#define END_COMPARE_Y_X (1681) +#define SMD_TP2 (1366) +#define SKIP_SWING_END_1 (1748) +#define SKIP_SWING_END_3 (1785) +#define SKIP_SWING_END_2 (1763) +#define SMD_TP1 (1343) +#define TILTG75_START (1869) +#define CFG_6 (2541) +#define TILTL75_END (1866) +#define END_ORIENT (2081) +#define TILTL75_START (1840) +#define CFG_MOTION_BIAS (1405) +#define X_GRT_Y (1605) +#define TEMPLABEL (2105) +#define CFG_DISPLAY_ORIENT_INT (2050) + +#define CFG_GYRO_RAW_DATA (2510) +#define X_GRT_Y_TMP2 (1576) #define D_0_22 (22+512) #define D_0_24 (24+512) @@ -168,12 +152,12 @@ #define FLICK_LOWER (45 * 16 + 12) #define FLICK_UPPER (46 * 16 + 12) -#define D_SMD_ENABLE (49 * 16) -#define D_SMD_ACCEL_THLD (53 * 16 + 8) -#define D_SMD_DELAY_THLD (54 * 
16 + 4) -#define D_SMD_DELAY2_THLD (54 * 16 + 12) -#define D_SMD_EXE_STATE (55 * 16) -#define D_SMD_DELAY_CNTR (54 * 16) +#define D_SMD_ENABLE (18 * 16) +#define D_SMD_MOT_THLD (20 * 16) +#define D_SMD_DELAY_THLD (21 * 16 + 4) +#define D_SMD_DELAY2_THLD (21 * 16 + 12) +#define D_SMD_EXE_STATE (22 * 16) +#define D_SMD_DELAY_CNTR (21 * 16) #define D_AUTH_OUT (992) #define D_AUTH_IN (996) @@ -213,21 +197,22 @@ #define D_TILT3_H (60) #define D_TILT3_L (62) +/* Batch mode */ +#define D_BM_BATCH_CNTR (27*16+4) +#define D_BM_BATCH_THLD (27*16+8) +#define D_BM_ENABLE (28*16+6) +#define D_BM_NUMWORD_TOFILL (28*16+4) + static const struct tKeyLabel dmpTConfig[] = { {KEY_CFG_27, CFG_27}, - {KEY_CFG_20, CFG_20}, {KEY_CFG_23, CFG_23}, + {KEY_CFG_PED_ENABLE, CFG_PED_ENABLE}, {KEY_CFG_FIFO_ON_EVENT, CFG_FIFO_ON_EVENT}, - {KEY_CGNOTICE_INTR, CGNOTICE_INTR}, - {KEY_X_GRT_Y_TMP, X_GRT_Y_TMP}, {KEY_CFG_DR_INT, CFG_DR_INT}, {KEY_CFG_AUTH, CFG_AUTH}, {KEY_FCFG_1, FCFG_1}, - {KEY_SKIP_X_GRT_Y_TMP, SKIP_X_GRT_Y_TMP}, - {KEY_SKIP_END_COMPARE, SKIP_END_COMPARE}, {KEY_FCFG_3, FCFG_3}, {KEY_FCFG_2, FCFG_2}, - {KEY_END_COMPARE_Y_X_TMP2, END_COMPARE_Y_X_TMP2}, {KEY_CFG_DISPLAY_ORIENT_INT, CFG_DISPLAY_ORIENT_INT}, {KEY_FCFG_7, FCFG_7}, {KEY_FCFG_6, FCFG_6}, @@ -235,19 +220,12 @@ static const struct tKeyLabel dmpTConfig[] = { {KEY_CFG_15, CFG_15}, {KEY_CFG_16, CFG_16}, {KEY_CFG_EXT_GYRO_BIAS, CFG_EXT_GYRO_BIAS}, - {KEY_END_COMPARE_Y_X_TMP, END_COMPARE_Y_X_TMP}, {KEY_CFG_6, CFG_6}, - {KEY_END_COMPARE_Y_X, END_COMPARE_Y_X}, {KEY_CFG_LP_QUAT, CFG_LP_QUAT}, - {KEY_END_ORIENT, END_ORIENT}, - {KEY_CFG_FLICK_IN, CFG_FLICK_IN}, {KEY_CFG_7, CFG_7}, {KEY_CFG_MOTION_BIAS, CFG_MOTION_BIAS}, - {KEY_X_GRT_Y, X_GRT_Y}, - {KEY_TEMPLABEL, TEMPLABEL}, - {KEY_END_COMPARE_Y_X_TMP3, END_COMPARE_Y_X_TMP3}, + {KEY_CFG_DISPLAY_ORIENT_INT, CFG_DISPLAY_ORIENT_INT}, {KEY_CFG_GYRO_RAW_DATA, CFG_GYRO_RAW_DATA}, - {KEY_X_GRT_Y_TMP2, X_GRT_Y_TMP2}, {KEY_D_0_22, D_0_22}, {KEY_D_0_96, D_0_96}, {KEY_D_0_104, D_0_104}, @@ -267,10 +245,6 @@ static const struct tKeyLabel dmpTConfig[] = { {KEY_D_1_218, D_1_218}, {KEY_D_1_232, D_1_232}, {KEY_D_1_250, D_1_250}, - {KEY_DMP_TAPW_MIN, DMP_TAPW_MIN}, - {KEY_DMP_TAP_THR_X, DMP_TAP_THX}, - {KEY_DMP_TAP_THR_Y, DMP_TAP_THY}, - {KEY_DMP_TAP_THR_Z, DMP_TAP_THZ}, {KEY_DMP_SH_TH_Y, DMP_SH_TH_Y}, {KEY_DMP_SH_TH_X, DMP_SH_TH_X}, {KEY_DMP_SH_TH_Z, DMP_SH_TH_Z}, @@ -331,14 +305,19 @@ static const struct tKeyLabel dmpTConfig[] = { {KEY_CFG_EXT_GYRO_BIAS_X, D_EXT_GYRO_BIAS_X}, {KEY_CFG_EXT_GYRO_BIAS_Y, D_EXT_GYRO_BIAS_Y}, {KEY_CFG_EXT_GYRO_BIAS_Z, D_EXT_GYRO_BIAS_Z}, + {KEY_CFG_PED_INT, CFG_PED_INT}, {KEY_SMD_ENABLE, D_SMD_ENABLE}, - {KEY_SMD_ACCEL_THLD, D_SMD_ACCEL_THLD}, + {KEY_SMD_ACCEL_THLD, D_SMD_MOT_THLD}, {KEY_SMD_DELAY_THLD, D_SMD_DELAY_THLD}, {KEY_SMD_DELAY2_THLD, D_SMD_DELAY2_THLD}, {KEY_SMD_ENABLE_TESTPT1, SMD_TP1}, {KEY_SMD_ENABLE_TESTPT2, SMD_TP2}, {KEY_SMD_EXE_STATE, D_SMD_EXE_STATE}, - {KEY_SMD_DELAY_CNTR, D_SMD_DELAY_CNTR} + {KEY_SMD_DELAY_CNTR, D_SMD_DELAY_CNTR}, + {KEY_BM_ENABLE, D_BM_ENABLE}, + {KEY_BM_BATCH_CNTR, D_BM_BATCH_CNTR}, + {KEY_BM_BATCH_THLD, D_BM_BATCH_THLD}, + {KEY_BM_NUMWORD_TOFILL, D_BM_NUMWORD_TOFILL} }; #define NUM_LOCAL_KEYS (sizeof(dmpTConfig)/sizeof(dmpTConfig[0])) @@ -358,7 +337,11 @@ unsigned short inv_dmp_get_address(unsigned short key) keys[dmpTConfig[kk].key].addr = dmpTConfig[kk].addr; isSorted = 1; } - if (key >= NUM_KEYS) + if (key >= NUM_KEYS) { + pr_err("ERROR!! 
key not exist=%d!\n", key); return 0xffff; + } + if (0xffff == keys[key].addr) + pr_err("ERROR!!key not local=%d!\n", key); return keys[key].addr; } diff --git a/drivers/staging/iio/imu/mpu/dmpKey.h b/drivers/staging/iio/imu/mpu/dmpKey.h index f03d7da0060..4c70ec294a9 100644 --- a/drivers/staging/iio/imu/mpu/dmpKey.h +++ b/drivers/staging/iio/imu/mpu/dmpKey.h @@ -97,7 +97,8 @@ #define KEY_CCS_HEADING_THLD (KEY_COMPASS_CHG_SENSITIVITY + 1) #define KEY_CCS_TIME_THLD (KEY_CCS_HEADING_THLD + 1) #define KEY_CCS_DOTP_THLD (KEY_CCS_TIME_THLD + 1) -#define KEY_CFG_NM_DET (KEY_CCS_DOTP_THLD + 1) +#define KEY_CCS_COMP_CNTR (KEY_CCS_DOTP_THLD + 1) +#define KEY_CFG_NM_DET (KEY_CCS_COMP_CNTR + 1) #define KEY_SMD_ENABLE (KEY_CFG_NM_DET + 1) #define KEY_SMD_ACCEL_THLD (KEY_SMD_ENABLE + 1) #define KEY_SMD_DELAY_THLD (KEY_SMD_ACCEL_THLD + 1) @@ -107,7 +108,7 @@ #define KEY_SMD_EXE_STATE (KEY_SMD_ENABLE_TESTPT2 + 1) #define KEY_SMD_DELAY_CNTR (KEY_SMD_EXE_STATE + 1) -#define KEY_BREAK (80) +#define KEY_BREAK (81) #if KEY_SMD_DELAY_CNTR != KEY_BREAK #error #endif @@ -302,9 +303,11 @@ #define KEY_D_PEDSTD_STEPCTR (KEY_D_PEDSTD_TIMECTR + 1) #define KEY_D_PEDSTD_WALKTIME (KEY_D_PEDSTD_STEPCTR + 1) #define KEY_D_PEDSTD_DECI (KEY_D_PEDSTD_WALKTIME + 1) +#define KEY_CFG_PED_INT (KEY_D_PEDSTD_DECI + 1) +#define KEY_CFG_PED_ENABLE (KEY_CFG_PED_INT + 1) /*Host Based No Motion*/ -#define KEY_D_HOST_NO_MOT (KEY_D_PEDSTD_DECI + 1) +#define KEY_D_HOST_NO_MOT (KEY_CFG_PED_ENABLE + 1) /*Host Based Accel Bias*/ #define KEY_D_ACCEL_BIAS (KEY_D_HOST_NO_MOT + 1) @@ -333,7 +336,33 @@ #define KEY_STREAM_P_FOOTER (KEY_STREAM_P_ACCEL_X + 1) #define KEY_STREAM_P_ACCEL_Z (KEY_STREAM_P_FOOTER + 1) -#define NUM_KEYS (KEY_STREAM_P_ACCEL_Z + 1) +/* Batch mode */ +#define KEY_BM_ENABLE (KEY_STREAM_P_ACCEL_Z + 1) +#define KEY_BM_BATCH_THLD (KEY_BM_ENABLE + 1) +#define KEY_BM_BATCH_CNTR (KEY_BM_BATCH_THLD + 1) +#define KEY_BM_NUMWORD_TOFILL (KEY_BM_BATCH_CNTR + 1) + +/* Watermark */ +#define KEY_CFG_WATERMARK_H (KEY_BM_NUMWORD_TOFILL + 1) +#define KEY_CFG_WATERMARK_L (KEY_CFG_WATERMARK_H + 1) + +/* FIFO output control */ +#define KEY_CFG_OUT_ACCL (KEY_CFG_WATERMARK_L + 1) +#define KEY_CFG_OUT_GYRO (KEY_CFG_OUT_ACCL + 1) +#define KEY_CFG_OUT_3QUAT (KEY_CFG_OUT_GYRO + 1) +#define KEY_CFG_OUT_6QUAT (KEY_CFG_OUT_3QUAT + 1) +#define KEY_CFG_OUT_PQUAT (KEY_CFG_OUT_6QUAT + 1) +#define KEY_CFG_FIFO_INT (KEY_CFG_OUT_PQUAT + 1) +/* Ped Step detection */ +#define KEY_CFG_PEDSTEP_DET (KEY_CFG_FIFO_INT + 1) + +/* Screen Orientation data */ +#define KEY_SO_DATA (KEY_CFG_PEDSTEP_DET + 1) + +/* MPU for DMP Android K */ +#define KEY_P_HW_ID (KEY_SO_DATA + 1) + +#define NUM_KEYS (KEY_P_HW_ID + 1) struct tKeyLabel { unsigned short key; diff --git a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c index d672fa482a8..00f16875a83 100644 --- a/drivers/staging/iio/imu/mpu/inv_mpu_misc.c +++ b/drivers/staging/iio/imu/mpu/inv_mpu_misc.c @@ -56,8 +56,8 @@ #define DMP_PRECISION 1000 #define DMP_MAX_DIVIDER 4 #define DMP_MAX_MIN_TAPS 4 -#define DMP_IMAGE_CRC_VALUE 0xb0338aac -#define DMP_IMAGE_SIZE 2976 +#define DMP_IMAGE_CRC_VALUE 0x665f5a73 +#define DMP_IMAGE_SIZE 2913 /*--- Test parameters defaults --- */ #define DEF_OLDEST_SUPP_PROD_REV 8 @@ -1560,6 +1560,7 @@ static u16 inv_orientation_matrix_to_scaler(const signed char *mtx) return scalar; } +#if 0 static int inv_disable_gyro_cal(struct inv_mpu_iio_s *st) { const u8 regs[] = { @@ -1569,6 +1570,7 @@ static int inv_disable_gyro_cal(struct inv_mpu_iio_s *st) }; return 
mem_w_key(KEY_CFG_MOTION_BIAS, ARRAY_SIZE(regs), regs); } +#endif static int inv_gyro_dmp_cal(struct inv_mpu_iio_s *st) { @@ -1998,7 +2000,7 @@ ssize_t inv_dmp_firmware_write(struct file *fp, struct kobject *kobj, result = inv_accel_dmp_cal(st); if (result) goto firmware_write_fail; - result = inv_disable_gyro_cal(st); + /* result = inv_disable_gyro_cal(st); */ if (result) goto firmware_write_fail; From 0b39ea868faa0cbc30ee416f50042f15571fa6a5 Mon Sep 17 00:00:00 2001 From: Ken Sumrall Date: Tue, 9 Apr 2013 14:41:18 -0700 Subject: [PATCH 535/678] Revert "mmc: card: Bypass discard for Hynix and Kingston" Moving away from the ext4 "discard" mount option to using fstrim, any perceived performance loss due to enabling discard on Hynix or Kingston emmc chips is less important, and those chips do benefit from the discard command. So I'm reverting this patch so fstrim will work on non-Samsung emmc devices. Bug: 8056794 This reverts commit 5c6426d0ea1ae8b9c617afe2dd8176ec1818653f. Change-Id: Ifc61a553c0430928fc78b14f64e25c925bea224b --- drivers/mmc/card/block.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 691e74d9d42..66b621a4c84 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -697,17 +697,6 @@ static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) unsigned int from, nr, arg; int err = 0; - /* - * The Nexus 7 ships with several emmc chips. The ext4 discard - * mount option is required to prevent performance issues on - * one chip, but hurts performance on others. However, if this - * is a secure erase request, we want this to work on all chips, - * as this is used in factory wipe. So this test will enable the - * discard option for the one chip, and secure erase for all chips. - */ - if (!(req->cmd_flags & REQ_SECURE) && !(card->cid.manfid == 0x15)) - goto out; - if (!mmc_can_erase(card)) { err = -EOPNOTSUPP; goto out; From fe8c751138c56fa4419cee63b854aa951ff54564 Mon Sep 17 00:00:00 2001 From: Jon Mayo Date: Sat, 4 May 2013 01:54:35 +0800 Subject: [PATCH 536/678] video: tegra: avoid null deref on nvdps read When reading nvdps sysfs file, check mode to avoid a null dereference. 
Bug 1032235 Bug 1283024 Change-Id: I19b3b5b3de6743cdcc9e3a846a4ba102de681ad3 Signed-off-by: Haley Teng --- drivers/video/tegra/fb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/video/tegra/fb.c b/drivers/video/tegra/fb.c index 0272b7f68f0..43c85c985b2 100644 --- a/drivers/video/tegra/fb.c +++ b/drivers/video/tegra/fb.c @@ -411,7 +411,11 @@ static int tegra_fb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long return 0; } -int tegra_fb_get_mode(struct tegra_dc *dc) { +int tegra_fb_get_mode(struct tegra_dc *dc) +{ + if (!dc->fb->info->mode) + return -1; + return dc->fb->info->mode->refresh; } From 24e110d72b48e5e2ff9dd0f25dc6632b5ca3c765 Mon Sep 17 00:00:00 2001 From: Haley Teng Date: Sat, 4 May 2013 02:00:37 +0800 Subject: [PATCH 537/678] asoc: tegra: need to enable ahub clocks before accessing DAM registers To avoid hard hang when you execute the below command lines - cat /sys/kernel/debug/asoc/tegra30-dam.0 - cat /sys/kernel/debug/asoc/tegra30-dam.1 - cat /sys/kernel/debug/asoc/tegra30-dam.2 Bug 1283024 Change-Id: I1c18bd5c1bbf6059930b8bbb279e4a8596c85bdd Signed-off-by: Haley Teng --- sound/soc/tegra/tegra30_dam.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/tegra/tegra30_dam.c b/sound/soc/tegra/tegra30_dam.c index d308179110c..85aa60b627a 100644 --- a/sound/soc/tegra/tegra30_dam.c +++ b/sound/soc/tegra/tegra30_dam.c @@ -158,6 +158,7 @@ static int tegra30_dam_show(struct seq_file *s, void *unused) struct tegra30_dam_context *dam = s->private; int i; + tegra30_ahub_enable_clocks(); clk_enable(dam->dam_clk); for (i = 0; i < ARRAY_SIZE(regs); i++) { @@ -166,6 +167,7 @@ static int tegra30_dam_show(struct seq_file *s, void *unused) } clk_disable(dam->dam_clk); + tegra30_ahub_disable_clocks(); return 0; } From 2581914888302d43c93edbf5240b7908d0746e0a Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Sat, 13 Apr 2013 22:49:14 +0300 Subject: [PATCH 538/678] perf: Treat attr.config as u64 in perf_swevent_init() Trinity discovered that we fail to check all 64 bits of attr.config passed by user space, resulting to out-of-bounds access of the perf_swevent_enabled array in sw_perf_event_destroy(). Introduced in commit b0a873ebb ("perf: Register PMU implementations"). Signed-off-by: Tommi Rantala Cc: Peter Zijlstra Cc: davej@redhat.com Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1365882554-30259-1-git-send-email-tt.rantala@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 0f857782d06..25a977abe7d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5072,7 +5072,7 @@ static void sw_perf_event_destroy(struct perf_event *event) static int perf_swevent_init(struct perf_event *event) { - int event_id = event->attr.config; + u64 event_id = event->attr.config; if (event->attr.type != PERF_TYPE_SOFTWARE) return -ENOENT; From 2e8fb07759f76a07a0e0f6058da15f31d5cdb8df Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Fri, 31 May 2013 10:50:52 -0700 Subject: [PATCH 539/678] Revert "net: wireless: bcmdhd: Enable SUPPORT_PM2_ONLY mode" This reverts commit c8e978bfb0b1ae26b9376b8d66e39292d153d50b. 
--- drivers/net/wireless/bcmdhd/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/bcmdhd/Makefile b/drivers/net/wireless/bcmdhd/Makefile index 40816c4ac57..44aaa65bc27 100644 --- a/drivers/net/wireless/bcmdhd/Makefile +++ b/drivers/net/wireless/bcmdhd/Makefile @@ -8,7 +8,7 @@ DHDCFLAGS = -Wall -Wstrict-prototypes -Dlinux -DBCMDRIVER \ -DNEW_COMPAT_WIRELESS -DWIFI_ACT_FRAME -DARP_OFFLOAD_SUPPORT \ -DKEEP_ALIVE -DCSCAN -DGET_CUSTOM_MAC_ENABLE -DPKT_FILTER_SUPPORT \ -DEMBEDDED_PLATFORM -DENABLE_INSMOD_NO_FW_LOAD -DPNO_SUPPORT \ - -DSET_RANDOM_MAC_SOFTAP -DWL_CFG80211_STA_EVENT -DSUPPORT_PM2_ONLY \ + -DSET_RANDOM_MAC_SOFTAP -DWL_CFG80211_STA_EVENT \ -Idrivers/net/wireless/bcmdhd -Idrivers/net/wireless/bcmdhd/include DHDOFILES = aiutils.o bcmsdh_sdmmc_linux.o dhd_linux.o siutils.o bcmutils.o \ From 4ea2723466945b2dcb88327218d587db36bec7e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Oct 2012 09:14:12 +0000 Subject: [PATCH 540/678] tcp: fix FIONREAD/SIOCINQ [ Upstream commit a3374c42aa5f7237e87ff3b0622018636b0c847e ] tcp_ioctl() tries to take into account if tcp socket received a FIN to report correct number bytes in receive queue. But its flaky because if the application ate the last skb, we return 1 instead of 0. Correct way to detect that FIN was received is to test SOCK_DONE. Reported-by: Elliot Hughes Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ed Tam --- net/ipv4/tcp.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 09ced58e6a5..dcd64f4699b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -485,14 +485,12 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) !tp->urg_data || before(tp->urg_seq, tp->copied_seq) || !before(tp->urg_seq, tp->rcv_nxt)) { - struct sk_buff *skb; answ = tp->rcv_nxt - tp->copied_seq; - /* Subtract 1, if FIN is in queue. */ - skb = skb_peek_tail(&sk->sk_receive_queue); - if (answ && skb) - answ -= tcp_hdr(skb)->fin; + /* Subtract 1, if FIN was received */ + if (answ && sock_flag(sk, SOCK_DONE)) + answ--; } else answ = tp->urg_seq - tp->copied_seq; release_sock(sk); From ac2352b83f18214381d3f2db1f8c887392d3751f Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 5 Jun 2013 17:21:23 -0700 Subject: [PATCH 541/678] Revert "Revert "net: wireless: bcmdhd: Enable SUPPORT_PM2_ONLY mode"" This reverts commit 485ba23485cab0b29703f4c098d35cbee66f3a28. 
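[Editor's illustrative sketch, not part of the original mbox; it refers back to the tcp_ioctl() FIONREAD fix in PATCH 540 above.] A small userspace check, under the assumption of a blocking loopback TCP connection, of the behaviour the commit message describes: once the peer's data and FIN have both been received and the application has read all the data, FIONREAD should report 0; an unfixed kernel can report 1 because the FIN's sequence number is still counted. Error handling is omitted for brevity.

/*
 * Editorial sketch only, not from the patch series.  Sends one byte plus
 * FIN over loopback, reads the byte, then asks FIONREAD how much is left.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(void)
{
	struct sockaddr_in a = { .sin_family = AF_INET,
				 .sin_addr.s_addr = htonl(INADDR_LOOPBACK) };
	socklen_t alen = sizeof(a);
	char buf[16];
	int avail = -1, lsk, csk, sk;

	lsk = socket(AF_INET, SOCK_STREAM, 0);
	bind(lsk, (struct sockaddr *)&a, sizeof(a));   /* port 0: pick any */
	listen(lsk, 1);
	getsockname(lsk, (struct sockaddr *)&a, &alen);

	csk = socket(AF_INET, SOCK_STREAM, 0);
	connect(csk, (struct sockaddr *)&a, sizeof(a));
	write(csk, "x", 1);
	close(csk);                                    /* sends FIN */

	sk = accept(lsk, NULL, NULL);
	read(sk, buf, sizeof(buf));                    /* eat the last skb */
	ioctl(sk, FIONREAD, &avail);
	printf("bytes still readable: %d\n", avail);   /* expect 0 with the fix */
	return 0;
}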
--- drivers/net/wireless/bcmdhd/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/bcmdhd/Makefile b/drivers/net/wireless/bcmdhd/Makefile index 44aaa65bc27..40816c4ac57 100644 --- a/drivers/net/wireless/bcmdhd/Makefile +++ b/drivers/net/wireless/bcmdhd/Makefile @@ -8,7 +8,7 @@ DHDCFLAGS = -Wall -Wstrict-prototypes -Dlinux -DBCMDRIVER \ -DNEW_COMPAT_WIRELESS -DWIFI_ACT_FRAME -DARP_OFFLOAD_SUPPORT \ -DKEEP_ALIVE -DCSCAN -DGET_CUSTOM_MAC_ENABLE -DPKT_FILTER_SUPPORT \ -DEMBEDDED_PLATFORM -DENABLE_INSMOD_NO_FW_LOAD -DPNO_SUPPORT \ - -DSET_RANDOM_MAC_SOFTAP -DWL_CFG80211_STA_EVENT \ + -DSET_RANDOM_MAC_SOFTAP -DWL_CFG80211_STA_EVENT -DSUPPORT_PM2_ONLY \ -Idrivers/net/wireless/bcmdhd -Idrivers/net/wireless/bcmdhd/include DHDOFILES = aiutils.o bcmsdh_sdmmc_linux.o dhd_linux.o siutils.o bcmutils.o \ From 37402f4bd408b8186c634195d90620bed6b1695a Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Tue, 4 Jun 2013 17:29:38 -0700 Subject: [PATCH 542/678] ashmem: avoid deadlock between read and mmap calls Avoid holding ashmem_mutex across code that can page fault. Page faults grab the mmap_sem for the process, which are also held by mmap calls prior to calling ashmem_mmap, which locks ashmem_mutex. The reversed order of locking between the two can deadlock. The calls that can page fault are read() and the ASHMEM_SET_NAME and ASHMEM_GET_NAME ioctls. Move the code that accesses userspace pages outside the ashmem_mutex. Bug: 9261835 Change-Id: If1322e981d29c889a56cdc9dfcbc6df2729a45e9 Signed-off-by: Todd Poynor --- mm/ashmem.c | 59 +++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/mm/ashmem.c b/mm/ashmem.c index 66e3f23ee33..c1078aa2972 100644 --- a/mm/ashmem.c +++ b/mm/ashmem.c @@ -221,23 +221,30 @@ static ssize_t ashmem_read(struct file *file, char __user *buf, /* If size is not set, or set to 0, always return EOF. */ if (asma->size == 0) { - goto out; + goto out_unlock; } if (!asma->file) { ret = -EBADF; - goto out; + goto out_unlock; } + mutex_unlock(&ashmem_mutex); + + /* + * asma and asma->file are used outside the lock here. We assume + * once asma->file is set it will never be changed, and will not + * be destroyed until all references to the file are dropped and + * ashmem_release is called. 
+ */ ret = asma->file->f_op->read(asma->file, buf, len, pos); - if (ret < 0) { - goto out; + if (ret >= 0) { + /** Update backing file pos, since f_ops->read() doesn't */ + asma->file->f_pos = *pos; } + return ret; - /** Update backing file pos, since f_ops->read() doesn't */ - asma->file->f_pos = *pos; - -out: +out_unlock: mutex_unlock(&ashmem_mutex); return ret; } @@ -406,50 +413,48 @@ static int set_prot_mask(struct ashmem_area *asma, unsigned long prot) static int set_name(struct ashmem_area *asma, void __user *name) { + char lname[ASHMEM_NAME_LEN]; + int len; int ret = 0; + len = strncpy_from_user(lname, name, ASHMEM_NAME_LEN); + if (len < 0) + return len; + if (len == ASHMEM_NAME_LEN) + lname[ASHMEM_NAME_LEN - 1] = '\0'; mutex_lock(&ashmem_mutex); /* cannot change an existing mapping's name */ - if (unlikely(asma->file)) { + if (unlikely(asma->file)) ret = -EINVAL; - goto out; - } + else + strcpy(asma->name + ASHMEM_NAME_PREFIX_LEN, lname); - if (unlikely(copy_from_user(asma->name + ASHMEM_NAME_PREFIX_LEN, - name, ASHMEM_NAME_LEN))) - ret = -EFAULT; - asma->name[ASHMEM_FULL_NAME_LEN-1] = '\0'; - -out: mutex_unlock(&ashmem_mutex); - return ret; } static int get_name(struct ashmem_area *asma, void __user *name) { int ret = 0; + char lname[ASHMEM_NAME_LEN]; + size_t len; mutex_lock(&ashmem_mutex); if (asma->name[ASHMEM_NAME_PREFIX_LEN] != '\0') { - size_t len; - /* * Copying only `len', instead of ASHMEM_NAME_LEN, bytes * prevents us from revealing one user's stack to another. */ len = strlen(asma->name + ASHMEM_NAME_PREFIX_LEN) + 1; - if (unlikely(copy_to_user(name, - asma->name + ASHMEM_NAME_PREFIX_LEN, len))) - ret = -EFAULT; + memcpy(lname, asma->name + ASHMEM_NAME_PREFIX_LEN, len); } else { - if (unlikely(copy_to_user(name, ASHMEM_NAME_DEF, - sizeof(ASHMEM_NAME_DEF)))) - ret = -EFAULT; + len = strlen(ASHMEM_NAME_DEF) + 1; + memcpy(lname, ASHMEM_NAME_DEF, len); } mutex_unlock(&ashmem_mutex); - + if (unlikely(copy_to_user(name, lname, len))) + ret = -EFAULT; return ret; } From 353f1d0eaef7d34cb6c79bb706ebf51c352621f1 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 29 Jul 2013 17:47:14 -0400 Subject: [PATCH 543/678] a58 for 4.3 fixups --- Makefile | 7 +- arch/arm/configs/metallice_grouper_defconfig | 37 +++++-- drivers/net/wireless/bcmdhd/dhd_linux.c | 4 +- drivers/power/smb347-charger.c | 111 ++++++++++++++++--- 4 files changed, 123 insertions(+), 36 deletions(-) diff --git a/Makefile b/Makefile index 01121443454..54878701fa5 100644 --- a/Makefile +++ b/Makefile @@ -369,13 +369,12 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -fno-delete-null-pointer-checks -mno-unaligned-access \ + -fno-delete-null-pointer-checks \ -mtune=cortex-a9 -march=armv7-a -mfpu=neon \ -fpredictive-commoning -fgcse-after-reload -ftree-vectorize -mvectorize-with-neon-quad \ -fipa-cp-clone -fsingle-precision-constant \ - -funswitch-loops -floop-interchange \ - -floop-strip-mine -floop-block - + -funswitch-loops + KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := KBUILD_AFLAGS := -D__ASSEMBLY__ diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 8a56f0edf68..04328c8e547 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a0" 
+CONFIG_LOCALVERSION="-MKernel-a58" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -47,7 +47,7 @@ CONFIG_KERNEL_GZIP=y # CONFIG_KERNEL_LZMA is not set # CONFIG_KERNEL_LZO is not set CONFIG_DEFAULT_HOSTNAME="(none)" -CONFIG_SWAP=y +# CONFIG_SWAP is not set # CONFIG_SYSVIPC is not set # CONFIG_POSIX_MQUEUE is not set # CONFIG_BSD_PROCESS_ACCT is not set @@ -367,7 +367,7 @@ CONFIG_TEGRA_BB_XMM_POWER=y CONFIG_TEGRA_PLLM_RESTRICTED=y # CONFIG_TEGRA_WDT_RECOVERY is not set CONFIG_TEGRA_LP2_ARM_TWD=y -# CONFIG_TEGRA_SLOW_CSITE is not set +CONFIG_TEGRA_SLOW_CSITE=y # CONFIG_TEGRA_PREINIT_CLOCKS is not set # @@ -512,7 +512,9 @@ CONFIG_CMDLINE="tegra_wdt.heartbeat=30" CONFIG_CMDLINE_EXTEND=y # CONFIG_CMDLINE_FORCE is not set # CONFIG_XIP_KERNEL is not set -# CONFIG_KEXEC is not set +CONFIG_KEXEC=y +CONFIG_ATAGS_PROC=y +CONFIG_KEXEC_HARDBOOT=y # CONFIG_CRASH_DUMP is not set # CONFIG_AUTO_ZRELADDR is not set @@ -535,12 +537,12 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_TOUCHDEMAND=y # CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=y -CONFIG_CPU_FREQ_GOV_USERSPACE=y +# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_GOV_USERSPACE is not set CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_TOUCHDEMAND=y CONFIG_CPU_FREQ_GOV_INTERACTIVE=y -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_GOV_LULZACTIVE is not set # CONFIG_CPU_FREQ_GOV_PEGASUSQ is not set @@ -938,6 +940,7 @@ CONFIG_NET_ACT_MIRRED=y # CONFIG_NET_CLS_IND is not set CONFIG_NET_SCH_FIFO=y # CONFIG_DCB is not set +CONFIG_DNS_RESOLVER=y # CONFIG_BATMAN_ADV is not set CONFIG_RPS=y CONFIG_RFS_ACCEL=y @@ -2192,6 +2195,7 @@ CONFIG_SND=y CONFIG_SND_TIMER=y CONFIG_SND_PCM=y CONFIG_SND_HWDEP=y +CONFIG_SND_RAWMIDI=y CONFIG_SND_JACK=y # CONFIG_SND_SEQUENCER is not set # CONFIG_SND_MIXER_OSS is not set @@ -2304,7 +2308,7 @@ CONFIG_SND_HDA_POWER_SAVE_DEFAULT=10 CONFIG_SND_ARM=y CONFIG_SND_SPI=y CONFIG_SND_USB=y -# CONFIG_SND_USB_AUDIO is not set +CONFIG_SND_USB_AUDIO=y # CONFIG_SND_USB_UA101 is not set # CONFIG_SND_USB_CAIAQ is not set # CONFIG_SND_USB_6FIRE is not set @@ -2887,6 +2891,7 @@ CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 # # CONFIG_ADIS16400 is not set CONFIG_INV_MPU_IIO=y +# CONFIG_INV_IIO_MPU3050_ACCEL_SLAVE_BMA250 is not set # # Light sensors @@ -2901,7 +2906,8 @@ CONFIG_SENSORS_LTR558=y # Magnetometer sensors # # CONFIG_SENSORS_HMC5843 is not set -CONFIG_AMI306=y +# CONFIG_INV_YAS53X_IIO is not set +# CONFIG_INV_AMI306_IIO is not set # # Active energy metering IC @@ -3033,6 +3039,7 @@ CONFIG_TMPFS=y CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set @@ -3053,12 +3060,17 @@ CONFIG_MISC_FILESYSTEMS=y CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set -# CONFIG_NFS_V4 is not set +CONFIG_NFS_V4=y +# CONFIG_NFS_V4_1 is not set CONFIG_ROOT_NFS=y +# CONFIG_NFS_USE_LEGACY_DNS is not set +CONFIG_NFS_USE_KERNEL_DNS=y +# CONFIG_NFS_USE_NEW_IDMAPPER is not set # CONFIG_NFSD is not set CONFIG_LOCKD=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y # CONFIG_CEPH_FS is not set # CONFIG_CIFS is not set # CONFIG_NCP_FS is not set @@ -3237,7 +3249,8 @@ CONFIG_ARM_UNWIND=y # # Security options # -# CONFIG_KEYS is not set +CONFIG_KEYS=y +# CONFIG_KEYS_DEBUG_PROC_KEYS is not set # 
CONFIG_SECURITY_DMESG_RESTRICT is not set # CONFIG_SECURITY is not set # CONFIG_SECURITYFS is not set @@ -3300,7 +3313,7 @@ CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_CRC32C=y # CONFIG_CRYPTO_GHASH is not set -# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD4=y CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set # CONFIG_CRYPTO_RMD128 is not set diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index 44532405bbb..d5312180fd3 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -641,9 +641,9 @@ dhd_dynamic_dtim_skip_release(dhd_pub_t *dhdp) static int dhd_set_suspend(int value, dhd_pub_t *dhd) { -#if !defined(SUPPORT_PM2_ONLY) +//#if !defined(SUPPORT_PM2_ONLY) int power_mode = PM_MAX; -#endif +//#endif /* wl_pkt_filter_enable_t enable_parm; */ char iovbuf[32]; int bcn_li_dtim = 3; diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index 2534b0686e2..03f5263bacf 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -103,7 +103,6 @@ #define APSD_DCP 0x02 #define APSD_OTHER 0x03 #define APSD_SDP 0x04 -#define APSD_SDP2 0x06 // tmtmtm: USB host mode charging #define USB_30 0x20 #define DCIN_OV_UV_STS 0x50 #define DELAY_FOR_CURR_LIMIT_RECONF (60) @@ -129,6 +128,8 @@ struct wake_lock charger_wakelock; static unsigned int project_id; static unsigned int pcba_ver; static int gpio_dock_in = 0; +static int charge_en_flag = 1; +static unsigned usb_det_cable_type = non_cable; /* Sysfs interface */ static DEVICE_ATTR(reg_status, S_IWUSR | S_IRUGO, smb347_reg_show, NULL); @@ -385,18 +386,19 @@ static int smb347_configure_charger(struct i2c_client *client, int value) return ret; } -static int smb347_charger_enable(bool enable) +static int smb347_pin_control(bool state) { struct i2c_client *client = charger->client; u8 ret = 0; - if (enable) { + mutex_lock(&charger->pinctrl_lock); + + if (state) { /*Pin Controls -active low */ ret = smb347_update_reg(client, smb347_PIN_CTRL, PIN_ACT_LOW); if (ret < 0) { dev_err(&client->dev, "%s(): Failed to" "enable charger\n", __func__); - return ret; } } else { /*Pin Controls -active high */ @@ -404,12 +406,38 @@ static int smb347_charger_enable(bool enable) if (ret < 0) { dev_err(&client->dev, "%s(): Failed to" "disable charger\n", __func__); - return ret; } } + + mutex_unlock(&charger->pinctrl_lock); return ret; } +int smb347_charger_enable(bool state) +{ + struct i2c_client *client = charger->client; + u8 ret = 0; + + ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() error in configuring charger..\n", + __func__); + goto error; + } + charge_en_flag = state; + smb347_pin_control(state); + + ret = smb347_volatile_writes(client, smb347_DISABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() error in configuring charger..\n", + __func__); + goto error; + } + +error: + return ret; +} +EXPORT_SYMBOL_GPL(smb347_charger_enable); static int smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) @@ -430,7 +458,8 @@ smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) } /* disable charger */ - smb347_charger_enable(0); + if (charge_en_flag) + smb347_pin_control(0); /* AICL disable */ retval = smb347_read(client, smb347_VRS_FUNC); @@ -500,7 +529,8 @@ smb347_set_InputCurrentlimit(struct i2c_client *client, u32 current_limit) } /* enable charger */ - smb347_charger_enable(1); + if (charge_en_flag) + smb347_pin_control(1); /* 
Disable volatile writes to registers */ ret = smb347_volatile_writes(client, smb347_DISABLE_WRITE); @@ -942,20 +972,15 @@ static int cable_type_detect(void) #ifdef TOUCH_CALLBACK_ENABLED touch_callback(usb_cable); #endif - // tmtmtm start - } else if(retval == APSD_SDP2) { - printk("Cable: SDP2 host mode charging\n"); - success = battery_callback(usb_cable); -#ifdef TOUCH_CALLBACK_ENABLED - touch_callback(usb_cable); -#endif - // tmtmtm end } else { charger->cur_cable_type = unknow_cable; printk(KERN_INFO "Unkown Plug In Cable type !\n"); - if (gpio_get_value(dock_in)) { - charger->cur_cable_type = usb_cable; - success = battery_callback(usb_cable); + + if(usb_det_cable_type) { + printk(KERN_INFO "Use usb det %s cable to report\n", + (usb_det_cable_type == ac_cable) ? "ac" : "usb"); + charger->cur_cable_type = usb_det_cable_type; + success = battery_callback(usb_det_cable_type); } } } else { @@ -982,6 +1007,16 @@ static int cable_type_detect(void) return success; } +void usb_det_cable_callback(unsigned cable_type) +{ + usb_det_cable_type = cable_type; + SMB_NOTICE("usb_det_cable_type=%d\n", usb_det_cable_type); + + if(unknow_cable == charger->cur_cable_type) { + cable_type_detect(); + } +} + static void inok_isr_work_function(struct work_struct *dat) { struct i2c_client *client = charger->client; @@ -1088,6 +1123,44 @@ static void smb347_default_setback(void) } } +static int smb347_temp_limit_setting(void) +{ + struct i2c_client *client = charger->client; + int ret = 0, retval, val; + + /* Enable volatile writes to registers */ + ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() error in configuring charger..\n", + __func__); + goto error; + } + val = smb347_read(client, smb347_HRD_SFT_TEMP); + if (val < 0) { + dev_err(&client->dev, "%s(): Failed in reading 0x%02x", + __func__, smb347_HRD_SFT_TEMP); + goto error; + } + val &= 0xcf; + /* Set Hard Limit Hot Temperature 59 Degree */ + ret = smb347_write(client, smb347_HRD_SFT_TEMP, val | 0x20); + if (ret < 0) { + dev_err(&client->dev, "%s(): Failed in writing 0x%02x to register" + "0x%02x\n", __func__, val, smb347_HRD_SFT_TEMP); + goto error; + } + /* Disable volatile writes to registers */ + ret = smb347_volatile_writes(client, smb347_DISABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() error in configuring charger..\n", + __func__); + goto error; + } + return 0; +error: + return -1; +} + static int __devinit smb347_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -1107,6 +1180,7 @@ static int __devinit smb347_probe(struct i2c_client *client, i2c_set_clientdata(client, charger); /* Restore default setting: APSD Enable & 5/1/HC mode Pin control */ + smb347_temp_limit_setting(); smb347_default_setback(); ret = sysfs_create_group(&client->dev.kobj, &smb347_group); @@ -1116,6 +1190,7 @@ static int __devinit smb347_probe(struct i2c_client *client, mutex_init(&charger->cable_lock); mutex_init(&charger->dockin_lock); + mutex_init(&charger->pinctrl_lock); wake_lock_init(&charger->wake_lock_dockin, WAKE_LOCK_SUSPEND, "wake_lock_dockin"); From 38c499a1a084c864baa5defb930b06d96d403cc9 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 4 Aug 2013 02:22:17 -0400 Subject: [PATCH 544/678] defconfig a60 --- arch/arm/configs/metallice_grouper_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 04328c8e547..beed48e55f9 100644 --- 
a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a58" +CONFIG_LOCALVERSION="-MKernel-a60" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y From 5649208e40ef2f6bdfaea948d9a31ecf2f21bcc2 Mon Sep 17 00:00:00 2001 From: davy39 Date: Fri, 9 Aug 2013 22:33:00 +0800 Subject: [PATCH 545/678] dm9601: support dm9620 variant. *** Copied from kernel 3.10: dm9620 is a newer variant of dm9601 with more features (usb 2.0, checksum offload, ..), but it can also be put in a dm9601 compatible mode, allowing us to reuse the existing driver. This does mean that the extended features like checksum offload cannot be used, but that's hardly critical on a 100mbps interface. *** opied from commit/296fd44b5bf5ebca48f8bbbcecb6c5e2ab73d370 This is kind of a backport from linux kernel 3.10. Instead of identify device type by reading device id, this change specify device type explicitly. A few dm9601 and dm9620 devices were tested working with this change. As before the change, IP addr can not be obtained through dhclient except a few rare times. Static IP addr works flawlessly. With this change it's better unselect the existing dm9620 driver in "make menuconfig". --- drivers/net/usb/dm9601.c | 56 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 1d93133e9b7..a90faaba5a7 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -45,6 +45,12 @@ #define DM_MCAST_ADDR 0x16 /* 8 bytes */ #define DM_GPR_CTRL 0x1e #define DM_GPR_DATA 0x1f +#define DM_CHIP_ID 0x2c +#define DM_MODE_CTRL 0x91 /* only on dm9620 */ + +/* chip id values */ +#define ID_DM9601 0 +#define ID_DM9620 1 #define DM_MAX_MCAST 64 #define DM_MCAST_SIZE 8 @@ -432,7 +438,8 @@ static const struct net_device_ops dm9601_netdev_ops = { .ndo_set_mac_address = dm9601_set_mac_address, }; -static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) +static int dm9601_bind_common( + struct usbnet *dev, struct usb_interface *intf, int dev_type) { int ret; u8 mac[ETH_ALEN]; @@ -476,6 +483,18 @@ static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) __dm9601_set_mac_address(dev); } + /* put dm9620 devices in dm9601 mode */ + if (dev_type == ID_DM9620) { + u8 mode; + + if (dm_read_reg(dev, DM_MODE_CTRL, &mode) < 0) { + netdev_err(dev->net, "Error reading MODE_CTRL\n"); + ret = -ENODEV; + goto out; + } + dm_write_reg(dev, DM_MODE_CTRL, mode & 0x7f); + } + /* power up phy */ dm_write_reg(dev, DM_GPR_CTRL, 1); dm_write_reg(dev, DM_GPR_DATA, 0); @@ -492,6 +511,16 @@ static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) return ret; } +static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) +{ + return dm9601_bind_common(dev, intf, ID_DM9601); +} + +static int dm9620_bind(struct usbnet *dev, struct usb_interface *intf) +{ + return dm9601_bind_common(dev, intf, ID_DM9620); +} + static int dm9601_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { u8 status; @@ -621,6 +650,17 @@ static const struct driver_info dm9601_info = { .reset = dm9601_link_reset, }; +static const struct driver_info dm9620_info = { + .description = "Davicom DM9620 USB Ethernet", + .flags = FLAG_ETHER | FLAG_LINK_INTR, + .bind = dm9620_bind, + .rx_fixup = dm9601_rx_fixup, + .tx_fixup = dm9601_tx_fixup, + 
.status = dm9601_status, + .link_reset = dm9601_link_reset, + .reset = dm9601_link_reset, +}; + static const struct usb_device_id products[] = { { USB_DEVICE(0x07aa, 0x9601), /* Corega FEther USB-TXC */ @@ -658,13 +698,25 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0a46, 0x9000), /* DM9000E */ .driver_info = (unsigned long)&dm9601_info, }, + { + USB_DEVICE(0x0a46, 0x9620), /* DM9620 USB to Fast Ethernet Adapter */ + .driver_info = (unsigned long)&dm9620_info, + }, + { + USB_DEVICE(0x0a46, 0x9621), /* DM9621 USB to Fast Ethernet Adapter */ + .driver_info = (unsigned long)&dm9620_info, + }, + { + USB_DEVICE(0x0a46, 0x9622), /* DM9622 USB to Fast Ethernet Adapter */ + .driver_info = (unsigned long)&dm9620_info, + }, {}, // END }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver dm9601_driver = { - .name = "dm9601", + .name = "dm9601-962X", .id_table = products, .probe = usbnet_probe, .disconnect = usbnet_disconnect, From eaf78f21beadaad2338e8e1c5d38bf946e96d002 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Aug 2013 18:12:31 -0400 Subject: [PATCH 546/678] USB host mode charging (old) by Mehrvarz --- drivers/usb/otg/tegra-otg.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/usb/otg/tegra-otg.c b/drivers/usb/otg/tegra-otg.c index 9d7b09d0422..4a1bae6d1d2 100755 --- a/drivers/usb/otg/tegra-otg.c +++ b/drivers/usb/otg/tegra-otg.c @@ -65,6 +65,9 @@ struct tegra_otg_data { }; static struct tegra_otg_data *tegra_clone; +static bool tegra_otg_on_charging = false; +module_param(tegra_otg_on_charging, bool, 0664); + static inline unsigned long otg_readl(struct tegra_otg_data *tegra, unsigned int offset) { @@ -231,8 +234,15 @@ static void irq_work(struct work_struct *work) dev_info(tegra->otg.dev, "%s --> %s\n", tegra_state_name(from), tegra_state_name(to)); - if (tegra->charger_cb) - tegra->charger_cb(to, from, tegra->charger_cb_data); + if (tegra->charger_cb) { +// tegra->charger_cb(to, from, tegra->charger_cb_data); + if (tegra_otg_on_charging) + /* enable v_bus detection for charging */ + tegra->detect_vbus = true; + else + /* enable OTG to supply internal power */ + tegra->charger_cb(to, from, tegra->charger_cb_data); + } if (to == OTG_STATE_A_SUSPEND) { if (from == OTG_STATE_A_HOST) From 87024045e8d1c90191764f152bc0d5e742b3ef9e Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Aug 2013 18:33:20 -0400 Subject: [PATCH 547/678] revert bad hack to compile dhd_linux.c --- drivers/net/wireless/bcmdhd/dhd_linux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index d5312180fd3..44532405bbb 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -641,9 +641,9 @@ dhd_dynamic_dtim_skip_release(dhd_pub_t *dhdp) static int dhd_set_suspend(int value, dhd_pub_t *dhd) { -//#if !defined(SUPPORT_PM2_ONLY) +#if !defined(SUPPORT_PM2_ONLY) int power_mode = PM_MAX; -//#endif +#endif /* wl_pkt_filter_enable_t enable_parm; */ char iovbuf[32]; int bcn_li_dtim = 3; From d16619399a71c77d9ed2717750f3394c9316be82 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Aug 2013 18:33:50 -0400 Subject: [PATCH 548/678] Revert "Added kernel config option 'BCMDHD_WIFI_PM'" This reverts commit 64dfe65e4a3de99bbf3696e6143df253180b4cfc. 
--- drivers/net/wireless/bcmdhd/Kconfig | 8 -------- drivers/net/wireless/bcmdhd/dhd_linux.c | 11 ----------- 2 files changed, 19 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/Kconfig b/drivers/net/wireless/bcmdhd/Kconfig index 7448a941279..8b6b92f6243 100644 --- a/drivers/net/wireless/bcmdhd/Kconfig +++ b/drivers/net/wireless/bcmdhd/Kconfig @@ -52,11 +52,3 @@ config DHD_ENABLE_P2P default n ---help--- Use Enable Wifi Direct - -config BCMDHD_WIFI_PM - bool "Enable support for changing the WiFi power mode" - depends on BCMDHD - default n - help - Enable support for changing the WiFi power mode for - screen-off. diff --git a/drivers/net/wireless/bcmdhd/dhd_linux.c b/drivers/net/wireless/bcmdhd/dhd_linux.c index 44532405bbb..b26e2419699 100755 --- a/drivers/net/wireless/bcmdhd/dhd_linux.c +++ b/drivers/net/wireless/bcmdhd/dhd_linux.c @@ -539,12 +539,6 @@ static void dhd_set_packet_filter(int value, dhd_pub_t *dhd) #endif } -#ifdef CONFIG_BCMDHD_WIFI_PM -static int wifi_pm = 0; - -module_param(wifi_pm, int, 0755); -#endif - #ifdef DYNAMIC_DTIM_SKIP static int dhd_dtim_thread(void *data) @@ -653,11 +647,6 @@ static int dhd_set_suspend(int value, dhd_pub_t *dhd) __FUNCTION__, value, dhd->in_suspend)); dhd_suspend_lock(dhd); - -#ifdef CONFIG_BCMDHD_WIFI_PM - if (wifi_pm == 1) - power_mode = PM_FAST; -#endif if (dhd && dhd->up) { if (value && dhd->in_suspend) { From 3e0123013f6f8d7def516752c21f530d83b1fee9 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Aug 2013 18:50:32 -0400 Subject: [PATCH 549/678] defconfig: update for 4.3 kernel --- arch/arm/configs/metallice_grouper_defconfig | 60 +++++++++++--------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index beed48e55f9..b99455a0782 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a60" +CONFIG_LOCALVERSION="-MKernel-" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -53,7 +53,7 @@ CONFIG_DEFAULT_HOSTNAME="(none)" # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_FHANDLE is not set # CONFIG_TASKSTATS is not set -# CONFIG_AUDIT is not set +CONFIG_AUDIT=y CONFIG_HAVE_GENERIC_HARDIRQS=y # @@ -144,7 +144,6 @@ CONFIG_PROFILING=y CONFIG_TRACEPOINTS=y CONFIG_OPROFILE=y CONFIG_HAVE_OPROFILE=y -# CONFIG_KPROBES is not set CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_USE_GENERIC_SMP_HELPERS=y @@ -161,12 +160,7 @@ CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 -CONFIG_MODULES=y -# CONFIG_MODULE_FORCE_LOAD is not set -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_MODVERSIONS is not set -# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_MODULES is not set CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y CONFIG_LBDAF=y @@ -688,9 +682,10 @@ CONFIG_IPV6_TUNNEL=y CONFIG_IPV6_MULTIPLE_TABLES=y # CONFIG_IPV6_SUBTREES is not set # CONFIG_IPV6_MROUTE is not set +# CONFIG_NETLABEL is not set CONFIG_ANDROID_PARANOID_NETWORK=y CONFIG_NET_ACTIVITY_STATS=y -# CONFIG_NETWORK_SECMARK is not set +CONFIG_NETWORK_SECMARK=y # CONFIG_NETWORK_PHY_TIMESTAMPING is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set @@ -704,6 +699,7 @@ CONFIG_NETFILTER_NETLINK_QUEUE=y CONFIG_NETFILTER_NETLINK_LOG=y CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_MARK=y 
+CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y # CONFIG_NF_CONNTRACK_TIMESTAMP is not set CONFIG_NF_CT_PROTO_DCCP=y @@ -734,9 +730,11 @@ CONFIG_NETFILTER_XT_CONNMARK=y # # Xtables targets # +# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set # CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y # CONFIG_NETFILTER_XT_TARGET_CT is not set # CONFIG_NETFILTER_XT_TARGET_DSCP is not set # CONFIG_NETFILTER_XT_TARGET_HL is not set @@ -749,6 +747,7 @@ CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y # CONFIG_NETFILTER_XT_TARGET_TEE is not set CONFIG_NETFILTER_XT_TARGET_TPROXY=y CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y # CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set # CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set @@ -835,6 +834,7 @@ CONFIG_IP_NF_MANGLE=y # CONFIG_IP_NF_TARGET_ECN is not set # CONFIG_IP_NF_TARGET_TTL is not set CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y CONFIG_IP_NF_ARPTABLES=y CONFIG_IP_NF_ARPFILTER=y CONFIG_IP_NF_ARP_MANGLE=y @@ -861,6 +861,7 @@ CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_TARGET_REJECT_SKERR=y CONFIG_IP6_NF_MANGLE=y CONFIG_IP6_NF_RAW=y +# CONFIG_IP6_NF_SECURITY is not set # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_RDS is not set @@ -1148,7 +1149,6 @@ CONFIG_SCSI_MULTI_LUN=y # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set -# CONFIG_SCSI_WAIT_SCAN is not set # # SCSI Transports @@ -1287,7 +1287,6 @@ CONFIG_BCMDHD_NVRAM_PATH="/system/etc/nvram.txt" # CONFIG_DHD_USE_STATIC_BUF is not set # CONFIG_DHD_USE_SCHED_SCAN is not set CONFIG_DHD_ENABLE_P2P=y -CONFIG_BCMDHD_WIFI_PM=y # CONFIG_HOSTAP is not set # CONFIG_IPW2100 is not set # CONFIG_IPW2200 is not set @@ -1635,7 +1634,6 @@ CONFIG_I2C_TEGRA=y # # Other I2C/SMBus bus drivers # -# CONFIG_I2C_STUB is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -1981,7 +1979,6 @@ CONFIG_VIDEO_MEDIA=y # Multimedia drivers # # CONFIG_RC_CORE is not set -# CONFIG_MEDIA_ATTACH is not set CONFIG_MEDIA_TUNER=y # CONFIG_MEDIA_TUNER_CUSTOMISE is not set CONFIG_MEDIA_TUNER_SIMPLE=y @@ -2347,7 +2344,7 @@ CONFIG_UHID=y # CONFIG_USB_HID=y # CONFIG_HID_PID is not set -# CONFIG_USB_HIDDEV is not set +CONFIG_USB_HIDDEV=y # # Special HID drivers @@ -2778,9 +2775,6 @@ CONFIG_STAGING=y # CONFIG_ECHO is not set # CONFIG_BRCMUTIL is not set # CONFIG_ASUS_OLED is not set -# CONFIG_R8187SE is not set -# CONFIG_RTL8192U is not set -# CONFIG_RTL8192E is not set # CONFIG_R8712U is not set # CONFIG_RTS_PSTOR is not set # CONFIG_TRANZPORT is not set @@ -2806,8 +2800,6 @@ CONFIG_ANDROID_LOW_MEMORY_KILLER=y # CONFIG_LINE6_USB is not set # CONFIG_USB_SERIAL_QUATECH2 is not set # CONFIG_USB_SERIAL_QUATECH_USB2 is not set -# CONFIG_VT6655 is not set -# CONFIG_VT6656 is not set # CONFIG_VME_BUS is not set # CONFIG_DX_SEP is not set CONFIG_IIO=y @@ -2907,7 +2899,7 @@ CONFIG_SENSORS_LTR558=y # # CONFIG_SENSORS_HMC5843 is not set # CONFIG_INV_YAS53X_IIO is not set -# CONFIG_INV_AMI306_IIO is not set +CONFIG_INV_AMI306_IIO=y # # Active energy metering IC @@ -2941,7 +2933,6 @@ CONFIG_SENSORS_LTR558=y # CONFIG_EASYCAP is not set # CONFIG_SOLO6X10 is not set # CONFIG_ATH6K_LEGACY is not set -# CONFIG_USB_ENESTORAGE is not set # CONFIG_BCM_WIMAX is not set # CONFIG_FT1000 is not set @@ -2975,7 +2966,7 @@ CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_XATTR=y 
CONFIG_EXT4_FS_POSIX_ACL=y -# CONFIG_EXT4_FS_SECURITY is not set +CONFIG_EXT4_FS_SECURITY=y # CONFIG_EXT4_DEBUG is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set @@ -3252,11 +3243,26 @@ CONFIG_ARM_UNWIND=y CONFIG_KEYS=y # CONFIG_KEYS_DEBUG_PROC_KEYS is not set # CONFIG_SECURITY_DMESG_RESTRICT is not set -# CONFIG_SECURITY is not set +CONFIG_SECURITY=y # CONFIG_SECURITYFS is not set +CONFIG_SECURITY_NETWORK=y +# CONFIG_SECURITY_NETWORK_XFRM is not set +# CONFIG_SECURITY_PATH is not set +CONFIG_LSM_MMAP_MIN_ADDR=4096 +CONFIG_SECURITY_SELINUX=y +# CONFIG_SECURITY_SELINUX_BOOTPARAM is not set +# CONFIG_SECURITY_SELINUX_DISABLE is not set +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_AVC_STATS=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set +# CONFIG_SECURITY_TOMOYO is not set +# CONFIG_SECURITY_APPARMOR is not set CONFIG_TRUSTED_FOUNDATIONS=y -CONFIG_DEFAULT_SECURITY_DAC=y -CONFIG_DEFAULT_SECURITY="" +# CONFIG_IMA is not set +CONFIG_DEFAULT_SECURITY_SELINUX=y +# CONFIG_DEFAULT_SECURITY_DAC is not set +CONFIG_DEFAULT_SECURITY="selinux" CONFIG_CRYPTO=y # @@ -3281,7 +3287,6 @@ CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y CONFIG_CRYPTO_WORKQUEUE=y # CONFIG_CRYPTO_CRYPTD is not set CONFIG_CRYPTO_AUTHENC=y -# CONFIG_CRYPTO_TEST is not set # # Authenticated Encryption with Associated Data @@ -3379,6 +3384,7 @@ CONFIG_CRC32=y # CONFIG_CRC7 is not set CONFIG_LIBCRC32C=y # CONFIG_CRC8 is not set +CONFIG_AUDIT_GENERIC=y CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y CONFIG_LZO_COMPRESS=y From e771e9d4310424cd2b361c1013b1f8ffc28844d0 Mon Sep 17 00:00:00 2001 From: faux123 Date: Tue, 18 Sep 2012 23:23:13 -0700 Subject: [PATCH 550/678] fs/dyn_sync_cntrl: dynamic sync control The dynamic sync control interface uses Android kernel's unique early suspend / lat resume interface. While screen is on, file sync is disabled when screen is off, a file sync is called to flush all outstanding writes and restore file sync operation as normal. Signed-off-by: Paul Reioux Conflicts: fs/sync.c --- fs/Kconfig | 6 ++ fs/Makefile | 2 + fs/dyn_sync_cntrl.c | 155 ++++++++++++++++++++++++++++++++++++++++++++ fs/sync.c | 50 +++++++++++--- include/linux/fs.h | 1 + 5 files changed, 205 insertions(+), 9 deletions(-) create mode 100644 fs/dyn_sync_cntrl.c diff --git a/fs/Kconfig b/fs/Kconfig index 99453badf45..3130a45eafa 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -281,4 +281,10 @@ endif source "fs/nls/Kconfig" source "fs/dlm/Kconfig" +config DYNAMIC_FSYNC + bool "dynamic file sync control" + default n + help + An experimental file sync control using Android's early suspend / late resume drivers + endmenu diff --git a/fs/Makefile b/fs/Makefile index a8bbb322701..cd17b767c56 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -126,3 +126,5 @@ obj-$(CONFIG_PSTORE) += pstore/ # Patched by YAFFS obj-$(CONFIG_YAFFS_FS) += yaffs2/ + +obj-$(CONFIG_DYNAMIC_FSYNC) += dyn_sync_cntrl.o diff --git a/fs/dyn_sync_cntrl.c b/fs/dyn_sync_cntrl.c new file mode 100644 index 00000000000..21befb7427e --- /dev/null +++ b/fs/dyn_sync_cntrl.c @@ -0,0 +1,155 @@ +/* + * Author: Paul Reioux aka Faux123 + * + * Copyright 2012 Paul Reioux + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include + +#include + +#define DYN_FSYNC_VERSION 1 + +/* + * fsync_mutex protects dyn_fsync_active during early suspend / lat resume transitions + */ +static DEFINE_MUTEX(fsync_mutex); + +bool early_suspend_active = false; +static bool dyn_fsync_active = true; + +static ssize_t dyn_fsync_active_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", (dyn_fsync_active ? 1 : 0)); +} + +static ssize_t dyn_fsync_active_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int data; + + if(sscanf(buf, "%u\n", &data) == 1) { + if (data == 1) { + pr_info("%s: dynamic fsync enabled\n", __FUNCTION__); + dyn_fsync_active = true; + } + else if (data == 0) { + pr_info("%s: dyanamic fsync disabled\n", __FUNCTION__); + dyn_fsync_active = false; + } + else + pr_info("%s: bad value: %u\n", __FUNCTION__, data); + } else + pr_info("%s: unknown input!\n", __FUNCTION__); + + return count; +} + +static ssize_t dyn_fsync_version_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "version: %u\n", DYN_FSYNC_VERSION); +} + +static ssize_t dyn_fsync_earlysuspend_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "early suspend active: %u\n", early_suspend_active); +} + +static struct kobj_attribute dyn_fsync_active_attribute = + __ATTR(Dyn_fsync_active, 0666, dyn_fsync_active_show, dyn_fsync_active_store); + +static struct kobj_attribute dyn_fsync_version_attribute = + __ATTR(Dyn_fsync_version, 0444 , dyn_fsync_version_show, NULL); + +static struct kobj_attribute dyn_fsync_earlysuspend_attribute = + __ATTR(Dyn_fsync_earlysuspend, 0444 , dyn_fsync_earlysuspend_show, NULL); + +static struct attribute *dyn_fsync_active_attrs[] = + { + &dyn_fsync_active_attribute.attr, + &dyn_fsync_version_attribute.attr, + &dyn_fsync_earlysuspend_attribute.attr, + NULL, + }; + +static struct attribute_group dyn_fsync_active_attr_group = + { + .attrs = dyn_fsync_active_attrs, + }; + +static struct kobject *dyn_fsync_kobj; + +static void dyn_fsync_early_suspend(struct early_suspend *h) +{ + mutex_lock(&fsync_mutex); + if (dyn_fsync_active) { + early_suspend_active = true; +#if 1 + /* flush all outstanding buffers */ + wakeup_flusher_threads(0); + sync_filesystems(0); + sync_filesystems(1); +#endif + } + mutex_unlock(&fsync_mutex); +} + +static void dyn_fsync_late_resume(struct early_suspend *h) +{ + mutex_lock(&fsync_mutex); + early_suspend_active = false; + mutex_unlock(&fsync_mutex); +} + +static struct early_suspend dyn_fsync_early_suspend_handler = + { + .level = EARLY_SUSPEND_LEVEL_BLANK_SCREEN, + .suspend = dyn_fsync_early_suspend, + .resume = dyn_fsync_late_resume, + }; + +static int dyn_fsync_init(void) +{ + int sysfs_result; + + register_early_suspend(&dyn_fsync_early_suspend_handler); + + dyn_fsync_kobj = kobject_create_and_add("dyn_fsync", kernel_kobj); + if (!dyn_fsync_kobj) { + pr_err("%s dyn_fsync kobject create failed!\n", __FUNCTION__); + return -ENOMEM; + } + + sysfs_result = sysfs_create_group(dyn_fsync_kobj, &dyn_fsync_active_attr_group); + + if (sysfs_result) { + pr_info("%s dyn_fsync sysfs create failed!\n", __FUNCTION__); + 
kobject_put(dyn_fsync_kobj); + } + return sysfs_result; +} + +static void dyn_fsync_exit(void) +{ + unregister_early_suspend(&dyn_fsync_early_suspend_handler); + + if (dyn_fsync_kobj != NULL) + kobject_put(dyn_fsync_kobj); +} + +module_init(dyn_fsync_init); +module_exit(dyn_fsync_exit); + diff --git a/fs/sync.c b/fs/sync.c index a10e39a4b6a..065beb0e428 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -18,6 +18,10 @@ #include #include "internal.h" +#ifdef CONFIG_DYNAMIC_FSYNC +extern bool early_suspend_active; +#endif + #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) @@ -91,7 +95,7 @@ static void sync_one_sb(struct super_block *sb, void *arg) * Sync all the data for all the filesystems (called by sys_sync() and * emergency sync) */ -static void sync_filesystems(int wait) +void sync_filesystems(int wait) { iterate_supers(sync_one_sb, &wait); } @@ -179,10 +183,17 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) if (!fsynccontrol_fsync_enabled()) return 0; #endif - +#ifdef CONFIG_DYNAMIC_FSYNC + if (!early_suspend_active) + return 0; + else { +#endif if (!file->f_op || !file->f_op->fsync) return -EINVAL; return file->f_op->fsync(file, start, end, datasync); +#ifdef CONFIG_DYNAMIC_FSYNC + } +#endif } EXPORT_SYMBOL(vfs_fsync_range); @@ -230,7 +241,11 @@ SYSCALL_DEFINE1(fsync, unsigned int, fd) if (!fsynccontrol_fsync_enabled()) return 0; #endif - +#ifdef CONFIG_DYNAMIC_FSYNC + if (!early_suspend_active) + return 0; + else +#endif return do_fsync(fd, 0); } @@ -240,7 +255,11 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) if (!fsynccontrol_fsync_enabled()) return 0; #endif - +#ifdef CONFIG_DYNAMIC_FSYNC + if (!early_suspend_active) + return 0; + else +#endif return do_fsync(fd, 1); } @@ -316,6 +335,16 @@ EXPORT_SYMBOL(generic_write_sync); SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, unsigned int flags) { +#ifdef CONFIG_FSYNC_CONTROL + if (!fsynccontrol_fsync_enabled()) + return 0; +#endif +#ifdef CONFIG_DYNAMIC_FSYNC + if (!early_suspend_active) + return 0; + else { +#endif + int ret; struct file *file; struct address_space *mapping; @@ -323,11 +352,6 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, int fput_needed; umode_t i_mode; -#ifdef CONFIG_FSYNC_CONTROL - if (!fsynccontrol_fsync_enabled()) - return 0; -#endif - ret = -EINVAL; if (flags & ~VALID_FLAGS) goto out; @@ -400,6 +424,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, fput_light(file, fput_needed); out: return ret; +#ifdef CONFIG_DYNAMIC_FSYNC + } +#endif } #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS asmlinkage long SyS_sync_file_range(long fd, loff_t offset, loff_t nbytes, @@ -416,6 +443,11 @@ SYSCALL_ALIAS(sys_sync_file_range, SyS_sync_file_range); SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags, loff_t offset, loff_t nbytes) { +#ifdef CONFIG_DYNAMIC_FSYNC + if (!early_suspend_active) + return 0; + else +#endif return sys_sync_file_range(fd, offset, nbytes, flags); } #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS diff --git a/include/linux/fs.h b/include/linux/fs.h index cf7bc25928c..5c3b043a645 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2077,6 +2077,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) } #endif extern int sync_filesystem(struct super_block *); +extern void sync_filesystems(int wait); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; extern const struct file_operations 
bad_sock_fops; From bc575e51ffcfaf07f22f454081bc5de0a3b65ade Mon Sep 17 00:00:00 2001 From: Andrew Bartholomew Date: Sun, 14 Apr 2013 00:16:08 -0500 Subject: [PATCH 551/678] fs/dyn_fsync: check dyn fsync control's active prior to performing fsync ops Signed-off-by: Andrew Bartholomew --- fs/dyn_sync_cntrl.c | 2 +- fs/sync.c | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/dyn_sync_cntrl.c b/fs/dyn_sync_cntrl.c index 21befb7427e..d97f154b032 100644 --- a/fs/dyn_sync_cntrl.c +++ b/fs/dyn_sync_cntrl.c @@ -30,7 +30,7 @@ static DEFINE_MUTEX(fsync_mutex); bool early_suspend_active = false; -static bool dyn_fsync_active = true; +bool dyn_fsync_active = true; static ssize_t dyn_fsync_active_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { diff --git a/fs/sync.c b/fs/sync.c index 065beb0e428..cf4c926d646 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -20,6 +20,7 @@ #ifdef CONFIG_DYNAMIC_FSYNC extern bool early_suspend_active; +extern bool dyn_fsync_active; #endif #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ @@ -184,7 +185,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (!early_suspend_active) + if (dyn_fsync_active && !early_suspend_active) return 0; else { #endif @@ -242,7 +243,7 @@ SYSCALL_DEFINE1(fsync, unsigned int, fd) return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (!early_suspend_active) + if (dyn_fsync_active && !early_suspend_active) return 0; else #endif @@ -256,7 +257,7 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (!early_suspend_active) + if (dyn_fsync_active && !early_suspend_active) return 0; else #endif @@ -340,7 +341,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (!early_suspend_active) + if (dyn_fsync_active && !early_suspend_active) return 0; else { #endif @@ -444,7 +445,7 @@ SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags, loff_t offset, loff_t nbytes) { #ifdef CONFIG_DYNAMIC_FSYNC - if (!early_suspend_active) + if (dyn_fsync_active && !early_suspend_active) return 0; else #endif From f5bcb30cfc8c33897e861ead4ccd93cc338d61f7 Mon Sep 17 00:00:00 2001 From: Paul Reioux Date: Sun, 14 Apr 2013 00:50:10 -0500 Subject: [PATCH 552/678] dynamic filesync: add some cache optimizations and clean up file format and typos bump version to 1.1 Signed-off-by: Paul Reioux --- fs/dyn_sync_cntrl.c | 39 +++++++++++++++++++++++++-------------- fs/sync.c | 10 +++++----- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/fs/dyn_sync_cntrl.c b/fs/dyn_sync_cntrl.c index d97f154b032..0b1e210b30a 100644 --- a/fs/dyn_sync_cntrl.c +++ b/fs/dyn_sync_cntrl.c @@ -1,6 +1,7 @@ /* * Author: Paul Reioux aka Faux123 * + * Copyright 2013 Paul Reioux * Copyright 2012 Paul Reioux * * This software is licensed under the terms of the GNU General Public @@ -22,22 +23,26 @@ #include -#define DYN_FSYNC_VERSION 1 +#define DYN_FSYNC_VERSION_MAJOR 1 +#define DYN_FSYNC_VERSION_MINOR 1 /* - * fsync_mutex protects dyn_fsync_active during early suspend / lat resume transitions + * fsync_mutex protects dyn_fsync_active during early suspend / late resume + * transitions */ static DEFINE_MUTEX(fsync_mutex); -bool early_suspend_active = false; -bool dyn_fsync_active = true; +bool early_suspend_active __read_mostly = false; +bool dyn_fsync_active __read_mostly = true; -static ssize_t dyn_fsync_active_show(struct kobject 
*kobj, struct kobj_attribute *attr, char *buf) +static ssize_t dyn_fsync_active_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%u\n", (dyn_fsync_active ? 1 : 0)); } -static ssize_t dyn_fsync_active_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +static ssize_t dyn_fsync_active_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) { unsigned int data; @@ -58,24 +63,30 @@ static ssize_t dyn_fsync_active_store(struct kobject *kobj, struct kobj_attribut return count; } -static ssize_t dyn_fsync_version_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +static ssize_t dyn_fsync_version_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "version: %u\n", DYN_FSYNC_VERSION); + return sprintf(buf, "version: %u.%u by faux123\n", + DYN_FSYNC_VERSION_MAJOR, + DYN_FSYNC_VERSION_MINOR); } -static ssize_t dyn_fsync_earlysuspend_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +static ssize_t dyn_fsync_earlysuspend_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { return sprintf(buf, "early suspend active: %u\n", early_suspend_active); } static struct kobj_attribute dyn_fsync_active_attribute = - __ATTR(Dyn_fsync_active, 0666, dyn_fsync_active_show, dyn_fsync_active_store); + __ATTR(Dyn_fsync_active, 0666, + dyn_fsync_active_show, + dyn_fsync_active_store); static struct kobj_attribute dyn_fsync_version_attribute = - __ATTR(Dyn_fsync_version, 0444 , dyn_fsync_version_show, NULL); + __ATTR(Dyn_fsync_version, 0444, dyn_fsync_version_show, NULL); static struct kobj_attribute dyn_fsync_earlysuspend_attribute = - __ATTR(Dyn_fsync_earlysuspend, 0444 , dyn_fsync_earlysuspend_show, NULL); + __ATTR(Dyn_fsync_earlysuspend, 0444, dyn_fsync_earlysuspend_show, NULL); static struct attribute *dyn_fsync_active_attrs[] = { @@ -133,7 +144,8 @@ static int dyn_fsync_init(void) return -ENOMEM; } - sysfs_result = sysfs_create_group(dyn_fsync_kobj, &dyn_fsync_active_attr_group); + sysfs_result = sysfs_create_group(dyn_fsync_kobj, + &dyn_fsync_active_attr_group); if (sysfs_result) { pr_info("%s dyn_fsync sysfs create failed!\n", __FUNCTION__); @@ -152,4 +164,3 @@ static void dyn_fsync_exit(void) module_init(dyn_fsync_init); module_exit(dyn_fsync_exit); - diff --git a/fs/sync.c b/fs/sync.c index cf4c926d646..69c7200923b 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -185,7 +185,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (dyn_fsync_active && !early_suspend_active) + if (unlikely(dyn_fsync_active && !early_suspend_active)) return 0; else { #endif @@ -243,7 +243,7 @@ SYSCALL_DEFINE1(fsync, unsigned int, fd) return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (dyn_fsync_active && !early_suspend_active) + if (unlikely(dyn_fsync_active && !early_suspend_active)) return 0; else #endif @@ -257,7 +257,7 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (dyn_fsync_active && !early_suspend_active) + if (unlikely(dyn_fsync_active && !early_suspend_active)) return 0; else #endif @@ -341,7 +341,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (dyn_fsync_active && !early_suspend_active) + if (unlikely(dyn_fsync_active && !early_suspend_active)) return 0; else { #endif @@ -445,7 +445,7 @@ SYSCALL_DEFINE(sync_file_range2)(int 
fd, unsigned int flags, loff_t offset, loff_t nbytes) { #ifdef CONFIG_DYNAMIC_FSYNC - if (dyn_fsync_active && !early_suspend_active) + if (unlikely(dyn_fsync_active && !early_suspend_active)) return 0; else #endif From 12a05e29d1222fdf1babd1683c4e6f3a206559a1 Mon Sep 17 00:00:00 2001 From: Paul Reioux Date: Tue, 21 May 2013 19:21:04 -0500 Subject: [PATCH 553/678] dynamic fsync: add reboot notifier to force flush outstanding data this should further prevent data corruption from kernel panics and forced reboots bump version to 1.2 Signed-off-by: Paul Reioux --- fs/dyn_sync_cntrl.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/fs/dyn_sync_cntrl.c b/fs/dyn_sync_cntrl.c index 0b1e210b30a..42e98e43c3f 100644 --- a/fs/dyn_sync_cntrl.c +++ b/fs/dyn_sync_cntrl.c @@ -20,11 +20,12 @@ #include #include #include - +#include +#include #include #define DYN_FSYNC_VERSION_MAJOR 1 -#define DYN_FSYNC_VERSION_MINOR 1 +#define DYN_FSYNC_VERSION_MINOR 2 /* * fsync_mutex protects dyn_fsync_active during early suspend / late resume @@ -103,17 +104,20 @@ static struct attribute_group dyn_fsync_active_attr_group = static struct kobject *dyn_fsync_kobj; +static void dyn_fsync_force_flush(void) +{ + /* flush all outstanding buffers */ + wakeup_flusher_threads(0); + sync_filesystems(0); + sync_filesystems(1); +} + static void dyn_fsync_early_suspend(struct early_suspend *h) { mutex_lock(&fsync_mutex); if (dyn_fsync_active) { early_suspend_active = true; -#if 1 - /* flush all outstanding buffers */ - wakeup_flusher_threads(0); - sync_filesystems(0); - sync_filesystems(1); -#endif + dyn_fsync_force_flush(); } mutex_unlock(&fsync_mutex); } @@ -132,11 +136,27 @@ static struct early_suspend dyn_fsync_early_suspend_handler = .resume = dyn_fsync_late_resume, }; +static int dyn_fsync_notify_sys(struct notifier_block *this, unsigned long code, + void *unused) +{ + if (code == SYS_DOWN || code == SYS_HALT) { + early_suspend_active = true; + dyn_fsync_force_flush(); + pr_warn("dyn fsync: force flush!\n"); + } + return NOTIFY_DONE; +} + +static struct notifier_block dyn_fsync_notifier = { + .notifier_call = dyn_fsync_notify_sys, +}; + static int dyn_fsync_init(void) { int sysfs_result; register_early_suspend(&dyn_fsync_early_suspend_handler); + register_reboot_notifier(&dyn_fsync_notifier); dyn_fsync_kobj = kobject_create_and_add("dyn_fsync", kernel_kobj); if (!dyn_fsync_kobj) { @@ -157,6 +177,7 @@ static int dyn_fsync_init(void) static void dyn_fsync_exit(void) { unregister_early_suspend(&dyn_fsync_early_suspend_handler); + unregister_reboot_notifier(&dyn_fsync_notifier); if (dyn_fsync_kobj != NULL) kobject_put(dyn_fsync_kobj); From eb63ee39825a77f96653b9ed511f3a5ea8b1bfc1 Mon Sep 17 00:00:00 2001 From: Paul Reioux Date: Wed, 22 May 2013 18:02:19 -0500 Subject: [PATCH 554/678] dynamic fsync: add kernel panic notifier to force flush outstanding data more paranoia safety checks Signed-off-by: Paul Reioux --- fs/dyn_sync_cntrl.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/dyn_sync_cntrl.c b/fs/dyn_sync_cntrl.c index 42e98e43c3f..59b4ffbd669 100644 --- a/fs/dyn_sync_cntrl.c +++ b/fs/dyn_sync_cntrl.c @@ -136,13 +136,28 @@ static struct early_suspend dyn_fsync_early_suspend_handler = .resume = dyn_fsync_late_resume, }; +static int dyn_fsync_panic_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + early_suspend_active = true; + dyn_fsync_force_flush(); + //pr_warn("dyn fsync: panic: force flush!\n"); + + 
return NOTIFY_DONE; +} + +static struct notifier_block dyn_fsync_panic_block = { + .notifier_call = dyn_fsync_panic_event, + .priority = INT_MAX, +}; + static int dyn_fsync_notify_sys(struct notifier_block *this, unsigned long code, void *unused) { if (code == SYS_DOWN || code == SYS_HALT) { early_suspend_active = true; dyn_fsync_force_flush(); - pr_warn("dyn fsync: force flush!\n"); + //pr_warn("dyn fsync: reboot: force flush!\n"); } return NOTIFY_DONE; } @@ -157,6 +172,8 @@ static int dyn_fsync_init(void) register_early_suspend(&dyn_fsync_early_suspend_handler); register_reboot_notifier(&dyn_fsync_notifier); + atomic_notifier_chain_register(&panic_notifier_list, + &dyn_fsync_panic_block); dyn_fsync_kobj = kobject_create_and_add("dyn_fsync", kernel_kobj); if (!dyn_fsync_kobj) { @@ -178,6 +195,8 @@ static void dyn_fsync_exit(void) { unregister_early_suspend(&dyn_fsync_early_suspend_handler); unregister_reboot_notifier(&dyn_fsync_notifier); + atomic_notifier_chain_unregister(&panic_notifier_list, + &dyn_fsync_panic_block); if (dyn_fsync_kobj != NULL) kobject_put(dyn_fsync_kobj); From 7991a0556d8901778b0e826f00fe2fa7aff4042a Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 12 Aug 2013 19:24:25 -0400 Subject: [PATCH 555/678] defconfig: a61 --- arch/arm/configs/metallice_grouper_defconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index b99455a0782..208353728cc 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-" +CONFIG_LOCALVERSION="-MKernel-a61" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -3129,6 +3129,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_UTF8 is not set +CONFIG_DYNAMIC_FSYNC=y # # Kernel hacking From e7b2d097c494ab27ee1b967207d13838dc7455b1 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 13 Sep 2013 13:42:32 -0400 Subject: [PATCH 556/678] mr2 --- arch/arm/configs/metallice_grouper_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 208353728cc..cebd062edbf 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a61" +CONFIG_LOCALVERSION="-MKernel-mr2" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y From eadbd151063a92f692322e9fd7eefb8b3e8ffabc Mon Sep 17 00:00:00 2001 From: morfic Date: Thu, 28 Feb 2013 19:24:59 -0600 Subject: [PATCH 557/678] Not cool to force that debugging. --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 97315e5f14f..f6444984a08 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -936,7 +936,7 @@ config PANIC_TIMEOUT menuconfig EXPERT bool "Configure standard kernel features (expert users)" # Unhide debug options, to make the on-by-default options visible - select DEBUG_KERNEL + # We do not select DEBUG_KERNEL help This option allows certain base kernel options and settings to be disabled or tweaked. 
This is for specialized From 96f0e2afaccc3415fab19fdcbebcbd176320e5a9 Mon Sep 17 00:00:00 2001 From: souljaboy11792 Date: Wed, 7 Aug 2013 16:36:22 +0600 Subject: [PATCH 558/678] Experimenting with s2w and dt2w. --- drivers/input/keyboard/gpio_keys.c | 4 + drivers/input/touchscreen/ektf3k.c | 533 ++++++++++++++++++++++++----- include/linux/swwep2wake.h | 23 ++ 3 files changed, 469 insertions(+), 91 deletions(-) create mode 100644 include/linux/swwep2wake.h diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 69319792e98..138dadeed7e 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -32,6 +32,7 @@ #include #include #include +#include struct gpio_button_data { const struct gpio_keys_button *button; @@ -748,6 +749,9 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) } input_sync(input); + sweep2wake_setdev(input); + printk(KERN_INFO "[sweep2wake]: set device %s\n", input->name); + device_init_wakeup(&pdev->dev, wakeup); return 0; diff --git a/drivers/input/touchscreen/ektf3k.c b/drivers/input/touchscreen/ektf3k.c index 6e4b6993598..d9e544cd859 100755 --- a/drivers/input/touchscreen/ektf3k.c +++ b/drivers/input/touchscreen/ektf3k.c @@ -2,6 +2,9 @@ * * Copyright (C) 2011 Elan Microelectronics Corporation. * + * Sweep2wake and Doubletap2wake for Nexus 7 (flo) + * Copyright (C) 2013 Aaron Segaert (flar2) asegaert at gmail.com. All rights reserved. + * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and * may be copied, distributed, and modified under those terms. @@ -28,6 +31,8 @@ #include #include +#include + // for linux 2.6.36.3 #include #include @@ -89,6 +94,7 @@ #define IOCTL_RESUME _IOR(ELAN_IOCTLID, 14, int) #define IOCTL_FW_UPDATE _IOR(ELAN_IOCTLID, 22, int) +//don't use firmware update #define FIRMWARE_UPDATE_WITH_HEADER 1 uint16_t checksum_err=0; @@ -97,6 +103,7 @@ int FW_VERSION=0x00; int X_RESOLUTION=0x00; int Y_RESOLUTION=0x00; int FW_ID=0x00; +int BOOTCODE_VERSION=0x00; static int work_lock=0x00; #define USB_NO_Cable 0 @@ -106,6 +113,9 @@ static int work_lock=0x00; #define USB_Cable ((1 << (USB_SHIFT)) | (USB_DETECT_CABLE)) #define USB_AC_Adapter ((1 << (AC_SHIFT)) | (USB_DETECT_CABLE)) #define USB_CALBE_DETECT_MASK (USB_Cable | USB_DETECT_CABLE) +/*use for slim port to hdmi*/ +#define SLIM_HDMI_MODE 10 +#define HDMI_POWER_SOURCE_CMD 3 static unsigned now_usb_cable_status=0; static unsigned int gPrint_point = 0; @@ -162,16 +172,18 @@ struct elan_ktf3k_ts_data { static struct elan_ktf3k_ts_data *private_ts = NULL; static int __fw_packet_handler(struct i2c_client *client, int imediate); static int elan_ktf3k_ts_rough_calibrate(struct i2c_client *client); -static int elan_ktf3k_ts_hw_reset(struct i2c_client *client); +static int elan_ktf3k_ts_hw_reset(struct i2c_client *client, unsigned int time); static int elan_ktf3k_ts_resume(struct i2c_client *client); + #ifdef FIRMWARE_UPDATE_WITH_HEADER -static int firmware_update_header(struct i2c_client *client, const unsigned char *firmware, unsigned int page_number); +static int firmware_update_header(struct i2c_client *client, unsigned char *firmware, unsigned int page_number); #endif + static struct semaphore pSem; static int mTouchStatus[FINGER_NUM] = {0}; #define FIRMWARE_PAGE_SIZE 132 -#define MAX_FIRMWARE_SIZE 32868 +#define MAX_FIRMWARE_SIZE 52800 #define FIRMWARE_ACK_SIZE 2 /* Debug levels */ @@ -189,6 +201,192 @@ static int debug = DEBUG_INFO; 
printk("[ektf3k]:" __VA_ARGS__); \ } while (0) + +/* sweep2wake */ + +static struct input_dev *sweep2wake_pwrdev; +static DEFINE_MUTEX(s2w_lock); +int dt2w_switch = 1; +int dt2w_switch_temp = 1; +int dt2w_changed = 0; +int s2w_switch = 1; +int s2w_switch_temp = 1; +int s2w_changed = 0; +bool scr_suspended = false; +int tripon = 0; +int tripoff = 0; +unsigned long triptime = 0; +unsigned long dt2w_time[2] = {0, 0}; +unsigned int dt2w_x[2] = {0, 0}; +unsigned int dt2w_y[2] = {0, 0}; +int status[2] = {0,0}; +int dt2w_count = 0; +#define S2W_TIMEOUT 75 +#define DT2W_TIMEOUT_MAX 40 +#define DT2W_TIMEOUT_MIN 8 +#define DT2W_DELTA 60 + +void sweep2wake_setdev(struct input_dev * input_device) { + sweep2wake_pwrdev = input_device; + return; +} + +EXPORT_SYMBOL(sweep2wake_setdev); + +static void reset_sweep2wake(int s2w, int dt2w) +{ + //reset sweep2wake + if (s2w) { + tripoff = 0; + tripon = 0; + triptime = 0; + } + + //reset doubletap2wake + if (dt2w) { + dt2w_time[0] = 0; + dt2w_x[0] = 0; + dt2w_y[0] = 0; + dt2w_time[1] = 0; + dt2w_x[1] = 0; + dt2w_y[1] = 0; + dt2w_count = 0; + } + + return; +} + +static void sweep2wake_presspwr(struct work_struct *sweep2wake_presspwr_work) +{ + reset_sweep2wake(1,1); + + input_event(sweep2wake_pwrdev, EV_KEY, KEY_POWER, 1); + input_event(sweep2wake_pwrdev, EV_SYN, 0, 0); + msleep(20); + input_event(sweep2wake_pwrdev, EV_KEY, KEY_POWER, 0); + input_event(sweep2wake_pwrdev, EV_SYN, 0, 0); + msleep(20); + mutex_unlock(&s2w_lock); +} + +static DECLARE_WORK(sweep2wake_presspwr_work, sweep2wake_presspwr); + +void sweep2wake_pwrtrigger(void) +{ + if (mutex_trylock(&s2w_lock)) + schedule_work(&sweep2wake_presspwr_work); +} + +int sweep2wake_touch_check(int i) +{ + status[1] = status[0]; + status[0] = mTouchStatus[i]; + + if (status[0] != status[1]) { + return 0; + } else { + return 1; + } +} + +void sweep2wake_func(int x, int y, unsigned long time, int i) +{ + int sametouch = sweep2wake_touch_check(i); + + //printk("[sweep2wake]: x,y(%d,%d) jiffies:%lu\n", x, y, time); + + if (!sametouch){ + reset_sweep2wake(1,0); + return; + } + + //left->right + if (scr_suspended == true && s2w_switch == 1) { + if (y < 100) { + tripon = 1; + triptime = time; + } else if (tripon == 1 && y > 488 && time - triptime < 25) { + tripon = 2; + } else if (tripon == 2 && y > 896 && time - triptime < 50) { + tripon = 3; + } else if (tripon == 3 && y > 1150 && time - triptime < S2W_TIMEOUT) { + printk(KERN_INFO "[sweep2wake]: ON"); + sweep2wake_pwrtrigger(); + } + //right->left + } else if (scr_suspended == false && s2w_switch > 0 && x > 2000) { + if (y > 1250) { + tripoff = 1; + triptime = time; + } else if (tripoff == 1 && y < 896 && time - triptime < 25) { + tripoff = 2; + } else if (tripoff == 2 && y < 488 && time - triptime < 50) { + tripoff = 3; + } else if (tripoff == 3 && y < 100 && (time - triptime < S2W_TIMEOUT)) { + printk(KERN_INFO "[sweep2wake]: OFF"); + sweep2wake_pwrtrigger(); + } + } + +} + +void doubletap2wake_func(int x, int y) +{ + + int delta_x = 0; + int delta_y = 0; + + dt2w_count++; + + //printk("dt2w: time=%lu\n", jiffies); + + dt2w_time[1] = dt2w_time[0]; + dt2w_time[0] = jiffies; + + if ((dt2w_time[0] - dt2w_time[1]) > 45) { + dt2w_count = 0; + //printk("dt2w: reset dt2w_count\n"); + } + + if ((dt2w_time[0] - dt2w_time[1]) < 8 || dt2w_count > 1) { + //printk("dt2w: too fast, dt2w_count=%d\n", dt2w_count); + return; + } else { + dt2w_count = 0; + } + + dt2w_x[1] = dt2w_x[0]; + dt2w_x[0] = x; + dt2w_y[1] = dt2w_y[0]; + dt2w_y[0] = y; + + delta_x = (dt2w_x[0]-dt2w_x[1]); + 
delta_y = (dt2w_y[0]-dt2w_y[1]); + + if ((abs(delta_x) < DT2W_DELTA) && (abs(delta_y) < DT2W_DELTA)) { + + if (y > 50 && y < 1300 + && ((dt2w_time[0] - dt2w_time[1]) > DT2W_TIMEOUT_MIN) + && ((dt2w_time[0] - dt2w_time[1]) < DT2W_TIMEOUT_MAX)) { + + //printk("[dt2w]: OFF->ON\n"); + sweep2wake_pwrtrigger(); + + } else { + //printk("dt2w: wrong time\n"); + } + + } else { + //printk("dt2w: wrong spot\n"); + } + + return; +} + + +/* end sweep2wake */ + + int elan_iap_open(struct inode *inode, struct file *filp){ touch_debug(DEBUG_INFO, "[ELAN]into elan_iap_open\n"); if (private_ts == NULL) touch_debug(DEBUG_ERROR, "private_ts is NULL~~~"); @@ -260,7 +458,7 @@ static long elan_iap_ioctl(/*struct inode *inode,*/ struct file *filp, unsign case IOCTL_MINOR_FW_VER: break; case IOCTL_RESET: - return elan_ktf3k_ts_hw_reset(private_ts->client); + return elan_ktf3k_ts_hw_reset(private_ts->client, 0); case IOCTL_IAP_MODE_LOCK: work_lock=1; disable_irq(private_ts->client->irq); @@ -374,6 +572,72 @@ static ssize_t elan_show_status(struct device *dev, struct device_attribute *dev DEVICE_ATTR(elan_touchpanel_status, S_IRUGO, elan_show_status, NULL); + + +/* sweep2wake sysfs */ +static ssize_t elan_ktf3k_sweep2wake_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + size_t count = 0; + + if (s2w_switch == s2w_switch_temp ) + count += sprintf(buf, "%d\n", s2w_switch); + else + count += sprintf(buf, "%d->%d\n", s2w_switch, s2w_switch_temp); + + return count; +} + +static ssize_t elan_ktf3k_sweep2wake_dump(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + if (buf[0] >= '0' && buf[0] <= '2' && buf[1] == '\n') + if (s2w_switch != buf[0] - '0') { + s2w_switch_temp = buf[0] - '0'; + if (!scr_suspended) + s2w_switch = s2w_switch_temp; + else + s2w_changed = 1; + } + + return count; +} + +static DEVICE_ATTR(sweep2wake, (S_IWUSR|S_IRUGO), + elan_ktf3k_sweep2wake_show, elan_ktf3k_sweep2wake_dump); + +static ssize_t elan_ktf3k_doubletap2wake_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + size_t count = 0; + + if (dt2w_switch == dt2w_switch_temp) + count += sprintf(buf, "%d\n", dt2w_switch); + else + count += sprintf(buf, "%d->%d\n", dt2w_switch, dt2w_switch_temp); + + return count; +} + +static ssize_t elan_ktf3k_doubletap2wake_dump(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + if (buf[0] >= '0' && buf[0] <= '1' && buf[1] == '\n') + if (dt2w_switch != buf[0] - '0') { + dt2w_switch_temp = buf[0] - '0'; + if (!scr_suspended) + dt2w_switch = dt2w_switch_temp; + else + dt2w_changed = 1; + } + + return count; +} + +static DEVICE_ATTR(doubletap2wake, (S_IWUSR|S_IRUGO), + elan_ktf3k_doubletap2wake_show, elan_ktf3k_doubletap2wake_dump); + +/* end sweep2wake sysfs*/ + + static int check_fw_version(const unsigned char*firmware, unsigned int size, int fw_version){ int id, version; @@ -386,12 +650,18 @@ static int check_fw_version(const unsigned char*firmware, unsigned int size, int (firmware[size - 2*FIRMWARE_PAGE_SIZE + 123] << 8); touch_debug(DEBUG_INFO, "The firmware was version 0x%X and id:0x%X\n", version, id); - if(id == 0x3021) - return fw_version == 0xFFFF ? 1 : version - fw_version; // if the touch firmware was empty, always update firmware - else - return 0; // this buffer doesn't contain the touch firmware + + if (id == 0x3029 && BOOTCODE_VERSION >= 0x6046) { + /*if the touch firmware was empty, always update firmware*/ + return fw_version == 0xFFFF ? 
1 : version - fw_version; + } else { + /*this buffer doesn't contain the touch firmware*/ + return 0; + } } + +/* static ssize_t update_firmware(struct device *dev, struct device_attribute *devattr,const char *buf, size_t count) { struct i2c_client *client = to_i2c_client(dev); @@ -427,7 +697,7 @@ static ssize_t update_firmware(struct device *dev, struct device_attribute *deva if(RECOVERY || check_fw_version(firmware, pos, ts->fw_ver) > 0){ touch_debug(DEBUG_INFO, "Firmware update start!\n"); do{ - ret = firmware_update_header(client, firmware, page_number); +// ret = firmware_update_header(client, firmware, page_number);//add by mars touch_debug(DEBUG_INFO, "Firmware update finish ret=%d retry=%d !\n", ret, retry++); }while(ret != 0 && retry < 3); if(ret == 0 && RECOVERY) RECOVERY = 0; @@ -436,20 +706,24 @@ static ssize_t update_firmware(struct device *dev, struct device_attribute *deva return count; } - -DEVICE_ATTR(update_fw, S_IWUSR, NULL, update_firmware); +*/ +//DEVICE_ATTR(update_fw, S_IWUSR, NULL, update_firmware); static struct attribute *elan_attr[] = { &dev_attr_elan_touchpanel_status.attr, &dev_attr_vendor.attr, &dev_attr_gpio.attr, - &dev_attr_update_fw.attr, + //&dev_attr_update_fw.attr, +/* sweep2wake sysfs */ + &dev_attr_sweep2wake.attr, + &dev_attr_doubletap2wake.attr, NULL }; static struct kobject *android_touch_kobj; + static int elan_ktf3k_touch_sysfs_init(void) { int ret ; @@ -460,7 +734,7 @@ static int elan_ktf3k_touch_sysfs_init(void) ret = -ENOMEM; return ret; } - ret = sysfs_create_file(android_touch_kobj, &dev_attr_gpio.attr); +/* ret = sysfs_create_file(android_touch_kobj, &dev_attr_gpio.attr); if (ret) { touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_file failed\n", __func__); return ret; @@ -470,13 +744,29 @@ static int elan_ktf3k_touch_sysfs_init(void) touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); return ret; } +*/ +/* sweep2wake sysfs */ + ret = sysfs_create_file(android_touch_kobj, &dev_attr_sweep2wake.attr); + if (ret) { + touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); + return ret; + } + ret = sysfs_create_file(android_touch_kobj, &dev_attr_doubletap2wake.attr); + if (ret) { + touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); + return ret; + } + return 0 ; } static void elan_touch_sysfs_deinit(void) { - sysfs_remove_file(android_touch_kobj, &dev_attr_vendor.attr); - sysfs_remove_file(android_touch_kobj, &dev_attr_gpio.attr); +// sysfs_remove_file(android_touch_kobj, &dev_attr_vendor.attr); +// sysfs_remove_file(android_touch_kobj, &dev_attr_gpio.attr); +/* sweep2wake sysfs */ + sysfs_remove_file(android_touch_kobj, &dev_attr_sweep2wake.attr); + sysfs_remove_file(android_touch_kobj, &dev_attr_doubletap2wake.attr); kobject_del(android_touch_kobj); } @@ -536,7 +826,7 @@ static int elan_ktf3k_ts_read_command(struct i2c_client *client, u8* cmd, u16 cmd_length, u8 *value, u16 value_length){ struct i2c_adapter *adapter = client->adapter; struct i2c_msg msg[2]; - __le16 le_addr; + //__le16 le_addr; struct elan_ktf3k_ts_data *ts; int length = 0; @@ -561,7 +851,7 @@ static int elan_ktf3k_i2c_read_packet(struct i2c_client *client, u8 *value, u16 value_length){ struct i2c_adapter *adapter = client->adapter; struct i2c_msg msg[1]; - __le16 le_addr; + //__le16 le_addr; struct elan_ktf3k_ts_data *ts; int length = 0; @@ -585,7 +875,7 @@ static int __hello_packet_handler(struct i2c_client *client) { int rc; uint8_t buf_recv[4] = { 0 }; - uint8_t buf_recv1[4] = { 0 }; + //uint8_t 
buf_recv1[4] = { 0 }; rc = elan_ktf3k_ts_poll(client); if (rc < 0) { @@ -615,7 +905,7 @@ static int wait_for_IRQ_Low(struct i2c_client *client, int utime){ return 0; }while(retry_times-- > 0); - touch_debug("Wait IRQ time out\n"); + touch_debug(DEBUG_INFO,"Wait IRQ time out\n"); return -1; } @@ -628,6 +918,7 @@ static int __fw_packet_handler(struct i2c_client *client, int immediate) uint8_t cmd_x[] = {0x53, 0x60, 0x00, 0x00}; /*Get x resolution*/ uint8_t cmd_y[] = {0x53, 0x63, 0x00, 0x00}; /*Get y resolution*/ uint8_t cmd_id[] = {0x53, 0xf0, 0x00, 0x01}; /*Get firmware ID*/ + uint8_t cmd_boot_id[] = {0x53, 0x10, 0x00, 0x01};/*Get boot code version*/ uint8_t buf_recv[4] = {0}; // Firmware version rc = elan_ktf3k_ts_read_command(client, cmd, 4, buf_recv, 4); @@ -683,6 +974,20 @@ static int __fw_packet_handler(struct i2c_client *client, int immediate) FW_ID = ts->fw_id; touch_debug(DEBUG_INFO, "[elan] %s: firmware id: 0x%4.4x\n", __func__, ts->fw_id); } +/*boot code version*/ + rc = elan_ktf3k_ts_read_command(client, cmd_boot_id, 4, buf_recv, 4); + if (rc < 0) + return rc; + + if (immediate) { + wait_for_IRQ_Low(client, 1000); + elan_ktf3k_i2c_read_packet(client, buf_recv, 4); + major = ((buf_recv[1] & 0x0f) << 4) | ((buf_recv[2] & 0xf0) >> 4); + minor = ((buf_recv[2] & 0x0f) << 4) | ((buf_recv[3] & 0xf0) >> 4); + + BOOTCODE_VERSION = major << 8 | minor; + touch_debug(DEBUG_INFO, "[elan] %s: boot code id: 0x%4.4x\n", __func__, BOOTCODE_VERSION); + } return 0; } @@ -707,7 +1012,7 @@ static int elan_ktf3k_ts_setup(struct i2c_client *client) int rc, count = 10; retry: // Reset - elan_ktf3k_ts_hw_reset(client); + elan_ktf3k_ts_hw_reset(client, 250); // Check if old firmware. If not, send the notmal_command to enter normal mode if( isOldFW(client) == 0 ){ //if check is new bootcode touch_debug(DEBUG_INFO, "The boot code is new!\n"); @@ -803,14 +1108,14 @@ static int elan_ktf3k_ts_get_power_state(struct i2c_client *client) return power_state; } -static int elan_ktf3k_ts_hw_reset(struct i2c_client *client) +static int elan_ktf3k_ts_hw_reset(struct i2c_client *client, unsigned int time) { struct elan_ktf3k_ts_data *ts = i2c_get_clientdata(client); touch_debug(DEBUG_INFO, "[ELAN] Start HW reset!\n"); gpio_direction_output(ts->rst_gpio, 0); usleep_range(1000,1500); gpio_direction_output(ts->rst_gpio, 1); - msleep(250); + if(time) msleep(time); return 0; } @@ -821,9 +1126,11 @@ static int elan_ktf3k_ts_set_power_source(struct i2c_client *client, u8 state) int length = 0; dev_dbg(&client->dev, "[elan] %s: enter\n", __func__); - /*0x52 0x40 0x00 0x01 => Battery Mode - 0x52 0x41 0x00 0x01 => USB and AC Adapter Mode - */ + /* + 0x52 0x40 0x00 0x01 => Battery Mode + 0x52 0x41 0x00 0x01 => USB and AC Adapter Mode + 0x52 0x43 0x00 0x01 => SLIM Port to HDMI + */ cmd[1] |= state & 0x0F; dev_dbg(&client->dev, @@ -842,11 +1149,12 @@ static int elan_ktf3k_ts_set_power_source(struct i2c_client *client, u8 state) return 0; } +/* static int elan_ktf3k_ts_get_power_source(struct i2c_client *client) { int rc = 0; uint8_t cmd[] = {CMD_R_PKT, 0x40, 0x00, 0x01}; - uint8_t buf[4] = {0}, power_source; + uint8_t buf[4] = {0}; //rc = elan_ktf2k_ts_get_data(client, cmd, buf, 4); rc = elan_ktf3k_ts_read_command(client, cmd, 4, buf, 4); @@ -855,12 +1163,17 @@ static int elan_ktf3k_ts_get_power_source(struct i2c_client *client) return 0; } +*/ -static void update_power_source(){ +static void update_power_source(void){ unsigned power_source = now_usb_cable_status; if(private_ts == NULL || work_lock) return; // Send power state 1 if 
USB cable and AC charger was plugged on. - elan_ktf3k_ts_set_power_source(private_ts->client, power_source != USB_NO_Cable); + if (power_source == SLIM_HDMI_MODE) { + elan_ktf3k_ts_set_power_source(private_ts->client, HDMI_POWER_SOURCE_CMD); + } else { + elan_ktf3k_ts_set_power_source(private_ts->client, power_source != USB_NO_Cable); + } } void touch_callback(unsigned cable_status){ @@ -893,7 +1206,7 @@ static void elan_ktf3k_ts_report_data(struct i2c_client *client, uint8_t *buf) { struct elan_ktf3k_ts_data *ts = i2c_get_clientdata(client); struct input_dev *idev = ts->input_dev; - uint16_t x, y, touch_size, pressure_size; + uint16_t x = 0, y = 0, touch_size, pressure_size; uint16_t fbits=0, checksum=0; uint8_t i, num; static uint8_t size_index[10] = {35, 35, 36, 36, 37, 37, 38, 38, 39, 39}; @@ -923,19 +1236,21 @@ static void elan_ktf3k_ts_report_data(struct i2c_client *client, uint8_t *buf) input_report_abs(idev, ABS_MT_POSITION_X, y); input_report_abs(idev, ABS_MT_POSITION_Y, x); if(unlikely(gPrint_point)) touch_debug(DEBUG_INFO, "[elan] finger id=%d X=%d y=%d size=%d pressure=%d\n", i, x, y, touch_size, pressure_size); - } + + } } mTouchStatus[i] = active; fbits = fbits >> 1; idx += 3; } + input_sync(idev); } // checksum else { checksum_err +=1; touch_debug(DEBUG_ERROR, "[elan] Checksum Error %d byte[2]=%X\n", checksum_err, buf[2]); } - + return; } @@ -964,7 +1279,7 @@ static void elan_ktf3k_ts_report_data2(struct i2c_client *client, uint8_t *buf) input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, active); if(active){ elan_ktf3k_ts_parse_xy(&buf[idx], &x, &y); - x = x > ts->abs_x_max ? 0 : ts->abs_x_max - x; + x = x > ts->abs_x_max ? 0 : ts->abs_x_max - x; y = y > ts->abs_y_max ? ts->abs_y_max : y; touch_size = buf[35 + i]; pressure_size = buf[45 + i]; @@ -973,12 +1288,19 @@ static void elan_ktf3k_ts_report_data2(struct i2c_client *client, uint8_t *buf) input_report_abs(idev, ABS_MT_POSITION_X, y); input_report_abs(idev, ABS_MT_POSITION_Y, x); if(unlikely(gPrint_point)) touch_debug(DEBUG_INFO, "[elan] finger id=%d X=%d y=%d size=%d pressure=%d\n", i, x, y, touch_size, pressure_size); - } - } + +/* sweep2wake */ + if (s2w_switch > 0) + sweep2wake_func(x, y, jiffies, i); + if (dt2w_switch && scr_suspended) + doubletap2wake_func(x, y); +/* end sweep2wake */ + } + } mTouchStatus[i] = active; fbits = fbits >> 1; idx += 3; - } + } input_sync(idev); } // checksum else { @@ -1118,7 +1440,7 @@ static irqreturn_t elan_ktf3k_ts_irq_handler(int irq, void *dev_id) { struct elan_ktf3k_ts_data *ts = dev_id; struct i2c_client *client = ts->client; - + dev_dbg(&client->dev, "[elan] %s\n", __func__); disable_irq_nosync(ts->client->irq); queue_work(ts->elan_wq, &ts->work); @@ -1216,6 +1538,7 @@ static int ektf_proc_write(struct file *file, const char *buffer, unsigned long } #endif // #ifdef _ENABLE_DBG_LEV + #ifdef FIRMWARE_UPDATE_WITH_HEADER #define FIRMWARE_PAGE_SIZE 132 static unsigned char touch_firmware[] = { @@ -1225,21 +1548,25 @@ static unsigned char touch_firmware[] = { #define SIZE_PER_PACKET 4 static int sendI2CPacket(struct i2c_client *client, const unsigned char *buf, unsigned int length){ - int ret, i, retry_times = 10; + int ret, i; + int retry_times = 10; for(i = 0; i < length; i += ret){ ret = i2c_master_send(client, buf + i, length < SIZE_PER_PACKET ? length : SIZE_PER_PACKET); - if(ret <= 0){ + + if(ret <= 0){ retry_times--; ret = 0; - } + } if(ret < (length < SIZE_PER_PACKET ? 
length : SIZE_PER_PACKET)){ - touch_debug("Sending packet broken\n"); + touch_debug(DEBUG_INFO,"Sending packet broken\n"); + //printk("[ektf3k]:Sending packet broken\n"); } - if(retry_times < 0){ - touch_debug("Failed sending I2C touch firmware packet.\n"); + touch_debug(DEBUG_INFO,"Failed sending I2C touch firmware packet.\n"); + //printk("[ektf3k]:Failed sending I2C touch firmware packet.\n"); break; } + } return i; @@ -1255,7 +1582,8 @@ static int recvI2CPacket(struct i2c_client *client, unsigned char *buf, unsigned } if(retry_times < 0){ - touch_debug("Failed sending I2C touch firmware packet.\n"); + touch_debug(DEBUG_INFO,"Failed sending I2C touch firmware packet.\n"); + //printk("[ektf3k]:Failed sending I2C touch firmware packet.\n"); break; } } @@ -1264,14 +1592,15 @@ static int recvI2CPacket(struct i2c_client *client, unsigned char *buf, unsigned } -static int firmware_update_header(struct i2c_client *client, const unsigned char *firmware, unsigned int pages_number){ - int ret, i, mode; - int retry_times = 3, write_times; +static int firmware_update_header(struct i2c_client *client, unsigned char *firmware, unsigned int pages_number){ + + int ret, i; + int sendCount; + int recvCount; + int write_times; unsigned char packet_data[8] = {0}; - unsigned char isp_cmd[4] = {0x54, 0x00, 0x12, 0x34}; unsigned char nb_isp_cmd[4] = {0x45, 0x49, 0x41, 0x50}; unsigned char *cursor; - int boot_code = 0; struct elan_ktf3k_ts_data *ts = i2c_get_clientdata(client); if(ts == NULL) @@ -1281,27 +1610,13 @@ static int firmware_update_header(struct i2c_client *client, const unsigned char disable_irq(client->irq); // Blocking call no need to do extra wait wake_lock(&ts->wakelock); work_lock = 1; - elan_ktf3k_ts_hw_reset(client); - // Step 1: Check boot code version - boot_code = gpio_get_value(ts->intr_gpio); - if(boot_code == 0){ // if the boot code is old - touch_debug(DEBUG_INFO, "The firmware update of old boot code\n"); - if(recvI2CPacket(client, packet_data, 4) < 0) - goto fw_update_failed; - - touch_debug(DEBUG_INFO, "The received bytes 0x%X 0x%X 0x%X 0x%X\n", packet_data[0], packet_data[1], - packet_data[2], packet_data[3]); - if(packet_data[0] == 0x55 && packet_data[1] == 0x55 && packet_data[2] == 0x80 && packet_data[3] == 0x80) - touch_debug(DEBUG_INFO, "In the recovery mode\n"); + /*add delay for waiting bootcode initial*/ + elan_ktf3k_ts_hw_reset(client, 20); + touch_debug(DEBUG_INFO, "Send command into IAP mode\n"); + /*get into IAP mode*/ + if (sendI2CPacket(client, nb_isp_cmd, sizeof(nb_isp_cmd)) < 0) + goto fw_update_failed; - if(sendI2CPacket(client, isp_cmd, sizeof(isp_cmd)) < 0) // get into ISP mode - goto fw_update_failed; - }else{ // if the boot code is new - touch_debug(DEBUG_INFO, "The firmware update of new boot code\n"); - if(sendI2CPacket(client, nb_isp_cmd, sizeof(nb_isp_cmd)) < 0) // get into ISP mode - goto fw_update_failed; - } - msleep(100); packet_data[0] = 0x10; if(sendI2CPacket(client, packet_data, 1) < 0) // send dummy byte @@ -1314,14 +1629,14 @@ static int firmware_update_header(struct i2c_client *client, const unsigned char page_write_retry: touch_debug(DEBUG_MESSAGES, "Update page number %d\n", i); - int sendCount; + if((sendCount = sendI2CPacket(client, cursor, FIRMWARE_PAGE_SIZE)) != FIRMWARE_PAGE_SIZE){ dev_err(&client->dev, "Fail to Update page number %d\n", i); goto fw_update_failed; } touch_debug(DEBUG_INFO, "sendI2CPacket send %d bytes\n", sendCount); - int recvCount; + msleep(25); if((recvCount = recvI2CPacket(client, packet_data, FIRMWARE_ACK_SIZE)) != 
FIRMWARE_ACK_SIZE){ dev_err(&client->dev, "Fail to Update page number %d\n", i); goto fw_update_failed; @@ -1329,6 +1644,7 @@ static int firmware_update_header(struct i2c_client *client, const unsigned char touch_debug(DEBUG_INFO, "recvI2CPacket recv %d bytes: %x %x\n", recvCount, packet_data[0], packet_data[1]); + if(packet_data[0] != 0xaa || packet_data[1] != 0xaa){ touch_debug(DEBUG_INFO, "message received: %02X %02X Page %d rewrite\n", packet_data[0], packet_data[1], i); if(write_times++ > 3) @@ -1336,22 +1652,27 @@ static int firmware_update_header(struct i2c_client *client, const unsigned char goto page_write_retry; } + cursor += FIRMWARE_PAGE_SIZE; } - elan_ktf3k_ts_hw_reset(client); - if(boot_code) - msleep(2000); - else - msleep(300); - if(recvI2CPacket(client, packet_data, 4) < 0) - goto fw_update_failed; - __fw_packet_handler(ts->client, 1); + elan_ktf3k_ts_hw_reset(client, 0); + + /*check irq*/ + wait_for_IRQ_Low(client, 500000);/*500ms * 10*/ + + if (recvI2CPacket(client, packet_data, 4) < 0) + goto fw_update_failed; + /*add debug message for hello packet*/ + touch_debug(DEBUG_INFO, "[elan] %s: hello packet %2x:%2X:%2x:%2x\n", __func__, packet_data[0], packet_data[1], packet_data[2], packet_data[3]); + + __fw_packet_handler(ts->client, 1); ret = 0; goto fw_update_finish; fw_update_failed: ret = -1; + touch_debug(DEBUG_INFO, "Failed the touch firmware update!\n"); fw_update_finish: work_lock = 0; wake_unlock(&ts->wakelock); @@ -1397,11 +1718,11 @@ int elan_stress_release(struct inode *inode, struct file *filp) return 0; /* success */ } -int elan_stress_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +long elan_stress_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int err = 1; - printk("%s\n", __func__, cmd); + printk("[elan_stress_ioctl]%d\n", cmd); if (_IOC_TYPE(cmd) != STRESS_IOC_MAGIC) return -ENOTTY; if (_IOC_NR(cmd) > STRESS_IOC_MAXNR) @@ -1519,7 +1840,7 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, __set_bit(EV_ABS, ts->input_dev->evbit); __set_bit(EV_SYN, ts->input_dev->evbit); __set_bit(EV_KEY, ts->input_dev->evbit); - + __set_bit(INPUT_PROP_DIRECT, ts->input_dev->propbit); err = input_register_device(ts->input_dev); if (err) { @@ -1535,9 +1856,10 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, touch_debug(DEBUG_INFO, "[elan]%s: handle missed interrupt\n", __func__); elan_ktf3k_ts_irq_handler(client->irq, ts); } + #ifdef FIRMWARE_UPDATE_WITH_HEADER - if(RECOVERY || check_fw_version(touch_firmware, sizeof(touch_firmware), ts->fw_ver) > 0) + if (RECOVERY || check_fw_version(touch_firmware, sizeof(touch_firmware), ts->fw_ver) > 0) firmware_update_header(client, touch_firmware, sizeof(touch_firmware)/FIRMWARE_PAGE_SIZE); #endif @@ -1550,7 +1872,7 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, private_ts = ts; - //elan_ktf2k_touch_sysfs_init(); + elan_ktf3k_touch_sysfs_init(); ts->attrs.attrs = elan_attr; err = sysfs_create_group(&client->dev.kobj, &ts->attrs); if (err) { @@ -1617,9 +1939,9 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, input_free_device(ts->input_dev); err_input_dev_alloc_failed: -err_detect_failed: - if (ts->elan_wq) - destroy_workqueue(ts->elan_wq); +//err_detect_failed: +// if (ts->elan_wq) +// destroy_workqueue(ts->elan_wq); err_create_wq_failed: kfree(ts); @@ -1673,14 +1995,24 @@ static int elan_ktf3k_ts_suspend(struct i2c_client *client, pm_message_t mesg) int rc = 0; touch_debug(DEBUG_INFO, "[elan] %s: enter\n", __func__); - disable_irq(client->irq); + +/*s2w*/ + 
if (s2w_switch == 1 || dt2w_switch == 1) { + enable_irq_wake(client->irq); + } else { + disable_irq(client->irq); + } + force_release_pos(client); rc = cancel_work_sync(&ts->work); if (rc) enable_irq(client->irq); - if(work_lock == 0) +/*s2w*/ + if((s2w_switch != 1 && !dt2w_switch) && work_lock == 0) rc = elan_ktf3k_ts_set_power_state(client, PWR_STATE_DEEP_SLEEP); +/*s2w*/ + scr_suspended = true; return 0; } @@ -1689,8 +2021,11 @@ static int elan_ktf3k_ts_resume(struct i2c_client *client) { int rc = 0, retry = 5; - struct elan_ktf3k_ts_data *ts = i2c_get_clientdata(client); - int delay_time; + //struct elan_ktf3k_ts_data *ts = i2c_get_clientdata(client); + //int delay_time; + + //gpio_direction_output(31, 0); + touch_debug(DEBUG_INFO, "[elan] %s: enter\n", __func__); if(work_lock == 0){ do { @@ -1704,7 +2039,22 @@ static int elan_ktf3k_ts_resume(struct i2c_client *client) } while (--retry); } //force_release_pos(client); - enable_irq(client->irq); + +/* s2w */ + if (s2w_switch == 1 || dt2w_switch == 1) { + disable_irq_wake(client->irq); + } else { + enable_irq(client->irq); + } + + if (s2w_changed) + s2w_switch = s2w_switch_temp; + if (dt2w_changed) + dt2w_switch = dt2w_switch_temp; + + scr_suspended = false; +/* end s2w */ + return 0; } @@ -1759,3 +2109,4 @@ module_exit(elan_ktf3k_ts_exit); MODULE_DESCRIPTION("ELAN KTF3K Touchscreen Driver"); MODULE_LICENSE("GPL"); + diff --git a/include/linux/swwep2wake.h b/include/linux/swwep2wake.h new file mode 100644 index 00000000000..a1e2f5c3e16 --- /dev/null +++ b/include/linux/swwep2wake.h @@ -0,0 +1,23 @@ +/* +* include/linux/sweep2wake.h +* +* Copyright (c) 2013, Aaron Segaert (flar2) asegaert at gmail.com +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +* more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ + + +extern void sweep2wake_setdev(struct input_dev * input_device); + From 3b9349f79048fa495efdc514bade3f33a19f04ab Mon Sep 17 00:00:00 2001 From: souljaboy11792 Date: Thu, 8 Aug 2013 12:54:54 +0600 Subject: [PATCH 559/678] Reworked s2w and dt2w from flar2's kernel. 
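
For reference, the reworked doubletap2wake decision reduces to a distance plus timing
check on the last two reported taps. A minimal sketch, assuming the DT2W_* constants and
jiffies timestamps used further down in this patch (the helper name is made up; the real
logic is inline in doubletap2wake_func()):

	/* Wake only if two consecutive taps land close together and the gap
	 * between them, in jiffies, is neither too short nor too long. */
	static int dt2w_should_wake(unsigned int x0, unsigned int y0, unsigned long t0,
				    unsigned int x1, unsigned int y1, unsigned long t1)
	{
		long dx = (long)x0 - (long)x1;
		long dy = (long)y0 - (long)y1;
		unsigned long dt = t0 - t1;

		if (dx < 0)
			dx = -dx;
		if (dy < 0)
			dy = -dy;

		return dx < DT2W_DELTA && dy < DT2W_DELTA &&
		       dt > DT2W_TIMEOUT_MIN && dt < DT2W_TIMEOUT_MAX;
	}

The sweep2wake side is split the same way into per-direction trip points (tripon_vl/vr/hd/hu
and the matching tripoff_* variables), so each of the four swipe directions keeps its own
timestamp instead of sharing one.
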
--- drivers/input/touchscreen/ektf3k.c | 314 +++++++++++++++++++---------- 1 file changed, 212 insertions(+), 102 deletions(-) diff --git a/drivers/input/touchscreen/ektf3k.c b/drivers/input/touchscreen/ektf3k.c index d9e544cd859..e1f43276845 100755 --- a/drivers/input/touchscreen/ektf3k.c +++ b/drivers/input/touchscreen/ektf3k.c @@ -213,17 +213,29 @@ int s2w_switch = 1; int s2w_switch_temp = 1; int s2w_changed = 0; bool scr_suspended = false; -int tripon = 0; -int tripoff = 0; -unsigned long triptime = 0; +int tripoff_vl = 0; +int tripoff_vr = 0; +int tripoff_hd = 0; +int tripoff_hu = 0; +int tripon_vl = 0; +int tripon_vr = 0; +int tripon_hd = 0; +int tripon_hu = 0; +unsigned long triptime_vl = 0; +unsigned long triptime_vr = 0; +unsigned long triptime_hd = 0; +unsigned long triptime_hu = 0; unsigned long dt2w_time[2] = {0, 0}; unsigned int dt2w_x[2] = {0, 0}; unsigned int dt2w_y[2] = {0, 0}; +unsigned int dt2w_2_x[2] = {0, 0}; +unsigned int dt2w_2_y[2] = {0, 0}; int status[2] = {0,0}; int dt2w_count = 0; -#define S2W_TIMEOUT 75 +int is_suspended = 0; +#define S2W_TIMEOUT 50 #define DT2W_TIMEOUT_MAX 40 -#define DT2W_TIMEOUT_MIN 8 +#define DT2W_TIMEOUT_MIN 9 #define DT2W_DELTA 60 void sweep2wake_setdev(struct input_dev * input_device) { @@ -237,9 +249,18 @@ static void reset_sweep2wake(int s2w, int dt2w) { //reset sweep2wake if (s2w) { - tripoff = 0; - tripon = 0; - triptime = 0; + tripoff_vl = 0; + tripoff_vr = 0; + tripoff_hd = 0; + tripoff_hu = 0; + tripon_vl = 0; + tripon_vr = 0; + tripon_hd = 0; + tripon_hu = 0; + triptime_vl = 0; + triptime_vr = 0; + triptime_hd = 0; + triptime_hu = 0; } //reset doubletap2wake @@ -250,7 +271,10 @@ static void reset_sweep2wake(int s2w, int dt2w) dt2w_time[1] = 0; dt2w_x[1] = 0; dt2w_y[1] = 0; - dt2w_count = 0; + dt2w_2_x[0] = 0; + dt2w_2_x[1] = 0; + dt2w_2_y[0] = 0; + dt2w_2_y[1] = 0; } return; @@ -277,7 +301,7 @@ void sweep2wake_pwrtrigger(void) schedule_work(&sweep2wake_presspwr_work); } -int sweep2wake_touch_check(int i) +/*int sweep2wake_touch_check(int i) { status[1] = status[0]; status[0] = mTouchStatus[i]; @@ -287,47 +311,121 @@ int sweep2wake_touch_check(int i) } else { return 1; } -} +}*/ -void sweep2wake_func(int x, int y, unsigned long time, int i) +void sweep2wake_func(int x, int y, unsigned long time) { - int sametouch = sweep2wake_touch_check(i); + //int sametouch = sweep2wake_touch_check(i); //printk("[sweep2wake]: x,y(%d,%d) jiffies:%lu\n", x, y, time); - if (!sametouch){ + //if (!sametouch){ + if (x < 0){ reset_sweep2wake(1,0); return; } - //left->right if (scr_suspended == true && s2w_switch == 1) { + //left->right if (y < 100) { - tripon = 1; - triptime = time; - } else if (tripon == 1 && y > 488 && time - triptime < 25) { - tripon = 2; - } else if (tripon == 2 && y > 896 && time - triptime < 50) { - tripon = 3; - } else if (tripon == 3 && y > 1150 && time - triptime < S2W_TIMEOUT) { + tripon_vr = 1; + triptime_vr = time; + } else if (tripon_vr == 1 && y > 488 && time - triptime_vr < 20) { + tripon_vr = 2; + } else if (tripon_vr == 2 && y > 896 && time - triptime_vr < 40) { + tripon_vr = 3; + } else if (tripon_vr == 3 && (y > 1250) && time - triptime_vr < S2W_TIMEOUT) { + printk(KERN_INFO "[sweep2wake]: ON"); + sweep2wake_pwrtrigger(); + } + //right->left + if (y > 1240) { + tripon_vl = 1; + triptime_vl = time; + } else if (tripon_vl == 1 && y < 896 && time - triptime_vl < 20) { + tripon_vl = 2; + } else if (tripon_vl == 2 && y < 488 && time - triptime_vl < 40) { + tripon_vl = 3; + } else if (tripon_vl == 3 && y < 100 && (time - 
triptime_vl < S2W_TIMEOUT)) { printk(KERN_INFO "[sweep2wake]: ON"); sweep2wake_pwrtrigger(); - } - //right->left - } else if (scr_suspended == false && s2w_switch > 0 && x > 2000) { - if (y > 1250) { - tripoff = 1; - triptime = time; - } else if (tripoff == 1 && y < 896 && time - triptime < 25) { - tripoff = 2; - } else if (tripoff == 2 && y < 488 && time - triptime < 50) { - tripoff = 3; - } else if (tripoff == 3 && y < 100 && (time - triptime < S2W_TIMEOUT)) { + } + //top->bottom + if (x < 250) { + tripon_hd = 1; + triptime_hd = time; + } else if (tripon_hd == 1 && x > 748 && time - triptime_hd < 25) { + tripon_hd = 2; + } else if (tripon_hd == 2 && x > 1496 && time - triptime_hd < 45) { + tripon_hd = 3; + } else if (tripon_hd == 3 && x > 2000 && (time - triptime_hd < S2W_TIMEOUT)) { + printk(KERN_INFO "[sweep2wake]: ON"); + sweep2wake_pwrtrigger(); + } + //bottom->top + if (x > 2000) { + tripon_hu = 1; + triptime_hu = time; + } else if (tripon_hu == 1 && x < 1496 && time - triptime_hu < 25) { + tripon_hu = 2; + } else if (tripon_hu == 2 && x < 748 && time - triptime_hu < 45) { + tripon_hu = 3; + } else if (tripon_hu == 3 && x < 250 && (time - triptime_hu < S2W_TIMEOUT)) { + printk(KERN_INFO "[sweep2wake]: ON"); + sweep2wake_pwrtrigger(); + } + } + + if (scr_suspended == false && s2w_switch > 0) { + //right->left portrait mode normal + if (y > 1250 && x > 2140) { + tripoff_vl = 1; + triptime_vl = time; + } else if (tripoff_vl == 1 && y < 896 && time - triptime_vl < 20) { + tripoff_vl = 2; + } else if (tripoff_vl == 2 && y < 488 && time - triptime_vl < 40) { + tripoff_vl = 3; + } else if (tripoff_vl == 3 && y < 200 && (time - triptime_vl < S2W_TIMEOUT)) { + printk(KERN_INFO "[sweep2wake]: OFF"); + sweep2wake_pwrtrigger(); + } + //left->right portrait mode upside down + if (y < 100 && x < 100) { + tripoff_vr = 1; + triptime_vr = time; + } else if (tripoff_vr == 1 && y > 488 && time - triptime_vr < 20) { + tripoff_vr = 2; + } else if (tripoff_vr == 2 && y > 896 && time - triptime_vr < 40) { + tripoff_vr = 3; + } else if (tripoff_vr == 3 && y > 1150 && time - triptime_vr < S2W_TIMEOUT) { + printk(KERN_INFO "[sweep2wake]: OFF"); + sweep2wake_pwrtrigger(); + } + //top->bottom + if (x < 250 && y > 1244) { + tripoff_hd = 1; + triptime_hd = time; + } else if (tripoff_hd == 1 && x > 748 && time - triptime_hd < 25) { + tripoff_hd = 2; + } else if (tripoff_hd == 2 && x > 1496 && time - triptime_hd < 45) { + tripoff_hd = 3; + } else if (tripoff_hd == 3 && x > 2050 && (time - triptime_hd < S2W_TIMEOUT)) { + printk(KERN_INFO "[sweep2wake]: OFF"); + sweep2wake_pwrtrigger(); + } + //bottom->top + if (x > 2000 && y < 100) { + tripoff_hu = 1; + triptime_hu = time; + } else if (tripoff_hu == 1 && x < 1496 && time - triptime_hu < 25) { + tripoff_hu = 2; + } else if (tripoff_hu == 2 && x < 748 && time - triptime_hu < 45) { + tripoff_hu = 3; + } else if (tripoff_hu == 3 && x < 200 && (time - triptime_hu < S2W_TIMEOUT)) { printk(KERN_INFO "[sweep2wake]: OFF"); sweep2wake_pwrtrigger(); } } - } void doubletap2wake_func(int x, int y) @@ -336,48 +434,39 @@ void doubletap2wake_func(int x, int y) int delta_x = 0; int delta_y = 0; - dt2w_count++; - - //printk("dt2w: time=%lu\n", jiffies); - - dt2w_time[1] = dt2w_time[0]; - dt2w_time[0] = jiffies; - - if ((dt2w_time[0] - dt2w_time[1]) > 45) { - dt2w_count = 0; - //printk("dt2w: reset dt2w_count\n"); - } - - if ((dt2w_time[0] - dt2w_time[1]) < 8 || dt2w_count > 1) { - //printk("dt2w: too fast, dt2w_count=%d\n", dt2w_count); - return; - } else { - dt2w_count = 0; - } + 
printk("x=%d y=%d\n", x, y); dt2w_x[1] = dt2w_x[0]; - dt2w_x[0] = x; + dt2w_x[0] = x; dt2w_y[1] = dt2w_y[0]; - dt2w_y[0] = y; - - delta_x = (dt2w_x[0]-dt2w_x[1]); - delta_y = (dt2w_y[0]-dt2w_y[1]); + dt2w_y[0] = y; + + if (x < 0) { + dt2w_2_x[1] = dt2w_2_x[0]; + dt2w_2_x[0] = dt2w_x[1]; + dt2w_2_y[1] = dt2w_2_y[0]; + dt2w_2_y[0] = dt2w_y[1]; + dt2w_time[1] = dt2w_time[0]; + dt2w_time[0] = jiffies; + + printk("x0=%d x1=%d time0=%lu time1=%lu\n", dt2w_2_x[0], dt2w_2_x[1], dt2w_time[0], dt2w_time[1]); - if ((abs(delta_x) < DT2W_DELTA) && (abs(delta_y) < DT2W_DELTA)) { + delta_x = (dt2w_2_x[0]-dt2w_2_x[1]); + delta_y = (dt2w_2_y[0]-dt2w_2_y[1]); - if (y > 50 && y < 1300 - && ((dt2w_time[0] - dt2w_time[1]) > DT2W_TIMEOUT_MIN) - && ((dt2w_time[0] - dt2w_time[1]) < DT2W_TIMEOUT_MAX)) { + if ((abs(delta_x) < DT2W_DELTA) && (abs(delta_y) < DT2W_DELTA)) { + if ( ((dt2w_time[0] - dt2w_time[1]) > DT2W_TIMEOUT_MIN) + && ((dt2w_time[0] - dt2w_time[1]) < DT2W_TIMEOUT_MAX)) { - //printk("[dt2w]: OFF->ON\n"); - sweep2wake_pwrtrigger(); + printk("[dt2w]: OFF->ON\n"); + sweep2wake_pwrtrigger(); + } else { + printk("dt2w: wrong time\n"); + } } else { - //printk("dt2w: wrong time\n"); + printk("dt2w: wrong spot\n"); } - - } else { - //printk("dt2w: wrong spot\n"); } return; @@ -1264,50 +1353,69 @@ static void elan_ktf3k_ts_report_data2(struct i2c_client *client, uint8_t *buf) uint16_t active = 0; uint8_t idx=IDX_FINGER; - num = buf[2] & 0xf; + printk("buffer=%d\n", *buf); + + num = buf[2] & 0xf; for (i=0; i<34;i++) checksum +=buf[i]; - - if ( (num < 3) || ((checksum & 0x00ff) == buf[34])) { - fbits = buf[2] & 0x30; - fbits = (fbits << 4) | buf[1]; - //input_report_key(idev, BTN_TOUCH, 1); - for(i = 0; i < FINGER_NUM; i++){ - active = fbits & 0x1; - if(active || mTouchStatus[i]){ - input_mt_slot(ts->input_dev, i); - input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, active); - if(active){ - elan_ktf3k_ts_parse_xy(&buf[idx], &x, &y); - x = x > ts->abs_x_max ? 0 : ts->abs_x_max - x; - y = y > ts->abs_y_max ? ts->abs_y_max : y; - touch_size = buf[35 + i]; - pressure_size = buf[45 + i]; - input_report_abs(idev, ABS_MT_TOUCH_MAJOR, touch_size); - input_report_abs(idev, ABS_MT_PRESSURE, pressure_size); - input_report_abs(idev, ABS_MT_POSITION_X, y); - input_report_abs(idev, ABS_MT_POSITION_Y, x); - if(unlikely(gPrint_point)) touch_debug(DEBUG_INFO, "[elan] finger id=%d X=%d y=%d size=%d pressure=%d\n", i, x, y, touch_size, pressure_size); -/* sweep2wake */ - if (s2w_switch > 0) - sweep2wake_func(x, y, jiffies, i); - if (dt2w_switch && scr_suspended) - doubletap2wake_func(x, y); -/* end sweep2wake */ - } - } - mTouchStatus[i] = active; - fbits = fbits >> 1; - idx += 3; - } - input_sync(idev); + printk("num=%d\n", num); + printk("checksum=%d\n", checksum); + + if ( (num < 3) || ((checksum & 0x00ff) == buf[34])) { + fbits = buf[2] & 0x30; + fbits = (fbits << 4) | buf[1]; + + printk("fbits=%d\n", fbits); + + //input_report_key(idev, BTN_TOUCH, 1); + + for(i = 0; i < FINGER_NUM; i++){ + active = fbits & 0x1; + if(active || mTouchStatus[i]){ + input_mt_slot(ts->input_dev, i); + input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, active); + if(active){ + elan_ktf3k_ts_parse_xy(&buf[idx], &x, &y); + x = x > ts->abs_x_max ? 0 : ts->abs_x_max - x; + y = y > ts->abs_y_max ? 
ts->abs_y_max : y; + touch_size = buf[35 + i]; + pressure_size = buf[45 + i]; + input_report_abs(idev, ABS_MT_TOUCH_MAJOR, touch_size); + input_report_abs(idev, ABS_MT_PRESSURE, pressure_size); + input_report_abs(idev, ABS_MT_POSITION_X, y); + input_report_abs(idev, ABS_MT_POSITION_Y, x); + if(unlikely(gPrint_point)) + touch_debug(DEBUG_INFO, "[elan] finger id=%d X=%d y=%d size=%d pressure=%d\n", i, x, y, touch_size, pressure_size); + /* sweep2wake */ + if (s2w_switch > 0) + sweep2wake_func(x, y, jiffies); + if (dt2w_switch && scr_suspended) + doubletap2wake_func(x, y); + /* end sweep2wake */ + } + } + mTouchStatus[i] = active; + fbits = fbits >> 1; + idx += 3; + } + input_sync(idev); } // checksum + else { checksum_err +=1; touch_debug(DEBUG_ERROR, "[elan] Checksum Error %d byte[2]=%X\n", checksum_err, buf[2]); } + /* sweep2wake */ + if (checksum == 99) { + if (s2w_switch > 0) + sweep2wake_func(-1, -1, jiffies); + if (dt2w_switch && scr_suspended) + doubletap2wake_func(-1, -1); + } + /* end sweep2wake */ + return; } @@ -2063,6 +2171,7 @@ static void elan_ktf3k_ts_early_suspend(struct early_suspend *h) { struct elan_ktf3k_ts_data *ts; ts = container_of(h, struct elan_ktf3k_ts_data, early_suspend); + is_suspended = 1; elan_ktf3k_ts_suspend(ts->client, PMSG_SUSPEND); } @@ -2070,6 +2179,7 @@ static void elan_ktf3k_ts_late_resume(struct early_suspend *h) { struct elan_ktf3k_ts_data *ts; ts = container_of(h, struct elan_ktf3k_ts_data, early_suspend); + is_suspended = 0; elan_ktf3k_ts_resume(ts->client); } #endif From da3a94ee5e5ac68bed4e642f6a92debe3b1e59ef Mon Sep 17 00:00:00 2001 From: souljaboy11792 Date: Mon, 12 Aug 2013 11:50:46 +0600 Subject: [PATCH 560/678] Build including the new vars for the hotplug (although they are not used as of yet). Add shortsweep option for s2w. Multiple sweeps are suppressed. 
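
The shortsweep option only changes the start and end bands that the sweep state machine
compares against. A rough sketch of its effect, using the exact values from the hunk below
(the helper name is illustrative; the assignments actually live in
elan_ktf3k_shortsweep_dump()):

	/* shortsweep=1 narrows the trip bands so a shorter swipe is enough,
	 * shortsweep=0 restores the full-width defaults. */
	static void s2w_apply_thresholds(int shortsweep)
	{
		if (shortsweep) {
			s2w_begin_v = 400;
			s2w_end_v   = 950;
			s2w_begin_h = 650;
			s2w_end_h   = 1600;
		} else {
			s2w_begin_v = 150;
			s2w_end_v   = 1200;
			s2w_begin_h = 350;
			s2w_end_h   = 1900;
		}
	}

The toggle is exposed as a new shortsweep attribute on the android_touch kobject, next to
the existing sweep2wake and doubletap2wake files.
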
Conflicts: arch/arm/configs/metallice_grouper_defconfig --- drivers/input/touchscreen/ektf3k.c | 150 ++++++++++++++++------------- include/linux/swwep2wake.h | 23 ----- 2 files changed, 81 insertions(+), 92 deletions(-) delete mode 100644 include/linux/swwep2wake.h diff --git a/drivers/input/touchscreen/ektf3k.c b/drivers/input/touchscreen/ektf3k.c index e1f43276845..4a27e964a7e 100755 --- a/drivers/input/touchscreen/ektf3k.c +++ b/drivers/input/touchscreen/ektf3k.c @@ -212,6 +212,11 @@ int dt2w_changed = 0; int s2w_switch = 1; int s2w_switch_temp = 1; int s2w_changed = 0; +int s2w_begin_v = 150; +int s2w_end_v = 1200; +int s2w_begin_h = 350; +int s2w_end_h = 1900; +int shortsweep = 0; bool scr_suspended = false; int tripoff_vl = 0; int tripoff_vr = 0; @@ -230,13 +235,11 @@ unsigned int dt2w_x[2] = {0, 0}; unsigned int dt2w_y[2] = {0, 0}; unsigned int dt2w_2_x[2] = {0, 0}; unsigned int dt2w_2_y[2] = {0, 0}; -int status[2] = {0,0}; -int dt2w_count = 0; -int is_suspended = 0; +//int is_suspended = 0; #define S2W_TIMEOUT 50 -#define DT2W_TIMEOUT_MAX 40 -#define DT2W_TIMEOUT_MIN 9 -#define DT2W_DELTA 60 +#define DT2W_TIMEOUT_MAX 50 +#define DT2W_TIMEOUT_MIN 4 +#define DT2W_DELTA 150 void sweep2wake_setdev(struct input_dev * input_device) { sweep2wake_pwrdev = input_device; @@ -301,128 +304,111 @@ void sweep2wake_pwrtrigger(void) schedule_work(&sweep2wake_presspwr_work); } -/*int sweep2wake_touch_check(int i) +void sweep2wake_func(int x, int y, unsigned long time, int i) { - status[1] = status[0]; - status[0] = mTouchStatus[i]; - - if (status[0] != status[1]) { - return 0; - } else { - return 1; - } -}*/ - -void sweep2wake_func(int x, int y, unsigned long time) -{ - //int sametouch = sweep2wake_touch_check(i); - - //printk("[sweep2wake]: x,y(%d,%d) jiffies:%lu\n", x, y, time); - - //if (!sametouch){ - if (x < 0){ + if (x < 0 || i > 0){ reset_sweep2wake(1,0); return; } if (scr_suspended == true && s2w_switch == 1) { //left->right - if (y < 100) { + if (y < s2w_begin_v) { tripon_vr = 1; triptime_vr = time; } else if (tripon_vr == 1 && y > 488 && time - triptime_vr < 20) { tripon_vr = 2; } else if (tripon_vr == 2 && y > 896 && time - triptime_vr < 40) { tripon_vr = 3; - } else if (tripon_vr == 3 && (y > 1250) && time - triptime_vr < S2W_TIMEOUT) { - printk(KERN_INFO "[sweep2wake]: ON"); + } else if (tripon_vr == 3 && (y > s2w_end_v) && time - triptime_vr < S2W_TIMEOUT) { + printk(KERN_INFO "[s2w]: ON"); sweep2wake_pwrtrigger(); } //right->left - if (y > 1240) { + if (y > s2w_end_v) { tripon_vl = 1; triptime_vl = time; } else if (tripon_vl == 1 && y < 896 && time - triptime_vl < 20) { tripon_vl = 2; } else if (tripon_vl == 2 && y < 488 && time - triptime_vl < 40) { tripon_vl = 3; - } else if (tripon_vl == 3 && y < 100 && (time - triptime_vl < S2W_TIMEOUT)) { - printk(KERN_INFO "[sweep2wake]: ON"); + } else if (tripon_vl == 3 && y < s2w_begin_v && (time - triptime_vl < S2W_TIMEOUT)) { + printk(KERN_INFO "[s2w]: ON"); sweep2wake_pwrtrigger(); } //top->bottom - if (x < 250) { + if (x < s2w_begin_h) { tripon_hd = 1; triptime_hd = time; } else if (tripon_hd == 1 && x > 748 && time - triptime_hd < 25) { tripon_hd = 2; } else if (tripon_hd == 2 && x > 1496 && time - triptime_hd < 45) { tripon_hd = 3; - } else if (tripon_hd == 3 && x > 2000 && (time - triptime_hd < S2W_TIMEOUT)) { - printk(KERN_INFO "[sweep2wake]: ON"); + } else if (tripon_hd == 3 && x > s2w_end_h && (time - triptime_hd < S2W_TIMEOUT)) { + printk(KERN_INFO "[s2w]: ON"); sweep2wake_pwrtrigger(); } //bottom->top - if (x > 2000) { + if (x > 
s2w_end_h) { tripon_hu = 1; triptime_hu = time; } else if (tripon_hu == 1 && x < 1496 && time - triptime_hu < 25) { tripon_hu = 2; } else if (tripon_hu == 2 && x < 748 && time - triptime_hu < 45) { tripon_hu = 3; - } else if (tripon_hu == 3 && x < 250 && (time - triptime_hu < S2W_TIMEOUT)) { - printk(KERN_INFO "[sweep2wake]: ON"); + } else if (tripon_hu == 3 && x < s2w_begin_h && (time - triptime_hu < S2W_TIMEOUT)) { + printk(KERN_INFO "[s2w]: ON"); sweep2wake_pwrtrigger(); } } if (scr_suspended == false && s2w_switch > 0) { //right->left portrait mode normal - if (y > 1250 && x > 2140) { + if (y > s2w_end_v && x > 2140) { tripoff_vl = 1; triptime_vl = time; } else if (tripoff_vl == 1 && y < 896 && time - triptime_vl < 20) { tripoff_vl = 2; } else if (tripoff_vl == 2 && y < 488 && time - triptime_vl < 40) { tripoff_vl = 3; - } else if (tripoff_vl == 3 && y < 200 && (time - triptime_vl < S2W_TIMEOUT)) { - printk(KERN_INFO "[sweep2wake]: OFF"); + } else if (tripoff_vl == 3 && y < (s2w_begin_v) && (time - triptime_vl < S2W_TIMEOUT)) { + printk(KERN_INFO "[s2w]: OFF"); sweep2wake_pwrtrigger(); } //left->right portrait mode upside down - if (y < 100 && x < 100) { + if (y < s2w_begin_v && x < 100) { tripoff_vr = 1; triptime_vr = time; } else if (tripoff_vr == 1 && y > 488 && time - triptime_vr < 20) { tripoff_vr = 2; } else if (tripoff_vr == 2 && y > 896 && time - triptime_vr < 40) { tripoff_vr = 3; - } else if (tripoff_vr == 3 && y > 1150 && time - triptime_vr < S2W_TIMEOUT) { - printk(KERN_INFO "[sweep2wake]: OFF"); + } else if (tripoff_vr == 3 && y > s2w_end_v && time - triptime_vr < S2W_TIMEOUT) { + printk(KERN_INFO "[s2w]: OFF"); sweep2wake_pwrtrigger(); } //top->bottom - if (x < 250 && y > 1244) { + if (x < s2w_begin_h && y > 1244) { tripoff_hd = 1; triptime_hd = time; } else if (tripoff_hd == 1 && x > 748 && time - triptime_hd < 25) { tripoff_hd = 2; } else if (tripoff_hd == 2 && x > 1496 && time - triptime_hd < 45) { tripoff_hd = 3; - } else if (tripoff_hd == 3 && x > 2050 && (time - triptime_hd < S2W_TIMEOUT)) { - printk(KERN_INFO "[sweep2wake]: OFF"); + } else if (tripoff_hd == 3 && x > s2w_end_h && (time - triptime_hd < S2W_TIMEOUT)) { + printk(KERN_INFO "[s2w]: OFF"); sweep2wake_pwrtrigger(); } //bottom->top - if (x > 2000 && y < 100) { + if (x > s2w_end_h && y < 100) { tripoff_hu = 1; triptime_hu = time; } else if (tripoff_hu == 1 && x < 1496 && time - triptime_hu < 25) { tripoff_hu = 2; } else if (tripoff_hu == 2 && x < 748 && time - triptime_hu < 45) { tripoff_hu = 3; - } else if (tripoff_hu == 3 && x < 200 && (time - triptime_hu < S2W_TIMEOUT)) { - printk(KERN_INFO "[sweep2wake]: OFF"); + } else if (tripoff_hu == 3 && x < s2w_begin_h && (time - triptime_hu < S2W_TIMEOUT)) { + printk(KERN_INFO "[s2w]: OFF"); sweep2wake_pwrtrigger(); } } @@ -434,7 +420,7 @@ void doubletap2wake_func(int x, int y) int delta_x = 0; int delta_y = 0; - printk("x=%d y=%d\n", x, y); + //printk("x=%d y=%d\n", x, y); dt2w_x[1] = dt2w_x[0]; dt2w_x[0] = x; @@ -449,7 +435,7 @@ void doubletap2wake_func(int x, int y) dt2w_time[1] = dt2w_time[0]; dt2w_time[0] = jiffies; - printk("x0=%d x1=%d time0=%lu time1=%lu\n", dt2w_2_x[0], dt2w_2_x[1], dt2w_time[0], dt2w_time[1]); + //printk("x0=%d x1=%d time0=%lu time1=%lu\n", dt2w_2_x[0], dt2w_2_x[1], dt2w_time[0], dt2w_time[1]); delta_x = (dt2w_2_x[0]-dt2w_2_x[1]); delta_y = (dt2w_2_y[0]-dt2w_2_y[1]); @@ -461,18 +447,13 @@ void doubletap2wake_func(int x, int y) printk("[dt2w]: OFF->ON\n"); sweep2wake_pwrtrigger(); - } else { - printk("dt2w: wrong time\n"); } - } else { - 
printk("dt2w: wrong spot\n"); - } + } } return; } - /* end sweep2wake */ @@ -695,6 +676,39 @@ static ssize_t elan_ktf3k_sweep2wake_dump(struct device *dev, static DEVICE_ATTR(sweep2wake, (S_IWUSR|S_IRUGO), elan_ktf3k_sweep2wake_show, elan_ktf3k_sweep2wake_dump); +static ssize_t elan_ktf3k_shortsweep_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + size_t count = 0; + count += sprintf(buf, "%d\n", shortsweep); + return count; +} + +static ssize_t elan_ktf3k_shortsweep_dump(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + if (buf[0] >= '0' && buf[0] <= '1' && buf[1] == '\n') + if (shortsweep != buf[0] - '0') + shortsweep = buf[0] - '0'; + + if (shortsweep) { + s2w_begin_v = 400 ; + s2w_end_v = 950; + s2w_begin_h = 650; + s2w_end_h = 1600; + } else { + s2w_begin_v = 150; + s2w_end_v = 1200; + s2w_begin_h = 350; + s2w_end_h = 1900; + } + + return count; +} + +static DEVICE_ATTR(shortsweep, (S_IWUSR|S_IRUGO), + elan_ktf3k_shortsweep_show, elan_ktf3k_shortsweep_dump); + static ssize_t elan_ktf3k_doubletap2wake_show(struct device *dev, struct device_attribute *attr, char *buf) { size_t count = 0; @@ -845,7 +859,11 @@ static int elan_ktf3k_touch_sysfs_init(void) touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); return ret; } - + ret = sysfs_create_file(android_touch_kobj, &dev_attr_shortsweep.attr); + if (ret) { + touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); + return ret; + } return 0 ; } @@ -856,6 +874,7 @@ static void elan_touch_sysfs_deinit(void) /* sweep2wake sysfs */ sysfs_remove_file(android_touch_kobj, &dev_attr_sweep2wake.attr); sysfs_remove_file(android_touch_kobj, &dev_attr_doubletap2wake.attr); + sysfs_remove_file(android_touch_kobj, &dev_attr_shortsweep.attr); kobject_del(android_touch_kobj); } @@ -1353,21 +1372,14 @@ static void elan_ktf3k_ts_report_data2(struct i2c_client *client, uint8_t *buf) uint16_t active = 0; uint8_t idx=IDX_FINGER; - printk("buffer=%d\n", *buf); - num = buf[2] & 0xf; for (i=0; i<34;i++) checksum +=buf[i]; - printk("num=%d\n", num); - printk("checksum=%d\n", checksum); - if ( (num < 3) || ((checksum & 0x00ff) == buf[34])) { fbits = buf[2] & 0x30; fbits = (fbits << 4) | buf[1]; - printk("fbits=%d\n", fbits); - //input_report_key(idev, BTN_TOUCH, 1); for(i = 0; i < FINGER_NUM; i++){ @@ -1389,7 +1401,7 @@ static void elan_ktf3k_ts_report_data2(struct i2c_client *client, uint8_t *buf) touch_debug(DEBUG_INFO, "[elan] finger id=%d X=%d y=%d size=%d pressure=%d\n", i, x, y, touch_size, pressure_size); /* sweep2wake */ if (s2w_switch > 0) - sweep2wake_func(x, y, jiffies); + sweep2wake_func(x, y, jiffies, i); if (dt2w_switch && scr_suspended) doubletap2wake_func(x, y); /* end sweep2wake */ @@ -1410,7 +1422,7 @@ static void elan_ktf3k_ts_report_data2(struct i2c_client *client, uint8_t *buf) /* sweep2wake */ if (checksum == 99) { if (s2w_switch > 0) - sweep2wake_func(-1, -1, jiffies); + sweep2wake_func(-1, -1, jiffies, i); if (dt2w_switch && scr_suspended) doubletap2wake_func(-1, -1); } @@ -2171,7 +2183,7 @@ static void elan_ktf3k_ts_early_suspend(struct early_suspend *h) { struct elan_ktf3k_ts_data *ts; ts = container_of(h, struct elan_ktf3k_ts_data, early_suspend); - is_suspended = 1; + //is_suspended = 1; elan_ktf3k_ts_suspend(ts->client, PMSG_SUSPEND); } @@ -2179,7 +2191,7 @@ static void elan_ktf3k_ts_late_resume(struct early_suspend *h) { struct elan_ktf3k_ts_data *ts; ts = container_of(h, struct elan_ktf3k_ts_data, early_suspend); - 
is_suspended = 0; + //is_suspended = 0; elan_ktf3k_ts_resume(ts->client); } #endif diff --git a/include/linux/swwep2wake.h b/include/linux/swwep2wake.h deleted file mode 100644 index a1e2f5c3e16..00000000000 --- a/include/linux/swwep2wake.h +++ /dev/null @@ -1,23 +0,0 @@ -/* -* include/linux/sweep2wake.h -* -* Copyright (c) 2013, Aaron Segaert (flar2) asegaert at gmail.com -* -* This program is free software; you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation; either version 2 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -* more details. -* -* You should have received a copy of the GNU General Public License along -* with this program; if not, write to the Free Software Foundation, Inc., -* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -*/ - - -extern void sweep2wake_setdev(struct input_dev * input_device); - From 5d2e5939f4f4a2beaa06cbb10584aabc7b1c29f0 Mon Sep 17 00:00:00 2001 From: souljaboy11792 Date: Tue, 10 Sep 2013 14:31:06 +0200 Subject: [PATCH 561/678] Fix build. Enable shortsweep by default. Try to fix portrait mode s2w/s2s. Conflicts: drivers/power/smb347-charger.c --- drivers/input/touchscreen/ektf3k.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/ektf3k.c b/drivers/input/touchscreen/ektf3k.c index 4a27e964a7e..52766e66ac3 100755 --- a/drivers/input/touchscreen/ektf3k.c +++ b/drivers/input/touchscreen/ektf3k.c @@ -216,7 +216,7 @@ int s2w_begin_v = 150; int s2w_end_v = 1200; int s2w_begin_h = 350; int s2w_end_h = 1900; -int shortsweep = 0; +int shortsweep = 1; bool scr_suspended = false; int tripoff_vl = 0; int tripoff_vr = 0; @@ -367,9 +367,9 @@ void sweep2wake_func(int x, int y, unsigned long time, int i) if (y > s2w_end_v && x > 2140) { tripoff_vl = 1; triptime_vl = time; - } else if (tripoff_vl == 1 && y < 896 && time - triptime_vl < 20) { + } else if (tripoff_vl == 1 && y < 488 && time - triptime_vl < 20) { tripoff_vl = 2; - } else if (tripoff_vl == 2 && y < 488 && time - triptime_vl < 40) { + } else if (tripoff_vl == 2 && y < 896 && time - triptime_vl < 40) { tripoff_vl = 3; } else if (tripoff_vl == 3 && y < (s2w_begin_v) && (time - triptime_vl < S2W_TIMEOUT)) { printk(KERN_INFO "[s2w]: OFF"); @@ -379,9 +379,9 @@ void sweep2wake_func(int x, int y, unsigned long time, int i) if (y < s2w_begin_v && x < 100) { tripoff_vr = 1; triptime_vr = time; - } else if (tripoff_vr == 1 && y > 488 && time - triptime_vr < 20) { + } else if (tripoff_vr == 1 && y > 896 && time - triptime_vr < 20) { tripoff_vr = 2; - } else if (tripoff_vr == 2 && y > 896 && time - triptime_vr < 40) { + } else if (tripoff_vr == 2 && y > 488 && time - triptime_vr < 40) { tripoff_vr = 3; } else if (tripoff_vr == 3 && y > s2w_end_v && time - triptime_vr < S2W_TIMEOUT) { printk(KERN_INFO "[s2w]: OFF"); From da08bd2419f59c006f2b9094a9a9a2d1ac92cee3 Mon Sep 17 00:00:00 2001 From: souljaboy11792 Date: Thu, 12 Sep 2013 14:50:35 +0200 Subject: [PATCH 562/678] eft3k: Hardcode proper s2s values for portrait mode (nearest to USB port = base). 
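
Each sweep-to-sleep gesture is a four-stage check over successive touch reports. Simplified,
the right-to-left branch hardcoded here looks like the following (coordinates and jiffy
windows copied from the hunk below; state handling and logging elided):

	if (y > s2w_end_v && x > 1848) {	/* stage 1: finger enters the start zone */
		tripoff_vl = 1;
		triptime_vl = time;
	} else if (tripoff_vl == 1 && y < 854 && time - triptime_vl < 20) {
		tripoff_vl = 2;			/* stage 2: first band crossed within 20 jiffies */
	} else if (tripoff_vl == 2 && y < 427 && time - triptime_vl < 40) {
		tripoff_vl = 3;			/* stage 3: second band crossed within 40 jiffies */
	} else if (tripoff_vl == 3 && y < s2w_begin_v && time - triptime_vl < S2W_TIMEOUT) {
		sweep2wake_pwrtrigger();	/* stage 4: far band reached in time, screen off */
	}

The upside-down (left-to-right) screen-on branch is disabled rather than retuned.
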
--- drivers/input/touchscreen/ektf3k.c | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/input/touchscreen/ektf3k.c b/drivers/input/touchscreen/ektf3k.c index 52766e66ac3..7d07d46e191 100755 --- a/drivers/input/touchscreen/ektf3k.c +++ b/drivers/input/touchscreen/ektf3k.c @@ -236,7 +236,7 @@ unsigned int dt2w_y[2] = {0, 0}; unsigned int dt2w_2_x[2] = {0, 0}; unsigned int dt2w_2_y[2] = {0, 0}; //int is_suspended = 0; -#define S2W_TIMEOUT 50 +#define S2W_TIMEOUT 30 #define DT2W_TIMEOUT_MAX 50 #define DT2W_TIMEOUT_MIN 4 #define DT2W_DELTA 150 @@ -356,7 +356,7 @@ void sweep2wake_func(int x, int y, unsigned long time, int i) tripon_hu = 2; } else if (tripon_hu == 2 && x < 748 && time - triptime_hu < 45) { tripon_hu = 3; - } else if (tripon_hu == 3 && x < s2w_begin_h && (time - triptime_hu < S2W_TIMEOUT)) { + } else if (tripon_hu == 3 && x < (s2w_begin_h) && (time - triptime_hu < S2W_TIMEOUT)) { printk(KERN_INFO "[s2w]: ON"); sweep2wake_pwrtrigger(); } @@ -364,29 +364,29 @@ void sweep2wake_func(int x, int y, unsigned long time, int i) if (scr_suspended == false && s2w_switch > 0) { //right->left portrait mode normal - if (y > s2w_end_v && x > 2140) { + if (y > s2w_end_v && x > 1848 ) { tripoff_vl = 1; triptime_vl = time; - } else if (tripoff_vl == 1 && y < 488 && time - triptime_vl < 20) { + } else if (tripoff_vl == 1 && y < 854 && time - triptime_vl < 20) { tripoff_vl = 2; - } else if (tripoff_vl == 2 && y < 896 && time - triptime_vl < 40) { + } else if (tripoff_vl == 2 && y < 427 && time - triptime_vl < 40) { tripoff_vl = 3; } else if (tripoff_vl == 3 && y < (s2w_begin_v) && (time - triptime_vl < S2W_TIMEOUT)) { printk(KERN_INFO "[s2w]: OFF"); sweep2wake_pwrtrigger(); } //left->right portrait mode upside down - if (y < s2w_begin_v && x < 100) { - tripoff_vr = 1; - triptime_vr = time; - } else if (tripoff_vr == 1 && y > 896 && time - triptime_vr < 20) { - tripoff_vr = 2; - } else if (tripoff_vr == 2 && y > 488 && time - triptime_vr < 40) { - tripoff_vr = 3; - } else if (tripoff_vr == 3 && y > s2w_end_v && time - triptime_vr < S2W_TIMEOUT) { - printk(KERN_INFO "[s2w]: OFF"); - sweep2wake_pwrtrigger(); - } + //if (y < 100 && x > 100) { + // tripoff_vr = 1; + // triptime_vr = time; + //} else if (tripoff_vr == 1 && y > 427 && time - triptime_vr < 20) { + // tripoff_vr = 2; + //} else if (tripoff_vr == 2 && y > 854 && time - triptime_vr < 40) { + // tripoff_vr = 3; + //} else if (tripoff_vr == 3 && y > (s2w_end_v) && (time - triptime_vr < S2W_TIMEOUT)) { + // printk(KERN_INFO "[s2w]: OFF"); + // sweep2wake_pwrtrigger(); + //} //top->bottom if (x < s2w_begin_h && y > 1244) { tripoff_hd = 1; From fbbdc6c081901f7d025ccd2bb92fb7880c28906c Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 25 Sep 2013 12:02:53 -0400 Subject: [PATCH 563/678] mach-tegra:board-grouper-panel.c: use stock values for custom backlight --- arch/arm/mach-tegra/board-grouper-panel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index c92a8a30d04..1566de965cb 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -40,11 +40,11 @@ #include #include -static bool otf_scaling = 1; +static bool otf_scaling = 0; module_param(otf_scaling, bool, 0644); static unsigned int min_backlight = 10; module_param(min_backlight, uint, 0644); -static unsigned int max_backlight = 160; +static unsigned int max_backlight = 255; 
module_param(max_backlight, uint, 0644); /* grouper default display board pins */ From bac153ff3f6cceddc6396ff4db7236c61193ef9a Mon Sep 17 00:00:00 2001 From: Kyungsik Lee Date: Fri, 9 Aug 2013 12:44:46 -0500 Subject: [PATCH 564/678] decompressor: Add LZ4 decompressor module Date Tue, 26 Feb 2013 15:24:27 +0900 This patch adds support for LZ4 decompression in the Linux Kernel. LZ4 Decompression APIs for kernel are based on LZ4 implementation by Yann Collet. LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html LZ4 source repository : http://code.google.com/p/lz4/ Signed-off-by: Kyungsik Lee v2: - Clean up code - Enable unaligned access for ARM v6 and above with CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - Add lz4_decompress() for faster decompression with uncompressed output size Signed-off-by: Paul Reioux --- include/linux/lz4.h | 48 ++++++ lib/lz4/lz4_decompress.c | 331 +++++++++++++++++++++++++++++++++++++++ lib/lz4/lz4defs.h | 93 +++++++++++ 3 files changed, 472 insertions(+) create mode 100644 include/linux/lz4.h create mode 100644 lib/lz4/lz4_decompress.c create mode 100644 lib/lz4/lz4defs.h diff --git a/include/linux/lz4.h b/include/linux/lz4.h new file mode 100644 index 00000000000..66b504c84a4 --- /dev/null +++ b/include/linux/lz4.h @@ -0,0 +1,48 @@ +#ifndef __LZ4_H__ +#define __LZ4_H__ +/* + * LZ4 Kernel Interface + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * LZ4_COMPRESSBOUND() + * Provides the maximum size that LZ4 may output in a "worst case" scenario + * (input data not compressible) + */ +#define LZ4_COMPRESSBOUND(isize) (isize + ((isize)/255) + 16) + +/* + * lz4_decompress() + * src : source address of the compressed data + * src_len : is the input size, whcih is returned after decompress done + * dest : output buffer address of the decompressed data + * actual_dest_len: is the size of uncompressed data, supposing it's known + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + * a bit faster than lz4_decompress_unknownoutputsize() + */ +int lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len); + +/* + * lz4_decompress_unknownoutputsize() + * src : source address of the compressed data + * src_len : is the input size, therefore the compressed size + * dest : output buffer address of the decompressed data + * dest_len: is the max size of the destination buffer, which is + * returned with actual size of decompressed data after + * decompress done + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + */ +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len); +#endif diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c new file mode 100644 index 00000000000..1998d7afb37 --- /dev/null +++ b/lib/lz4/lz4_decompress.c @@ -0,0 +1,331 @@ +/* + * LZ4 Decompressor for Linux kernel + * + * Copyright (C) 2013 LG Electronics Co., Ltd. (http://www.lge.com/) + * + * Based on LZ4 implementation by Yann Collet. + * + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. 
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + */ + +#ifndef STATIC +#include +#include +#endif +#include + +#include + +#include "lz4defs.h" + +static int lz4_uncompress(const char *source, char *dest, int osize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *ref; + + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + osize; + BYTE *cpy; + + unsigned token; + + size_t length; + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + while (1) { + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + size_t len; + + /* for (; (len = *ip++) == 255; length += 255){} */ + len = *ip++; + for (; len == 255; length += 255) + len = *ip++; + length += len; + } + + /* copy literals */ + cpy = op + length; + if (unlikely(cpy > oend - COPYLENGTH)) { + + /* + * Error: not enough place for another match + * (min 4) + 5 literals + */ + if (cpy != oend) + goto _output_error; + + memcpy(op, ip, length); + ip += length; + break; /* EOF */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + + /* Error: offset create reference outside destination buffer */ + if (unlikely(ref < (BYTE *const)dest)) + goto _output_error; + + /* get matchlength */ + length = token & ML_MASK; + if (length == ML_MASK) { + for (; *ip == 255; length += 255) + ip++; + length += *ip++; + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op-ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE - 4); + if (cpy > oend - COPYLENGTH) { + + /* Error: request to write beyond destination buffer */ + if (cpy > oend) + goto 
_output_error; + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *)ip) - source); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *)ip) - source)); +} + +static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, + int isize, size_t maxoutputsize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *const iend = ip + isize; + const BYTE *ref; + + + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + maxoutputsize; + BYTE *cpy; + + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + /* Main Loop */ + while (ip < iend) { + + unsigned token; + size_t length; + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + int s = 255; + while ((ip < iend) && (s == 255)) { + s = *ip++; + length += s; + } + } + /* copy literals */ + cpy = op + length; + if ((cpy > oend - COPYLENGTH) || + (ip + length > iend - COPYLENGTH)) { + + if (cpy > oend) + goto _output_error;/* writes beyond buffer */ + + if (ip + length != iend) + goto _output_error;/* + * Error: LZ4 format requires + * to consume all input + * at this stage + */ + memcpy(op, ip, length); + op += length; + break;/* Necessarily EOF, due to parsing restrictions */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + if (ref < (BYTE * const)dest) + goto _output_error; + /* + * Error : offset creates reference + * outside of destination buffer + */ + + /* get matchlength */ + length = (token & ML_MASK); + if (length == ML_MASK) { + while (ip < iend) { + int s = *ip++; + length += s; + if (s == 255) + continue; + break; + } + } + + /* copy repeated sequence */ + if (unlikely(op - ref < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op - ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE-4); + if (cpy > oend - COPYLENGTH) { + if (cpy > oend) + goto _output_error; /* write outside of buf */ + + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *)op) - dest); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *)ip) - source)); +} + +int lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len) +{ + int ret = -1; + int input_len = 0; + + input_len = lz4_uncompress(src, dest, actual_dest_len); + if (input_len < 0) + goto exit_0; + *src_len = input_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress); +#endif + +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len) +{ + int ret = -1; + 
int out_len = 0; + + out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len, + *dest_len); + if (out_len < 0) + goto exit_0; + *dest_len = out_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 Decompressor"); +#endif diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h new file mode 100644 index 00000000000..fde76e6c66d --- /dev/null +++ b/lib/lz4/lz4defs.h @@ -0,0 +1,93 @@ +/* + * lz4defs.h -- architecture specific defines + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Detects 64 bits mode + */ +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \ + || defined(__ppc64__) || defined(__LP64__)) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +/* + * Architecture-specific macros + */ +#define BYTE u8 +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || defined(CONFIG_ARM) \ + && __LINUX_ARM_ARCH__ >= 6 \ + && defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +typedef struct _U32_S { u32 v; } U32_S; +typedef struct _U64_S { u64 v; } U64_S; + +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) + +#define PUT4(s, d) (A32(d) = A32(s)) +#define PUT8(s, d) (A64(d) = A64(s)) +#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +#define PUT4(s, d) \ + put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) +#define PUT8(s, d) \ + put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) +#endif + +#define COPYLENGTH 8 +#define ML_BITS 4 +#define ML_MASK ((1U << ML_BITS) - 1) +#define RUN_BITS (8 - ML_BITS) +#define RUN_MASK ((1U << RUN_BITS) - 1) + +#if LZ4_ARCH64/* 64-bit */ +#define STEPSIZE 8 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT8(s, d); \ + d += 8; \ + s += 8; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) + +#define LZ4_SECURECOPY(s, d, e) \ + do { \ + if (d < e) { \ + LZ4_WILDCOPY(s, d, e); \ + } \ + } while (0) + +#else /* 32-bit */ +#define STEPSIZE 4 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT4(s, d); \ + d += 4; \ + s += 4; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) \ + do { \ + LZ4_COPYSTEP(s, d); \ + LZ4_COPYSTEP(s, d); \ + } while (0) + +#define LZ4_SECURECOPY LZ4_WILDCOPY +#endif + +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - get_unaligned_le16(p)) +#define LZ4_WILDCOPY(s, d, e) \ + do { \ + LZ4_COPYPACKET(s, d); \ + } while (d < e) From 812e730ef5fba37ab7aafda7e4cc8562c49b3635 Mon Sep 17 00:00:00 2001 From: Paul Reioux Date: Wed, 14 Aug 2013 17:08:55 -0500 Subject: [PATCH 565/678] lib: Add support for LZ4-compressed kernel Date Tue, 26 Feb 2013 15:24:28 +0900 This patch adds support for extracting LZ4-compressed kernel images, as well as LZ4-compressed ramdisk images in the kernel boot process. 
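
As a rough illustration of how a caller is expected to use the API from the companion patch
("decompressor: Add LZ4 decompressor module"); the buffer names and the wrapper below are
made up, the real boot-time user is the unlz4() wrapper added in lib/decompress_unlz4.c:

	#include <linux/lz4.h>

	/* dst must already be allocated; dst_cap is its capacity in bytes. */
	static int demo_unpack(const char *src, size_t src_len,
			       char *dst, size_t dst_cap)
	{
		size_t out_len = dst_cap;	/* in: buffer size, out: bytes produced */
		int ret;

		ret = lz4_decompress_unknownoutputsize(src, src_len, dst, &out_len);
		if (ret < 0)
			return ret;		/* stream invalid or would overrun dst */

		return 0;			/* out_len now holds the decompressed size */
	}

When the uncompressed size is known up front (the PREBOOT path), lz4_decompress() is used
instead, since it skips the output-size bookkeeping and is a bit faster.
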
This depends on the patch below decompressor: Add LZ4 decompressor module Signed-off-by: Kyungsik Lee v2: - Clean up code - Use lz4_decompress() for LZ4-compressed kernel during boot-process Signed-off-by: Paul Reioux --- include/linux/decompress/unlz4.h | 10 ++ init/Kconfig | 13 ++- lib/Kconfig | 7 ++ lib/Makefile | 2 + lib/decompress.c | 5 + lib/decompress_unlz4.c | 190 +++++++++++++++++++++++++++++++ lib/lz4/Makefile | 1 + lib/lz4/lz4_decompress.c | 2 +- scripts/Makefile.lib | 6 + usr/Kconfig | 9 ++ 10 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 include/linux/decompress/unlz4.h create mode 100644 lib/decompress_unlz4.c create mode 100644 lib/lz4/Makefile diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h new file mode 100644 index 00000000000..d5b68bf3ec9 --- /dev/null +++ b/include/linux/decompress/unlz4.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_UNLZ4_H +#define DECOMPRESS_UNLZ4_H + +int unlz4(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif diff --git a/init/Kconfig b/init/Kconfig index f6444984a08..75611da2802 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -130,10 +130,13 @@ config HAVE_KERNEL_XZ config HAVE_KERNEL_LZO bool +config HAVE_KERNEL_LZ4 + bool + choice prompt "Kernel compression mode" default KERNEL_GZIP - depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO + depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 help The linux kernel is a kind of self-extracting executable. Several compression algorithms are available, which differ @@ -201,6 +204,14 @@ config KERNEL_LZO size is about 10% bigger than gzip; however its speed (both compression and decompression) is the fastest. +config KERNEL_LZ4 + bool "LZ4" + depends on HAVE_KERNEL_LZ4 + help + Its compression ratio is worse than LZO. The size of the kernel + is about 8% bigger than LZO. But the decompression speed is + faster than LZO. 
+ endchoice config DEFAULT_HOSTNAME diff --git a/lib/Kconfig b/lib/Kconfig index 32f3e5ae2be..4dfa3bba411 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -106,6 +106,9 @@ config LZO_COMPRESS config LZO_DECOMPRESS tristate +config LZ4_DECOMPRESS + tristate + source "lib/xz/Kconfig" # @@ -130,6 +133,10 @@ config DECOMPRESS_LZO select LZO_DECOMPRESS tristate +config DECOMPRESS_LZ4 + select LZ4_DECOMPRESS + tristate + # # Generic allocator support is selected if needed # diff --git a/lib/Makefile b/lib/Makefile index 3a74ec91d05..55418ad4c2c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ obj-$(CONFIG_XZ_DEC) += xz/ obj-$(CONFIG_RAID6_PQ) += raid6/ @@ -78,6 +79,7 @@ lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o +lib-$(CONFIG_DECOMPRESS_LZ4) += decompress_unlz4.o obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o diff --git a/lib/decompress.c b/lib/decompress.c index 3d766b7f60a..fc3f2dda8c3 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,9 @@ #ifndef CONFIG_DECOMPRESS_LZO # define unlzo NULL #endif +#ifndef CONFIG_DECOMPRESS_LZ4 +# define unlz4 NULL +#endif static const struct compress_format { unsigned char magic[2]; @@ -42,6 +46,7 @@ static const struct compress_format { { {0x5d, 0x00}, "lzma", unlzma }, { {0xfd, 0x37}, "xz", unxz }, { {0x89, 0x4c}, "lzo", unlzo }, + { {0x02, 0x21}, "lz4", unlz4 }, { {0, 0}, NULL, NULL } }; diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c new file mode 100644 index 00000000000..84346c47c5c --- /dev/null +++ b/lib/decompress_unlz4.c @@ -0,0 +1,190 @@ +/* + * Wrapper for decompressing LZ4-compressed kernel, initramfs, and initrd + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifdef STATIC +#define PREBOOT +#include "lz4/lz4_decompress.c" +#else +#include +#endif +#include +#include +#include +#include + +#include + + +#define LZ4_CHUNK_SIZE (8<<20) +#define ARCHIVE_MAGICNUMBER 0x184C2102 + +STATIC inline int INIT unlz4(u8 *input, int in_len, + int (*fill) (void *, unsigned int), + int (*flush) (void *, unsigned int), + u8 *output, int *posp, + void (*error) (char *x)) +{ + int ret = -1; + size_t chunksize = 0; + u8 *inp; + u8 *inp_start; + u8 *outp; + int size = in_len; +#ifdef PREBOOT + size_t out_len = get_unaligned_le32(input + in_len); +#endif + size_t dest_len; + + + if (output) { + outp = output; + } else if (!flush) { + error("NULL output pointer and no flush function provided"); + goto exit_0; + } else { + outp = large_malloc(LZ4_CHUNK_SIZE); + if (!outp) { + error("Could not allocate output buffer"); + goto exit_0; + } + } + + if (input && fill) { + error("Both input pointer and fill function provided,"); + goto exit_1; + } else if (input) { + inp = input; + } else if (!fill) { + error("NULL input pointer and missing fill function"); + goto exit_1; + } else { + inp = large_malloc(LZ4_COMPRESSBOUND(LZ4_CHUNK_SIZE)); + if (!inp) { + error("Could not allocate input buffer"); + goto exit_1; + } + } + inp_start = inp; + + if (posp) + *posp = 0; + + if (fill) + fill(inp, 4); + + chunksize = get_unaligned_le32(inp); + if (chunksize == ARCHIVE_MAGICNUMBER) { + inp += 4; + size -= 4; + } else { + error("invalid header"); + goto exit_2; + } + + if (posp) + *posp += 4; + + for (;;) { + + if (fill) + fill(inp, 4); + + chunksize = get_unaligned_le32(inp); + if (chunksize == ARCHIVE_MAGICNUMBER) { + inp += 4; + size -= 4; + if (posp) + *posp += 4; + continue; + } + inp += 4; + size -= 4; + + if (posp) + *posp += 4; + + if (fill) { + if (chunksize > LZ4_COMPRESSBOUND(LZ4_CHUNK_SIZE)) { + error("chunk length is longer than allocated"); + goto exit_2; + } + fill(inp, chunksize); + } +#ifdef PREBOOT + if (out_len >= LZ4_CHUNK_SIZE) { + dest_len = LZ4_CHUNK_SIZE; + out_len -= dest_len; + } else + dest_len = out_len; + ret = lz4_decompress(inp, &chunksize, outp, dest_len); +#else + dest_len = LZ4_CHUNK_SIZE; + ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp, + &dest_len); +#endif + if (ret < 0) { + error("Decoding failed"); + goto exit_2; + } + + if (flush && flush(outp, dest_len) != dest_len) + goto exit_2; + if (output) + outp += dest_len; + if (posp) + *posp += chunksize; + + size -= chunksize; + + if (size == 0) + break; + else if (size < 0) { + error("data corrupted"); + goto exit_2; + } + + inp += chunksize; + if (fill) + inp = inp_start; + } + + ret = 0; +exit_2: + if (!input) + large_free(inp_start); +exit_1: + if (!output) + large_free(outp); +exit_0: + return ret; +} + +#ifdef PREBOOT +STATIC int INIT decompress(unsigned char *buf, int in_len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error)(char *x) + ) +{ + return unlz4(buf, in_len - 4, fill, flush, output, posp, error); +} +#endif diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile new file mode 100644 index 00000000000..7f548c6d1c5 --- /dev/null +++ b/lib/lz4/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 1998d7afb37..f58eaca5127 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -1,7 +1,7 @@ /* * LZ4 Decompressor for Linux kernel * - * Copyright (C) 2013 LG Electronics Co., Ltd. 
(http://www.lge.com/) + * Copyright (C) 2013, LG Electronics, Kyungsik Lee * * Based on LZ4 implementation by Yann Collet. * diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index aeea84a2483..e3f506729ec 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -300,6 +300,12 @@ cmd_lzo = (cat $(filter-out FORCE,$^) | \ lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ (rm -f $@ ; false) +quiet_cmd_lz4 = LZ4 $@ +cmd_lz4 = (cat $(filter-out FORCE,$^) | \ + lz4 -c1 stdin stdout && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) + + # XZ # --------------------------------------------------------------------------- # Use xzkern to compress the kernel image and xzmisc to compress other things. diff --git a/usr/Kconfig b/usr/Kconfig index 65b845bd4e3..16ffe99bbad 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -90,6 +90,15 @@ config RD_LZO Support loading of a LZO encoded initial ramdisk or cpio buffer If unsure, say N. +config RD_LZ4 + bool "Support initial ramdisks compressed using LZ4" if EXPERT + default !EXPERT + depends on BLK_DEV_INITRD + select DECOMPRESS_LZ4 + help + Support loading of a LZ4 encoded initial ramdisk or cpio buffer + If unsure, say N. + choice prompt "Built-in initramfs compression mode" if INITRAMFS_SOURCE!="" help From 5f017eb3887134bc473c0850e6863f3bdc41cf51 Mon Sep 17 00:00:00 2001 From: Paul Reioux Date: Wed, 14 Aug 2013 17:12:08 -0500 Subject: [PATCH 566/678] arm: Add support for LZ4-compressed kernel Date Tue, 26 Feb 2013 15:24:29 +0900 This patch integrates the LZ4 decompression code to the arm pre-boot code. And it depends on two patchs below lib: Add support for LZ4-compressed kernel decompressor: Add LZ4 decompressor module Signed-off-by: Kyungsik Lee v2: - Apply CFLAGS, -Os to decompress.o to improve decompress performance during boot-up process Signed-off-by: Paul Reioux Conflicts: arch/arm/boot/compressed/Makefile --- arch/arm/Kconfig | 1 + arch/arm/boot/compressed/Makefile | 7 ++++++- arch/arm/boot/compressed/decompress.c | 4 ++++ arch/arm/boot/compressed/piggy.lz4.S | 6 ++++++ 4 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 arch/arm/boot/compressed/piggy.lz4.S diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9550edfc140..453978d3bbe 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -19,6 +19,7 @@ config ARM select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO + select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZMA select HAVE_IRQ_WORK select HAVE_PERF_EVENTS diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 833905c6ecc..a422bbc5c58 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -32,6 +32,10 @@ MISC = misc.o AFLAGS_decompress.o += -Wa,-march=armv7-a$(plus_sec) DECOMPRESS = decompress.o +ifeq ($(CONFIG_KERNEL_LZ4),y) +CFLAGS_decompress.o := -Os +endif + FONTC = $(srctree)/drivers/video/console/font_acorn_8x8.c # @@ -96,13 +100,14 @@ SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/ suffix_$(CONFIG_KERNEL_GZIP) = gzip suffix_$(CONFIG_KERNEL_LZO) = lzo suffix_$(CONFIG_KERNEL_LZMA) = lzma +suffix_$(CONFIG_KERNEL_LZ4) = lz4 targets := vmlinux vmlinux.lds \ piggy.$(suffix_y) piggy.$(suffix_y).o \ font.o font.c head.o misc.o decompress.o $(OBJS) # Make sure files are removed during clean -extra-y += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S +extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.lz4 lib1funcs.S ifeq ($(CONFIG_FUNCTION_TRACER),y) ORIG_CFLAGS := 
$(KBUILD_CFLAGS) diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index 07be5a2f830..dfeaff40df1 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -44,6 +44,10 @@ extern void error(char *); #include "../../../../lib/decompress_unlzma.c" #endif +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) { return decompress(input, len, NULL, NULL, output, NULL, error); diff --git a/arch/arm/boot/compressed/piggy.lz4.S b/arch/arm/boot/compressed/piggy.lz4.S new file mode 100644 index 00000000000..3d9a575618a --- /dev/null +++ b/arch/arm/boot/compressed/piggy.lz4.S @@ -0,0 +1,6 @@ + .section .piggydata,#alloc + .globl input_data +input_data: + .incbin "arch/arm/boot/compressed/piggy.lz4" + .globl input_data_end +input_data_end: From 9feedcc64bbd093e443acfff412139d2fb3e4d39 Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Fri, 9 Aug 2013 12:56:51 -0500 Subject: [PATCH 567/678] lib: Add lz4 compressor module From Chanho Min <> This patch adds support for LZ4 compression in the Linux Kernel. LZ4 Compression APIs for kernel are based on LZ4 implementation by Yann Collet and changed with kernel coding style. LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html LZ4 source repository : http://code.google.com/p/lz4/ svn revision : r90 Two APIs are added: lz4_compress() support basic lz4 compression whereas lz4hc_compress() support high compression or CPU performance get lower but compression ratio get higher. Also, we require the pre-allocated working memory with the defined size and destination buffer must be allocated with the size of lz4_compressbound. Signed-off-by: Chanho Min Signed-off-by: Paul Reioux --- include/linux/lz4.h | 36 +++ lib/Kconfig | 6 + lib/Makefile | 2 + lib/lz4/Makefile | 2 + lib/lz4/lz4_compress.c | 443 ++++++++++++++++++++++++++++++++ lib/lz4/lz4defs.h | 66 ++++- lib/lz4/lz4hc_compress.c | 539 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 1092 insertions(+), 2 deletions(-) create mode 100644 lib/lz4/lz4_compress.c create mode 100644 lib/lz4/lz4hc_compress.c diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 66b504c84a4..cd7ff315cfb 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -9,6 +9,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define LZ4_MEM_COMPRESS (4096 * sizeof(unsigned char *)) +#define LZ4HC_MEM_COMPRESS (65538 * sizeof(unsigned char *)) /* * LZ4_COMPRESSBOUND() @@ -17,6 +19,40 @@ */ #define LZ4_COMPRESSBOUND(isize) (isize + ((isize)/255) + 16) +/* + * lz4_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. 
+ */ +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + + /* + * lz4hc_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + /* * lz4_decompress() * src : source address of the compressed data diff --git a/lib/Kconfig b/lib/Kconfig index 4dfa3bba411..b80c866dd5b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -106,6 +106,12 @@ config LZO_COMPRESS config LZO_DECOMPRESS tristate +config LZ4_COMPRESS + tristate + +config LZ4HC_COMPRESS + tristate + config LZ4_DECOMPRESS tristate diff --git a/lib/Makefile b/lib/Makefile index 55418ad4c2c..4feda350a71 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -70,6 +70,8 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_LZ4_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ obj-$(CONFIG_XZ_DEC) += xz/ obj-$(CONFIG_RAID6_PQ) += raid6/ diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile index 7f548c6d1c5..8085d04e930 100644 --- a/lib/lz4/Makefile +++ b/lib/lz4/Makefile @@ -1 +1,3 @@ +obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c new file mode 100644 index 00000000000..fd94058bd7f --- /dev/null +++ b/lib/lz4/lz4_compress.c @@ -0,0 +1,443 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min + */ + +#include +#include +#include +#include +#include "lz4defs.h" + +/* + * LZ4_compressCtx : + * ----------------- + * Compress 'isize' bytes from 'source' into an output buffer 'dest' of + * maximum size 'maxOutputSize'. * If it cannot achieve it, compression + * will stop, and result of the function will be zero. + * return : the number of bytes written in buffer 'dest', or 0 if the + * compression fails + */ +static inline int lz4_compressctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + HTYPE *hashtable = (HTYPE *)ctx; + const u8 *ip = (u8 *)source; +#if LZ4_ARCH64 + const BYTE * const base = ip; +#else + const int base = 0; +#endif + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + ip++; + forwardh = LZ4_HASH_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (unlikely(forwardip > mflimit)) + goto _last_literals; + + forwardh = LZ4_HASH_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = ip - base; + } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) && + unlikely(ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + + if (length >= (int)RUN_MASK) { + int len; + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + /* Encode MatchLength */ + length = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend)) + return 0; + if (length >= (int)ML_MASK) { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length -= 510) { + *op++ = 255; + 
*op++ = 255; + } + if (length > 254) { + length -= 255; + *op++ = 255; + } + *op++ = (u8)length; + } else + *token += length; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; + + /* Test next position */ + ref = base + hashtable[LZ4_HASH_VALUE(ip)]; + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (((char *)op - dest) + lastrun + 1 + + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize) + return 0; + + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + + /* End */ + return (int)(((char *)op) - dest); +} + +static inline int lz4_compress64kctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + u16 *hashtable = (u16 *)ctx; + const u8 *ip = (u8 *) source; + const u8 *anchor = ip; + const u8 *const base = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int len, length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (forwardip > mflimit) + goto _last_literals; + + forwardh = LZ4_HASH64K_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = (u16)(ip - base); + } while (A32(ref) != A32(ip)); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) + && (ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* Check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); + +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + + while (ip < MATCHLIMIT - (STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < 
MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + + /* Encode MatchLength */ + len = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) + return 0; + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *op++ = 255; + *op++ = 255; + } + if (len > 254) { + len -= 255; + *op++ = 255; + } + *op++ = (u8)len; + } else + *token += len; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base); + + /* Test next position */ + ref = base + hashtable[LZ4_HASH64K_VALUE(ip)]; + hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base); + if (A32(ref) == A32(ip)) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend) + return 0; + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int)(((char *)op) - dest); +} + +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + if (src_len < LZ4_64KLIMIT) + out_len = lz4_compress64kctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + else + out_len = lz4_compressctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + + return 0; +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 compressor"); diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index fde76e6c66d..1c87125c4e2 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -22,23 +22,40 @@ * Architecture-specific macros */ #define BYTE u8 +typedef struct _U16_S { u16 v; } U16_S; +typedef struct _U32_S { u32 v; } U32_S; +typedef struct _U64_S { u64 v; } U64_S; #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || defined(CONFIG_ARM) \ && __LINUX_ARM_ARCH__ >= 6 \ && defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) -typedef struct _U32_S { u32 v; } U32_S; -typedef struct _U64_S { u64 v; } U64_S; +#define A16(x) (((U16_S *)(x))->v) #define A32(x) (((U32_S *)(x))->v) #define A64(x) (((U64_S *)(x))->v) #define PUT4(s, d) (A32(d) = A32(s)) #define PUT8(s, d) (A64(d) = A64(s)) +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + A16(p) = v; \ + p += 2; \ + } while (0) #else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ +#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v)) +#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v)) +#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v)) + #define PUT4(s, d) \ put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) #define PUT8(s, d) \ put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) + +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + put_unaligned(v, (u16 *)(p)); \ + p += 2; \ + } while (0) #endif #define COPYLENGTH 8 @@ -46,6 +63,29 @@ typedef struct _U64_S { u64 v; } U64_S; #define ML_MASK ((1U << ML_BITS) - 1) #define RUN_BITS (8 - ML_BITS) #define RUN_MASK ((1U << RUN_BITS) - 1) +#define MEMORY_USAGE 14 +#define MINMATCH 4 +#define SKIPSTRENGTH 6 
+#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH + MINMATCH) +#define MINLENGTH (MFLIMIT + 1) +#define MAXD_LOG 16 +#define MAXD (1 << MAXD_LOG) +#define MAXD_MASK (u32)(MAXD - 1) +#define MAX_DISTANCE (MAXD - 1) +#define HASH_LOG (MAXD_LOG - 1) +#define HASHTABLESIZE (1 << HASH_LOG) +#define MAX_NB_ATTEMPTS 256 +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1)) +#define HASHLOG64K ((MEMORY_USAGE - 2) + 1) +#define HASH64KTABLESIZE (1U << HASHLOG64K) +#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - (MEMORY_USAGE-2))) +#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASHLOG64K)) +#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASH_LOG)) #if LZ4_ARCH64/* 64-bit */ #define STEPSIZE 8 @@ -65,6 +105,13 @@ typedef struct _U64_S { u64 v; } U64_S; LZ4_WILDCOPY(s, d, e); \ } \ } while (0) +#define HTYPE u32 + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3) +#endif #else /* 32-bit */ #define STEPSIZE 4 @@ -83,6 +130,14 @@ typedef struct _U64_S { u64 v; } U64_S; } while (0) #define LZ4_SECURECOPY LZ4_WILDCOPY +#define HTYPE const u8* + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3) +#endif + #endif #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ @@ -91,3 +146,10 @@ typedef struct _U64_S { u64 v; } U64_S; do { \ LZ4_COPYPACKET(s, d); \ } while (d < e) + +#define LZ4_BLINDCOPY(s, d, l) \ + do { \ + u8 *e = (d) + l; \ + LZ4_WILDCOPY(s, d, e); \ + d = e; \ + } while (0) diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c new file mode 100644 index 00000000000..a9a9c2a00c5 --- /dev/null +++ b/lib/lz4/lz4hc_compress.c @@ -0,0 +1,539 @@ +/* + * LZ4 HC - High Compression Mode of LZ4 + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min + */ + +#include +#include +#include +#include +#include "lz4defs.h" + +struct lz4hc_data { + const u8 *base; + HTYPE hashtable[HASHTABLESIZE]; + u16 chaintable[MAXD]; + const u8 *nexttoupdate; +} __attribute__((__packed__)); + +static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base) +{ + memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable)); + memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable)); + +#if LZ4_ARCH64 + hc4->nexttoupdate = base + 1; +#else + hc4->nexttoupdate = base; +#endif + hc4->base = base; + return 1; +} + +/* Update chains up to ip (excluded) */ +static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) +{ + u16 *chaintable = hc4->chaintable; + HTYPE *hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + + while (hc4->nexttoupdate < ip) { + const u8 *p = hc4->nexttoupdate; + size_t delta = p - (hashtable[HASH_VALUE(p)] + base); + if (delta > MAX_DISTANCE) + delta = MAX_DISTANCE; + chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta; + hashtable[HASH_VALUE(p)] = (p) - base; + hc4->nexttoupdate++; + } +} + +static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2, + const u8 *const matchlimit) +{ + const u8 *p1t = p1; + + while (p1t < matchlimit - (STEPSIZE - 1)) { +#if LZ4_ARCH64 + u64 diff = A64(p2) ^ A64(p1t); +#else + u32 diff = A32(p2) ^ A32(p1t); +#endif + if (!diff) { + p1t += STEPSIZE; + p2 += STEPSIZE; + continue; + } + p1t += LZ4_NBCOMMONBYTES(diff); + return p1t - p1; + } +#if LZ4_ARCH64 + if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) { + p1t += 4; + p2 += 4; + } +#endif + + if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) { + p1t += 2; + p2 += 2; + } + if ((p1t < matchlimit) && (*p2 == *p1t)) + p1t++; + return p1t - p1; +} + +static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *const matchlimit, const u8 **matchpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; + const u8 *ref; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + int nbattempts = MAX_NB_ATTEMPTS; + size_t repl = 0, ml = 0; + u16 delta; + + /* HC4 match finder */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + /* potential repetition */ + if (ref >= ip-4) { + /* confirmed */ + if (A32(ref) == A32(ip)) { + delta = (u16)(ip-ref); + repl = ml = lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + *matchpos = ref; + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + while ((ref >= ip - MAX_DISTANCE) && nbattempts) { + nbattempts--; + if (*(ref + ml) == *(ip + ml)) { + if (A32(ref) == A32(ip)) { + size_t mlt = + lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + if (mlt > ml) { + ml = mlt; + *matchpos = ref; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + /* Complete table */ + if (repl) { + const BYTE *ptr = ip; + const BYTE *end; + end = ip + repl - (MINMATCH-1); + /* Pre-Load */ + while (ptr < end - delta) { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + ptr++; + } + do { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + /* Head of chain */ + hashtable[HASH_VALUE(ptr)] = (ptr) - base; + ptr++; + } while (ptr < end); + 
hc4->nexttoupdate = end; + } + + return (int)ml; +} + +static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest, + const u8 **matchpos, const u8 **startpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + const u8 *ref; + int nbattempts = MAX_NB_ATTEMPTS; + int delta = (int)(ip - startlimit); + + /* First Match */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base) + && (nbattempts)) { + nbattempts--; + if (*(startlimit + longest) == *(ref - delta + longest)) { + if (A32(ref) == A32(ip)) { + const u8 *reft = ref + MINMATCH; + const u8 *ipt = ip + MINMATCH; + const u8 *startt = ip; + + while (ipt < matchlimit-(STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(reft) ^ A64(ipt); + #else + u32 diff = A32(reft) ^ A32(ipt); + #endif + + if (!diff) { + ipt += STEPSIZE; + reft += STEPSIZE; + continue; + } + ipt += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ipt < (matchlimit - 3)) + && (A32(reft) == A32(ipt))) { + ipt += 4; + reft += 4; + } + ipt += 2; + #endif + if ((ipt < (matchlimit - 1)) + && (A16(reft) == A16(ipt))) { + reft += 2; + } + if ((ipt < matchlimit) && (*reft == *ipt)) + ipt++; +_endcount: + reft = ref; + + while ((startt > startlimit) + && (reft > hc4->base) + && (startt[-1] == reft[-1])) { + startt--; + reft--; + } + + if ((ipt - startt) > longest) { + longest = (int)(ipt - startt); + *matchpos = reft; + *startpos = startt; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + return longest; +} + +static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor, + int ml, const u8 *ref) +{ + int length, len; + u8 *token; + + /* Encode Literal length */ + length = (int)(*ip - *anchor); + token = (*op)++; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *(*op)++ = 255; + *(*op)++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(*anchor, *op, length); + + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref)); + + /* Encode MatchLength */ + len = (int)(ml - MINMATCH); + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *(*op)++ = 255; + *(*op)++ = 255; + } + if (len > 254) { + len -= 255; + *(*op)++ = 255; + } + *(*op)++ = (u8)len; + } else + *token += len; + + /* Prepare next loop */ + *ip += ml; + *anchor = *ip; + + return 0; +} + +int lz4_compresshcctx(struct lz4hc_data *ctx, + const char *source, + char *dest, + int isize) +{ + const u8 *ip = (const u8 *)source; + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + const u8 *const matchlimit = (iend - LASTLITERALS); + + u8 *op = (u8 *)dest; + + int ml, ml2, ml3, ml0; + const u8 *ref = NULL; + const u8 *start2 = NULL; + const u8 *ref2 = NULL; + const u8 *start3 = NULL; + const u8 *ref3 = NULL; + const u8 *start0; + const u8 *ref0; + int lastrun; + + ip++; + + /* Main Loop */ + while (ip < mflimit) { + ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref)); + if (!ml) { + ip++; + continue; + } + + /* saved, in case we would skip too much */ + start0 = ip; + ref0 = ref; + ml0 = ml; +_search2: + if (ip+ml < mflimit) + ml2 = 
lz4hc_insertandgetwidermatch(ctx, ip + ml - 2, + ip + 1, matchlimit, ml, &ref2, &start2); + else + ml2 = ml; + /* No better match */ + if (ml2 == ml) { + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + continue; + } + + if (start0 < ip) { + /* empirical */ + if (start2 < ip + ml0) { + ip = start0; + ref = ref0; + ml = ml0; + } + } + /* + * Here, start0==ip + * First Match too small : removed + */ + if ((start2 - ip) < 3) { + ml = ml2; + ip = start2; + ref = ref2; + goto _search2; + } + +_search3: + /* + * Currently we have : + * ml2 > ml1, and + * ip1+3 <= ip2 (usually < ip1+ml1) + */ + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) + new_ml = OPTIMAL_ML; + if (ip + new_ml > start2 + ml2 - MINMATCH) + new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + /* + * Now, we have start2 = ip+new_ml, + * with new_ml=min(ml, OPTIMAL_ML=18) + */ + if (start2 + ml2 < mflimit) + ml3 = lz4hc_insertandgetwidermatch(ctx, + start2 + ml2 - 3, start2, matchlimit, + ml2, &ref3, &start3); + else + ml3 = ml2; + + /* No better match : 2 sequences to encode */ + if (ml3 == ml2) { + /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) + ml = (int)(start2 - ip); + + /* Now, encode 2 sequences */ + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start2; + lz4_encodesequence(&ip, &op, &anchor, ml2, ref2); + continue; + } + + /* Not enough space for match 2 : remove it */ + if (start3 < ip + ml + 3) { + /* + * can write Seq1 immediately ==> Seq2 is removed, + * so Seq3 becomes Seq1 + */ + if (start3 >= (ip + ml)) { + if (start2 < ip + ml) { + int correction = + (int)(ip + ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _search3; + } + + /* + * OK, now we have 3 ascending matches; let's write at least + * the first one ip & ref are known; Now for ml + */ + if (start2 < ip + ml) { + if ((start2 - ip) < (int)ML_MASK) { + int correction; + if (ml > OPTIMAL_ML) + ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) + ml = (int)(start2 - ip) + ml2 + - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } else + ml = (int)(start2 - ip); + } + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + + ip = start2; + ref = ref2; + ml = ml2; + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + + goto _search3; + } + + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8) lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int) (((char *)op) - dest); +} + +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem; + lz4hc_init(hc4, (const u8 *)src); + out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src, + 
(char *)dst, (int)src_len); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + return 0; + +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4hc_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4HC compressor"); From 3dd0b91b472ada3fd7f501970c2eccb39436d53b Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Fri, 9 Aug 2013 12:58:40 -0500 Subject: [PATCH 568/678] crypto: Add lz4 Cryptographic API From Chanho Min <> This patch adds support for lz4 and lz4hc compression algorithm using the lib/lz4/* codebase. Signed-off-by: Chanho Min Signed-off-by: Paul Reioux --- crypto/Kconfig | 16 ++++++++ crypto/Makefile | 2 + crypto/lz4.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++ crypto/lz4hc.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 228 insertions(+) create mode 100644 crypto/lz4.c create mode 100644 crypto/lz4hc.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 49b5dcf58e4..823db834818 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -856,6 +856,22 @@ config CRYPTO_LZO help This is the LZO algorithm. +config CRYPTO_LZ4 + tristate "LZ4 compression algorithm" + select CRYPTO_ALGAPI + select LZ4_COMPRESS + select LZ4_DECOMPRESS + help + This is the LZ4 algorithm. + +config CRYPTO_LZ4HC + tristate "LZ4HC compression algorithm" + select CRYPTO_ALGAPI + select LZ4HC_COMPRESS + select LZ4_DECOMPRESS + help + This is the LZ4 high compression mode algorithm. + comment "Random Number Generation" config CRYPTO_ANSI_CPRNG diff --git a/crypto/Makefile b/crypto/Makefile index ce5a813d363..4ab082dc51f 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -80,6 +80,8 @@ obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o +obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o obj-$(CONFIG_CRYPTO_RNG2) += krng.o obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o diff --git a/crypto/lz4.c b/crypto/lz4.c new file mode 100644 index 00000000000..98bfdd71e78 --- /dev/null +++ b/crypto/lz4.c @@ -0,0 +1,105 @@ +/* + * Cryptographic API. + * + * Copyright (c) 2013 Chanho Min + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include + +struct lz4_ctx { + void *lz4_comp_mem; +}; + +static int lz4_init(struct crypto_tfm *tfm) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lz4_comp_mem = vmalloc(LZ4_MEM_COMPRESS); + if (!ctx->lz4_comp_mem) + return -ENOMEM; + + return 0; +} + +static void lz4_exit(struct crypto_tfm *tfm) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + vfree(ctx->lz4_comp_mem); +} + +static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + size_t tmp_len = *dlen; + int err; + + err = lz4_compress(src, slen, dst, &tmp_len, ctx->lz4_comp_mem); + + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + int err; + size_t tmp_len = *dlen; + + err = lz4_decompress(src, &slen, dst, tmp_len); + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return err; +} + +static struct crypto_alg alg_lz4 = { + .cra_name = "lz4", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lz4_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg_lz4.cra_list), + .cra_init = lz4_init, + .cra_exit = lz4_exit, + .cra_u = { .compress = { + .coa_compress = lz4_compress_crypto, + .coa_decompress = lz4_decompress_crypto } } +}; + +static int __init lz4_mod_init(void) +{ + return crypto_register_alg(&alg_lz4); +} + +static void __exit lz4_mod_fini(void) +{ + crypto_unregister_alg(&alg_lz4); +} + +module_init(lz4_mod_init); +module_exit(lz4_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 Compression Algorithm"); diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c new file mode 100644 index 00000000000..d3c9625917c --- /dev/null +++ b/crypto/lz4hc.c @@ -0,0 +1,105 @@ +/* + * Cryptographic API. + * + * Copyright (c) 2013 Chanho Min + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include +#include +#include +#include +#include + +struct lz4hc_ctx { + void *lz4hc_comp_mem; +}; + +static int lz4hc_init(struct crypto_tfm *tfm) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lz4hc_comp_mem = vmalloc(LZ4HC_MEM_COMPRESS); + if (!ctx->lz4hc_comp_mem) + return -ENOMEM; + + return 0; +} + +static void lz4hc_exit(struct crypto_tfm *tfm) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + + vfree(ctx->lz4hc_comp_mem); +} + +static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + size_t tmp_len = *dlen; + int err; + + err = lz4hc_compress(src, slen, dst, &tmp_len, ctx->lz4hc_comp_mem); + + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + int err; + size_t tmp_len = *dlen; + + err = lz4_decompress(src, &slen, dst, tmp_len); + if (err < 0) + return -EINVAL; + + *dlen = tmp_len; + return err; +} + +static struct crypto_alg alg_lz4hc = { + .cra_name = "lz4hc", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lz4hc_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg_lz4hc.cra_list), + .cra_init = lz4hc_init, + .cra_exit = lz4hc_exit, + .cra_u = { .compress = { + .coa_compress = lz4hc_compress_crypto, + .coa_decompress = lz4hc_decompress_crypto } } +}; + +static int __init lz4hc_mod_init(void) +{ + return crypto_register_alg(&alg_lz4hc); +} + +static void __exit lz4hc_mod_fini(void) +{ + crypto_unregister_alg(&alg_lz4hc); +} + +module_init(lz4hc_mod_init); +module_exit(lz4hc_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4HC Compression Algorithm"); From 9e3a32a44af1d7bb43843846cc6b7eeba04054fc Mon Sep 17 00:00:00 2001 From: Paul Reioux Date: Fri, 9 Aug 2013 13:05:22 -0500 Subject: [PATCH 569/678] lib/lz4/lz4_compress: fix macro usage error Signed-off-by: Paul Reioux --- lib/lz4/lz4_compress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c index fd94058bd7f..10762814137 100644 --- a/lib/lz4/lz4_compress.c +++ b/lib/lz4/lz4_compress.c @@ -423,10 +423,10 @@ int lz4_compress(const unsigned char *src, size_t src_len, if (src_len < LZ4_64KLIMIT) out_len = lz4_compress64kctx(wrkmem, src, dst, src_len, - lz4_compressbound(src_len)); + LZ4_COMPRESSBOUND(src_len)); else out_len = lz4_compressctx(wrkmem, src, dst, src_len, - lz4_compressbound(src_len)); + LZ4_COMPRESSBOUND(src_len)); if (out_len < 0) goto exit; From 7ea0737ce0aee446f6c91c42c87a28673ff84e95 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 9 Aug 2013 13:07:32 -0500 Subject: [PATCH 570/678] LZ4: compression/decompression signedness mismatch (v2) LZ4 compression and decompression functions require different in signedness input/output parameters: unsigned char for compression and signed char for decompression. Change decompression API to require "(const) unsigned char *". v2: minor coding style fix. 
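A minimal call-site sketch against the updated prototypes (the helper name, buffer names, and error handling below are illustrative only, not part of this patch):

    /* Sketch: unpack a buffer whose decompressed size is already known.
     * Needs linux/lz4.h (and linux/errno.h for -EINVAL); 'comp' holds the
     * LZ4 data and 'plain' must be at least out_len bytes. */
    static int example_lz4_unpack(const unsigned char *comp,
                                  unsigned char *plain, size_t out_len)
    {
            size_t src_len = 0;  /* set by lz4_decompress() to the compressed bytes consumed */

            if (lz4_decompress(comp, &src_len, plain, out_len) < 0)
                    return -EINVAL;
            return 0;
    }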
Signed-off-by: Sergey Senozhatsky Signed-off-by: Paul Reioux --- include/linux/lz4.h | 8 ++++---- lib/lz4/lz4_decompress.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/lz4.h b/include/linux/lz4.h index cd7ff315cfb..312f44939c7 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -64,8 +64,8 @@ int lz4hc_compress(const unsigned char *src, size_t src_len, * note : Destination buffer must be already allocated. * a bit faster than lz4_decompress_unknownoutputsize() */ -int lz4_decompress(const char *src, size_t *src_len, char *dest, - size_t actual_dest_len); +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len); /* * lz4_decompress_unknownoutputsize() @@ -79,6 +79,6 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest, * Error if return (< 0) * note : Destination buffer must be already allocated. */ -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, - char *dest, size_t *dest_len); +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len); #endif diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index f58eaca5127..5e7f2dbcc53 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -288,8 +288,8 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, return (int) (-(((char *)ip) - source)); } -int lz4_decompress(const char *src, size_t *src_len, char *dest, - size_t actual_dest_len) +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len) { int ret = -1; int input_len = 0; @@ -307,8 +307,8 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest, EXPORT_SYMBOL_GPL(lz4_decompress); #endif -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, - char *dest, size_t *dest_len) +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len) { int ret = -1; int out_len = 0; From 2be21bd653ccb3bd7879e4b7ca71b55ec6f4caa2 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sat, 5 Oct 2013 22:12:43 -0400 Subject: [PATCH 571/678] defconfig: a62 --- arch/arm/configs/metallice_grouper_defconfig | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index cebd062edbf..5e94dc1f27c 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,14 +38,16 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-mr2" +CONFIG_LOCALVERSION="-MKernel-a62" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y CONFIG_HAVE_KERNEL_LZO=y -CONFIG_KERNEL_GZIP=y +CONFIG_HAVE_KERNEL_LZ4=y +# CONFIG_KERNEL_GZIP is not set # CONFIG_KERNEL_LZMA is not set # CONFIG_KERNEL_LZO is not set +CONFIG_KERNEL_LZ4=y CONFIG_DEFAULT_HOSTNAME="(none)" # CONFIG_SWAP is not set # CONFIG_SYSVIPC is not set @@ -102,6 +104,7 @@ CONFIG_RD_GZIP=y # CONFIG_RD_LZMA is not set # CONFIG_RD_XZ is not set # CONFIG_RD_LZO is not set +CONFIG_RD_LZ4=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -3360,6 +3363,8 @@ CONFIG_CRYPTO_TWOFISH_COMMON=y CONFIG_CRYPTO_DEFLATE=y # CONFIG_CRYPTO_ZLIB is not set # CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_LZ4=y +# CONFIG_CRYPTO_LZ4HC is 
not set # # Random Number Generation @@ -3390,9 +3395,12 @@ CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y CONFIG_LZO_COMPRESS=y CONFIG_LZO_DECOMPRESS=y +CONFIG_LZ4_COMPRESS=y +CONFIG_LZ4_DECOMPRESS=y # CONFIG_XZ_DEC is not set # CONFIG_XZ_DEC_BCJ is not set CONFIG_DECOMPRESS_GZIP=y +CONFIG_DECOMPRESS_LZ4=y CONFIG_REED_SOLOMON=y CONFIG_REED_SOLOMON_ENC8=y CONFIG_REED_SOLOMON_DEC8=y From 9c243f85650e7de9bd4935d948a4e971c376a1cf Mon Sep 17 00:00:00 2001 From: Lee Susman Date: Sun, 5 May 2013 14:31:17 +0000 Subject: [PATCH 572/678] mm: pass readahead info down to the i/o scheduler Some i/o schedulers (i.e. row-iosched, cfq-iosched) deploy an idling algorithm in order to be better synced with the readahead algorithm. Idling is a prediction algorithm for incoming read requests. In this patch we mark pages which are part of a readahead window, by setting a newly introduced flag. With this flag, the i/o scheduler can identify a request which is associated with a readahead page. This enables the i/o scheduler's idling mechanism to be en-sync with the readahead mechanism and, in turn, can increase read throughput. Change-Id: I0654f23315b6d19d71bcc9cc029c6b281a44b196 Signed-off-by: Lee Susman --- include/linux/page-flags.h | 1 + mm/page_alloc.c | 1 + mm/readahead.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e90a673be67..d7fc4ee692e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -107,6 +107,7 @@ enum pageflags { #ifdef CONFIG_TRANSPARENT_HUGEPAGE PG_compound_lock, #endif + PG_readahead, /* page in a readahead window */ __NR_PAGEFLAGS, /* Filesystems */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e6c28f28ad5..53e6b8a6bb7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5811,6 +5811,7 @@ static struct trace_print_flags pageflag_names[] = { #ifdef CONFIG_MEMORY_FAILURE {1UL << PG_hwpoison, "hwpoison" }, #endif + {1UL << PG_readahead, "PG_readahead" }, {-1UL, NULL }, }; diff --git a/mm/readahead.c b/mm/readahead.c index 867f9dd82dc..e1bc5681a8f 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -184,6 +184,9 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, if (!page) break; page->index = page_offset; + + page->flags |= (1L << PG_readahead); + list_add(&page->lru, &page_pool); if (page_idx == nr_to_read - lookahead_size) SetPageReadahead(page); From 11e09126c5ab7c59a807b551954f5631cb76c4e8 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Sun, 24 Feb 2013 10:46:15 -0600 Subject: [PATCH 573/678] tmpfs: fix use-after-free of mempolicy object The tmpfs remount logic preserves filesystem mempolicy if the mpol=M option is not specified in the remount request. A new policy can be specified if mpol=M is given. Before this patch remounting an mpol bound tmpfs without specifying mpol= mount option in the remount request would set the filesystem's mempolicy object to a freed mempolicy object. To reproduce the problem boot a DEBUG_PAGEALLOC kernel and run: # mkdir /tmp/x # mount -t tmpfs -o size=100M,mpol=interleave nodev /tmp/x # grep /tmp/x /proc/mounts nodev /tmp/x tmpfs rw,relatime,size=102400k,mpol=interleave:0-3 0 0 # mount -o remount,size=200M nodev /tmp/x # grep /tmp/x /proc/mounts nodev /tmp/x tmpfs rw,relatime,size=204800k,mpol=??? 0 0 # note ? garbage in mpol=... output above # dd if=/dev/zero of=/tmp/x/f count=1 # panic here Panic: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [< (null)>] (null) [...] 
Oops: 0010 [#1] SMP DEBUG_PAGEALLOC Call Trace: [] ? mpol_set_nodemask+0x8d/0x100 [] ? mpol_shared_policy_init+0x8f/0x160 [] mpol_shared_policy_init+0xa5/0x160 [] ? shmem_get_inode+0x1e1/0x270 [] ? shmem_get_inode+0x1e1/0x270 [] ? trace_hardirqs_on+0xd/0x10 [] shmem_get_inode+0x209/0x270 [] shmem_mknod+0x3e/0xf0 [] shmem_create+0x18/0x20 [] vfs_create+0xb5/0x130 [] do_last+0x9a1/0xea0 [] ? link_path_walk+0x7a/0x930 [] path_openat+0xb3/0x4d0 [] ? __alloc_fd+0x31/0x160 [] do_filp_open+0x42/0xa0 [] ? __alloc_fd+0xe0/0x160 [] do_sys_open+0xfe/0x1e0 [] compat_sys_open+0x1b/0x20 [] cstar_dispatch+0x7/0x1f Non-debug kernels will not crash immediately because referencing the dangling mpol will not cause a fault. Instead the filesystem will reference a freed mempolicy object, which will cause unpredictable behavior. The problem boils down to a dropped mpol reference below if shmem_parse_options() does not allocate a new mpol: config = *sbinfo shmem_parse_options(data, &config, true) mpol_put(sbinfo->mpol) sbinfo->mpol = config.mpol /* BUG: saves unreferenced mpol */ This patch avoids the crash by not releasing the mempolicy if shmem_parse_options() doesn't create a new mpol. How far back does this issue go? I see it in both 2.6.36 and 3.3. I did not look back further. Signed-off-by: Greg Thelen --- mm/shmem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index fba53caba0d..5943b3d1459 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2081,6 +2081,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) unsigned long inodes; int error = -EINVAL; + config.mpol = NULL; if (shmem_parse_options(data, &config, true)) return error; @@ -2105,8 +2106,13 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) sbinfo->max_inodes = config.max_inodes; sbinfo->free_inodes = config.max_inodes - inodes; - mpol_put(sbinfo->mpol); - sbinfo->mpol = config.mpol; /* transfers initial ref */ + /* + * Preserve previous mempolicy unless mpol remount option was specified. + */ + if (config.mpol) { + mpol_put(sbinfo->mpol); + sbinfo->mpol = config.mpol; /* transfers initial ref */ + } out: spin_unlock(&sbinfo->stat_lock); return error; From f9ac9f6c4ef095c4e66653c6c1c84a7c9452e679 Mon Sep 17 00:00:00 2001 From: faux123 Date: Sun, 24 Feb 2013 10:57:41 -0600 Subject: [PATCH 574/678] tmpfs: fix mempolicy object leaks This patch fixes several mempolicy leaks in the tmpfs mount logic. These leaks are slow - on the order of one object leaked per mount attempt. Leak 1 (umount doesn't free mpol allocated in mount): while true; do mount -t tmpfs -o mpol=interleave,size=100M nodev /mnt umount /mnt done Leak 2 (errors parsing remount options will leak mpol): mount -t tmpfs -o size=100M nodev /mnt while true; do mount -o remount,mpol=interleave,size=x /mnt 2> /dev/null done umount /mnt Leak 3 (multiple mpol per mount leak mpol): while true; do mount -t tmpfs -o mpol=interleave,mpol=interleave,size=100M nodev /mnt umount /mnt done This patch fixes all of the above. I could have broken the patch into three pieces but is seemed easier to review as one. 
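One way to double-check the result, beyond the loops above, is to rerun them on a kernel built with CONFIG_DEBUG_KMEMLEAK=y and ask kmemleak for a report afterwards (the mount step is only needed if debugfs is not already mounted):

    mount -t debugfs nodev /sys/kernel/debug
    echo scan > /sys/kernel/debug/kmemleak
    cat /sys/kernel/debug/kmemleak   # should no longer accumulate mempolicy objects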
Signed-off-by: Greg Thelen modified for Mako kernel from LKML reference Signed-off-by: faux123 --- mm/shmem.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 5943b3d1459..d00a6258f94 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1988,6 +1988,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, bool remount) { char *this_char, *value, *rest; + struct mempolicy *mpol = NULL; while (options != NULL) { this_char = options; @@ -2014,7 +2015,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, printk(KERN_ERR "tmpfs: No value for mount option '%s'\n", this_char); - return 1; + goto error; } if (!strcmp(this_char,"size")) { @@ -2057,19 +2058,25 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, if (*rest) goto bad_val; } else if (!strcmp(this_char,"mpol")) { - if (mpol_parse_str(value, &sbinfo->mpol, 1)) + mpol_put(mpol); + if (mpol_parse_str(value, &mpol, 1)) { + mpol = NULL; goto bad_val; + } } else { printk(KERN_ERR "tmpfs: Bad mount option %s\n", this_char); - return 1; + goto error; } } + sbinfo->mpol = mpol; return 0; bad_val: printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n", value, this_char); +error: + mpol_put(mpol); return 1; } @@ -2143,6 +2150,7 @@ static void shmem_put_super(struct super_block *sb) struct shmem_sb_info *sbinfo = SHMEM_SB(sb); percpu_counter_destroy(&sbinfo->used_blocks); + mpol_put(sbinfo->mpol); kfree(sbinfo); sb->s_fs_info = NULL; } From 50d6fbfeac14ab04c4ed16a3626839c54cda9522 Mon Sep 17 00:00:00 2001 From: Paul Reioux Date: Mon, 15 Apr 2013 14:30:06 -0500 Subject: [PATCH 575/678] dynamic fsync: favor true case since most will be using this feature Signed-off-by: Paul Reioux --- fs/sync.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/sync.c b/fs/sync.c index 69c7200923b..320fc3fe0bd 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -185,7 +185,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (unlikely(dyn_fsync_active && !early_suspend_active)) + if (likely(dyn_fsync_active && !early_suspend_active)) return 0; else { #endif @@ -243,7 +243,7 @@ SYSCALL_DEFINE1(fsync, unsigned int, fd) return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (unlikely(dyn_fsync_active && !early_suspend_active)) + if (likely(dyn_fsync_active && !early_suspend_active)) return 0; else #endif @@ -257,7 +257,7 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (unlikely(dyn_fsync_active && !early_suspend_active)) + if (likely(dyn_fsync_active && !early_suspend_active)) return 0; else #endif @@ -341,7 +341,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, return 0; #endif #ifdef CONFIG_DYNAMIC_FSYNC - if (unlikely(dyn_fsync_active && !early_suspend_active)) + if (likely(dyn_fsync_active && !early_suspend_active)) return 0; else { #endif @@ -445,7 +445,7 @@ SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags, loff_t offset, loff_t nbytes) { #ifdef CONFIG_DYNAMIC_FSYNC - if (unlikely(dyn_fsync_active && !early_suspend_active)) + if (likely(dyn_fsync_active && !early_suspend_active)) return 0; else #endif From b313c15cb6cf3c910c7959c95c9766720b40b02f Mon Sep 17 00:00:00 2001 From: Paul Reioux Date: Mon, 10 Jun 2013 02:19:36 -0500 Subject: [PATCH 576/678] dynamic fsync: don't disable fdatasync() (conflict: remove fsync toggle) Signed-off-by: Paul Reioux 
Conflicts: fs/sync.c --- fs/sync.c | 43 +------------------------------------------ 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/fs/sync.c b/fs/sync.c index 320fc3fe0bd..27626fa6d09 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -26,10 +26,6 @@ extern bool dyn_fsync_active; #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) -#ifdef CONFIG_FSYNC_CONTROL -extern bool fsynccontrol_fsync_enabled(); -#endif - /* * Do the filesystem syncing work. For simple filesystems * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to @@ -148,11 +144,6 @@ SYSCALL_DEFINE1(syncfs, int, fd) int ret; int fput_needed; -#ifdef CONFIG_FSYNC_CONTROL - if (!fsynccontrol_fsync_enabled()) - return 0; -#endif - file = fget_light(fd, &fput_needed); if (!file) return -EBADF; @@ -179,11 +170,6 @@ SYSCALL_DEFINE1(syncfs, int, fd) */ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) { - -#ifdef CONFIG_FSYNC_CONTROL - if (!fsynccontrol_fsync_enabled()) - return 0; -#endif #ifdef CONFIG_DYNAMIC_FSYNC if (likely(dyn_fsync_active && !early_suspend_active)) return 0; @@ -208,11 +194,6 @@ EXPORT_SYMBOL(vfs_fsync_range); */ int vfs_fsync(struct file *file, int datasync) { -#ifdef CONFIG_FSYNC_CONTROL - if (!fsynccontrol_fsync_enabled()) - return 0; -#endif - return vfs_fsync_range(file, 0, LLONG_MAX, datasync); } EXPORT_SYMBOL(vfs_fsync); @@ -223,11 +204,6 @@ static int do_fsync(unsigned int fd, int datasync) int ret = -EBADF; int fput_needed; -#ifdef CONFIG_FSYNC_CONTROL - if (!fsynccontrol_fsync_enabled()) - return 0; -#endif - file = fget_light(fd, &fput_needed); if (file) { ret = vfs_fsync(file, datasync); @@ -238,10 +214,6 @@ static int do_fsync(unsigned int fd, int datasync) SYSCALL_DEFINE1(fsync, unsigned int, fd) { -#ifdef CONFIG_FSYNC_CONTROL - if (!fsynccontrol_fsync_enabled()) - return 0; -#endif #ifdef CONFIG_DYNAMIC_FSYNC if (likely(dyn_fsync_active && !early_suspend_active)) return 0; @@ -252,11 +224,7 @@ SYSCALL_DEFINE1(fsync, unsigned int, fd) SYSCALL_DEFINE1(fdatasync, unsigned int, fd) { -#ifdef CONFIG_FSYNC_CONTROL - if (!fsynccontrol_fsync_enabled()) - return 0; -#endif -#ifdef CONFIG_DYNAMIC_FSYNC +#if 0 if (likely(dyn_fsync_active && !early_suspend_active)) return 0; else @@ -274,11 +242,6 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) */ int generic_write_sync(struct file *file, loff_t pos, loff_t count) { -#ifdef CONFIG_FSYNC_CONTROL - if (!fsynccontrol_fsync_enabled()) - return 0; -#endif - if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) return 0; return vfs_fsync_range(file, pos, pos + count - 1, @@ -336,10 +299,6 @@ EXPORT_SYMBOL(generic_write_sync); SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, unsigned int flags) { -#ifdef CONFIG_FSYNC_CONTROL - if (!fsynccontrol_fsync_enabled()) - return 0; -#endif #ifdef CONFIG_DYNAMIC_FSYNC if (likely(dyn_fsync_active && !early_suspend_active)) return 0; From 0a2698c5554ea635b6c26da7095188c8c80949ee Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 6 Oct 2013 01:27:37 -0400 Subject: [PATCH 577/678] fsync_control: complete the removal --- drivers/misc/Kconfig | 6 -- drivers/misc/fsync_control.c | 110 ----------------------------------- 2 files changed, 116 deletions(-) delete mode 100644 drivers/misc/fsync_control.c diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 74ef5a57820..85eff542b78 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -577,10 +577,4 @@ source 
"drivers/misc/lis3lv02d/Kconfig" source "drivers/misc/carma/Kconfig" source "drivers/misc/tegra-baseband/Kconfig" -config FSYNC_CONTROL - bool "Support for FSync Control" - default y - help - Say Y here to enable FSync Control - endif # MISC_DEVICES diff --git a/drivers/misc/fsync_control.c b/drivers/misc/fsync_control.c deleted file mode 100644 index eceb8f7cc53..00000000000 --- a/drivers/misc/fsync_control.c +++ /dev/null @@ -1,110 +0,0 @@ -/* drivers/misc/fsync_control.c - * - * Copyright 2012 Ezekeel - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include - -#define FSYNCCONTROL_VERSION 1 - -static bool fsync_enabled = true; - -bool fsynccontrol_fsync_enabled() -{ - return fsync_enabled; -} -EXPORT_SYMBOL(fsynccontrol_fsync_enabled); - -static ssize_t fsynccontrol_status_read(struct device * dev, struct device_attribute * attr, char * buf) -{ - return sprintf(buf, "%u\n", (fsync_enabled ? 1 : 0)); -} - -static ssize_t fsynccontrol_status_write(struct device * dev, struct device_attribute * attr, const char * buf, size_t size) -{ - unsigned int data; - - if(sscanf(buf, "%u\n", &data) == 1) - { - if (data == 1) - { - pr_info("%s: FSYNCCONTROL fsync enabled\n", __FUNCTION__); - - fsync_enabled = true; - - } - else if (data == 0) - { - pr_info("%s: FSYNCCONTROL fsync disabled\n", __FUNCTION__); - - fsync_enabled = false; - } - else - { - pr_info("%s: invalid input range %u\n", __FUNCTION__, data); - } - } - else - { - pr_info("%s: invalid input\n", __FUNCTION__); - } - - return size; -} - -static ssize_t fsynccontrol_version(struct device * dev, struct device_attribute * attr, char * buf) -{ - return sprintf(buf, "%u\n", FSYNCCONTROL_VERSION); -} - -static DEVICE_ATTR(fsync_enabled, S_IRUGO | S_IWUGO, fsynccontrol_status_read, fsynccontrol_status_write); -static DEVICE_ATTR(version, S_IRUGO , fsynccontrol_version, NULL); - -static struct attribute *fsynccontrol_attributes[] = - { - &dev_attr_fsync_enabled.attr, - &dev_attr_version.attr, - NULL - }; - -static struct attribute_group fsynccontrol_group = - { - .attrs = fsynccontrol_attributes, - }; - -static struct miscdevice fsynccontrol_device = - { - .minor = MISC_DYNAMIC_MINOR, - .name = "fsynccontrol", - }; - -static int __init fsynccontrol_init(void) -{ - int ret; - - pr_info("%s misc_register(%s)\n", __FUNCTION__, fsynccontrol_device.name); - - ret = misc_register(&fsynccontrol_device); - - if (ret) - { - pr_err("%s misc_register(%s) fail\n", __FUNCTION__, fsynccontrol_device.name); - return 1; - } - - if (sysfs_create_group(&fsynccontrol_device.this_device->kobj, &fsynccontrol_group) < 0) - { - pr_err("%s sysfs_create_group fail\n", __FUNCTION__); - pr_err("Failed to create sysfs group for device (%s)!\n", fsynccontrol_device.name); - } - - return 0; -} - -device_initcall(fsynccontrol_init); From c7f9ba779c0554b64f630fd00807bc16573fc534 Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 6 Oct 2013 01:28:13 -0400 Subject: [PATCH 578/678] ektf3k.c: add d2w wakelock interface --- drivers/input/touchscreen/ektf3k.c | 49 ++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/ektf3k.c b/drivers/input/touchscreen/ektf3k.c index 7d07d46e191..61eab07af18 100755 --- a/drivers/input/touchscreen/ektf3k.c +++ b/drivers/input/touchscreen/ektf3k.c @@ -216,7 +216,7 @@ int s2w_begin_v = 150; int s2w_end_v = 1200; 
int s2w_begin_h = 350; int s2w_end_h = 1900; -int shortsweep = 1; +int shortsweep = 0; bool scr_suspended = false; int tripoff_vl = 0; int tripoff_vr = 0; @@ -236,11 +236,15 @@ unsigned int dt2w_y[2] = {0, 0}; unsigned int dt2w_2_x[2] = {0, 0}; unsigned int dt2w_2_y[2] = {0, 0}; //int is_suspended = 0; -#define S2W_TIMEOUT 30 +#define S2W_TIMEOUT 50 #define DT2W_TIMEOUT_MAX 50 #define DT2W_TIMEOUT_MIN 4 #define DT2W_DELTA 150 +static struct wake_lock d2w_wakelock; + +int wake_timeout = 60; + void sweep2wake_setdev(struct input_dev * input_device) { sweep2wake_pwrdev = input_device; return; @@ -738,6 +742,29 @@ static ssize_t elan_ktf3k_doubletap2wake_dump(struct device *dev, struct device_ static DEVICE_ATTR(doubletap2wake, (S_IWUSR|S_IRUGO), elan_ktf3k_doubletap2wake_show, elan_ktf3k_doubletap2wake_dump); +static ssize_t elan_ktf3k_wake_timeout_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + size_t count = 0; + count += sprintf(buf, "%d\n", wake_timeout); + return count; +} + +static ssize_t elan_ktf3k_wake_timeout_dump(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + wake_timeout = input; + + return count; +} + +static DEVICE_ATTR(wake_timeout, (S_IWUSR|S_IRUGO), + elan_ktf3k_wake_timeout_show, elan_ktf3k_wake_timeout_dump); + /* end sweep2wake sysfs*/ @@ -821,6 +848,7 @@ static struct attribute *elan_attr[] = { /* sweep2wake sysfs */ &dev_attr_sweep2wake.attr, &dev_attr_doubletap2wake.attr, + &dev_attr_wake_timeout.attr, NULL }; @@ -864,6 +892,11 @@ static int elan_ktf3k_touch_sysfs_init(void) touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); return ret; } + ret = sysfs_create_file(android_touch_kobj, &dev_attr_wake_timeout.attr); + if (ret) { + touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); + return ret; + } return 0 ; } @@ -875,6 +908,7 @@ static void elan_touch_sysfs_deinit(void) sysfs_remove_file(android_touch_kobj, &dev_attr_sweep2wake.attr); sysfs_remove_file(android_touch_kobj, &dev_attr_doubletap2wake.attr); sysfs_remove_file(android_touch_kobj, &dev_attr_shortsweep.attr); + sysfs_remove_file(android_touch_kobj, &dev_attr_wake_timeout.attr); kobject_del(android_touch_kobj); } @@ -1935,6 +1969,7 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, ts->status = 1; // set I2C status is OK; wake_lock_init(&ts->wakelock, WAKE_LOCK_SUSPEND, "elan_touch"); + wake_lock_init(&d2w_wakelock, WAKE_LOCK_SUSPEND, "d2w_wakelock"); if(err==0x80) touch_debug(DEBUG_INFO, "[ELAN] Touch is in boot mode!\n"); @@ -2085,6 +2120,7 @@ static int elan_ktf3k_ts_remove(struct i2c_client *client) destroy_workqueue(ts->elan_wq); input_unregister_device(ts->input_dev); wake_lock_destroy(&ts->wakelock); + wake_lock_destroy(&d2w_wakelock); #ifdef TOUCH_STRESS_TEST misc_deregister(&ts->misc_dev); #endif @@ -2133,7 +2169,11 @@ static int elan_ktf3k_ts_suspend(struct i2c_client *client, pm_message_t mesg) rc = elan_ktf3k_ts_set_power_state(client, PWR_STATE_DEEP_SLEEP); /*s2w*/ scr_suspended = true; - + if (wake_timeout == 0) { + wake_lock(&d2w_wakelock); + } else { + wake_lock_timeout(&d2w_wakelock, 100 * wake_timeout); + } return 0; } @@ -2173,6 +2213,9 @@ static int elan_ktf3k_ts_resume(struct i2c_client *client) dt2w_switch = dt2w_switch_temp; scr_suspended = false; + + if (wake_lock_active(&d2w_wakelock)) + wake_unlock(&d2w_wakelock); /* end s2w */ return 0; From 
f01fa6a01703a711a781f677d725a9b38832ce2c Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 6 Oct 2013 01:29:38 -0400 Subject: [PATCH 579/678] defconfig: a62 (updated) --- arch/arm/configs/metallice_grouper_defconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 5e94dc1f27c..914188c1f7c 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -44,10 +44,10 @@ CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y CONFIG_HAVE_KERNEL_LZO=y CONFIG_HAVE_KERNEL_LZ4=y -# CONFIG_KERNEL_GZIP is not set +CONFIG_KERNEL_GZIP=y # CONFIG_KERNEL_LZMA is not set # CONFIG_KERNEL_LZO is not set -CONFIG_KERNEL_LZ4=y +# CONFIG_KERNEL_LZ4 is not set CONFIG_DEFAULT_HOSTNAME="(none)" # CONFIG_SWAP is not set # CONFIG_SYSVIPC is not set @@ -1123,7 +1123,6 @@ CONFIG_EEPROM_AT24=y CONFIG_TEGRA_BB_SUPPORT=y CONFIG_TEGRA_BB_POWER=y CONFIG_TEGRA_BB_M7400=y -CONFIG_FSYNC_CONTROL=y CONFIG_HAVE_IDE=y # CONFIG_IDE is not set From 6909ede6af6d8e553b3f8dc87bf50194bdf452e9 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 17 Oct 2013 19:39:42 -0400 Subject: [PATCH 580/678] sweep2wake.h: add missing file --- include/linux/sweep2wake.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/sweep2wake.h diff --git a/include/linux/sweep2wake.h b/include/linux/sweep2wake.h new file mode 100644 index 00000000000..a1e2f5c3e16 --- /dev/null +++ b/include/linux/sweep2wake.h @@ -0,0 +1,23 @@ +/* +* include/linux/sweep2wake.h +* +* Copyright (c) 2013, Aaron Segaert (flar2) asegaert at gmail.com +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +* more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+*/ + + +extern void sweep2wake_setdev(struct input_dev * input_device); + From 3a8e3996bd7a95bffe3238436f3eb3d5232fdbe1 Mon Sep 17 00:00:00 2001 From: TripNRaVeR Date: Mon, 20 May 2013 23:53:26 +0200 Subject: [PATCH 581/678] video: tegra: backport Tegra SD Gen2 support from Tegra 4 Conflicts: drivers/video/tegra/Kconfig --- arch/arm/mach-tegra/include/mach/dc.h | 23 ++++ drivers/video/tegra/Kconfig | 6 + drivers/video/tegra/dc/dc_reg.h | 24 ++++ drivers/video/tegra/dc/nvsd.c | 156 ++++++++++++++++++++++++++ 4 files changed, 209 insertions(+) diff --git a/arch/arm/mach-tegra/include/mach/dc.h b/arch/arm/mach-tegra/include/mach/dc.h index a1e17cad96c..e19344dac15 100644 --- a/arch/arm/mach-tegra/include/mach/dc.h +++ b/arch/arm/mach-tegra/include/mach/dc.h @@ -269,6 +269,13 @@ struct tegra_dc_sd_agg_priorities { u8 agg[4]; }; +struct tegra_dc_sd_window { + u16 h_position; + u16 v_position; + u16 h_size; + u16 v_size; +}; + struct tegra_dc_sd_settings { unsigned enable; bool use_auto_pwm; @@ -290,6 +297,22 @@ struct tegra_dc_sd_settings { bool use_vid_luma; struct tegra_dc_sd_rgb coeff; + bool k_limit_enable; + u16 k_limit; + + bool sd_window_enable; + struct tegra_dc_sd_window sd_window; + + bool soft_clipping_enable; + u8 soft_clipping_threshold; + + bool smooth_k_enable; + u16 smooth_k_incr; + + bool sd_proc_control; + bool soft_clipping_correction; + bool use_vpulse2; + struct tegra_dc_sd_fc fc; struct tegra_dc_sd_blp blp; u8 bltf[4][4][4]; diff --git a/drivers/video/tegra/Kconfig b/drivers/video/tegra/Kconfig index 7de26267155..01d99e78cba 100644 --- a/drivers/video/tegra/Kconfig +++ b/drivers/video/tegra/Kconfig @@ -33,6 +33,12 @@ config TEGRA_DC_EXTENSIONS This exposes support for extended capabilities of the Tegra display controller to userspace drivers. 
+config TEGRA_SD_GEN2 + bool "Tegra SD Gen2 support" + default n + help + backported from Tegra4 (tripndroid) + config TEGRA_NVMAP bool "Tegra GPU memory management driver (nvmap)" default y diff --git a/drivers/video/tegra/dc/dc_reg.h b/drivers/video/tegra/dc/dc_reg.h index 2b8f8becb15..c1165e6eec3 100644 --- a/drivers/video/tegra/dc/dc_reg.h +++ b/drivers/video/tegra/dc/dc_reg.h @@ -491,6 +491,12 @@ #define SD_ONESHOT_ENABLE (1 << 10) #define SD_CORRECTION_MODE_AUTO (0 << 11) #define SD_CORRECTION_MODE_MAN (1 << 11) +#define SD_K_LIMIT_ENABLE (1 << 12) +#define SD_WINDOW_ENABLE (1 << 13) +#define SD_SOFT_CLIPPING_ENABLE (1 << 14) +#define SD_SMOOTH_K_ENABLE (1 << 15) +#define SD_VSYNC (0 << 28) +#define SD_VPULSE2 (1 << 28) #define NUM_BIN_WIDTHS 4 #define STEPS_PER_AGG_LVL 64 @@ -549,6 +555,24 @@ #define SD_MAN_K_G(x) (((x) & 0x3ff) << 10) #define SD_MAN_K_B(x) (((x) & 0x3ff) << 20) +#define DC_DISP_SD_K_LIMIT 0x4df +#define SD_K_LIMIT(x) (((x) & 0x3ff) << 0) + +#define DC_DISP_SD_WINDOW_POSITION 0x4e0 +#define SD_WIN_H_POSITION(x) (((x) & 0x1fff) << 0) +#define SD_WIN_V_POSITION(x) (((x) & 0x1fff) << 16) + +#define DC_DISP_SD_WINDOW_SIZE 0x4e1 +#define SD_WIN_H_SIZE(x) (((x) & 0x1fff) << 0) +#define SD_WIN_V_SIZE(x) (((x) & 0x1fff) << 16) + +#define DC_DISP_SD_SOFT_CLIPPING 0x4e2 +#define SD_SOFT_CLIPPING_THRESHOLD(x) (((x) & 0xff) << 0) +#define SD_SOFT_CLIPPING_RECIP(x) (((x) & 0xffff) << 16) + +#define DC_DISP_SD_SMOOTH_K 0x4e3 +#define SD_SMOOTH_K_INCR(x) (((x) & 0x3fff) << 0) + #define NUM_AGG_PRI_LVLS 4 #define SD_AGG_PRI_LVL(x) ((x) >> 3) #define SD_GET_AGG(x) ((x) & 0x7) diff --git a/drivers/video/tegra/dc/nvsd.c b/drivers/video/tegra/dc/nvsd.c index f320a1cfb3e..8eed135eb74 100644 --- a/drivers/video/tegra/dc/nvsd.c +++ b/drivers/video/tegra/dc/nvsd.c @@ -55,6 +55,17 @@ NVSD_ATTR(fc_time_limit); NVSD_ATTR(fc_threshold); NVSD_ATTR(lut); NVSD_ATTR(bltf); +#ifdef CONFIG_TEGRA_SD_GEN2 +NVSD_ATTR(k_limit_enable); +NVSD_ATTR(k_limit); +NVSD_ATTR(sd_window_enable); +NVSD_ATTR(sd_window); +NVSD_ATTR(soft_clipping_enable); +NVSD_ATTR(soft_clipping_threshold); +NVSD_ATTR(smooth_k_enable); +NVSD_ATTR(smooth_k_incr); +NVSD_ATTR(use_vpulse2); +#endif static struct kobj_attribute nvsd_attr_registers = __ATTR(registers, S_IRUGO, nvsd_registers_show, NULL); @@ -74,6 +85,17 @@ static struct attribute *nvsd_attrs[] = { NVSD_ATTRS_ENTRY(lut), NVSD_ATTRS_ENTRY(bltf), NVSD_ATTRS_ENTRY(registers), +#ifdef CONFIG_TEGRA_SD_GEN2 + NVSD_ATTRS_ENTRY(k_limit_enable), + NVSD_ATTRS_ENTRY(k_limit), + NVSD_ATTRS_ENTRY(sd_window_enable), + NVSD_ATTRS_ENTRY(sd_window), + NVSD_ATTRS_ENTRY(soft_clipping_enable), + NVSD_ATTRS_ENTRY(soft_clipping_threshold), + NVSD_ATTRS_ENTRY(smooth_k_enable), + NVSD_ATTRS_ENTRY(smooth_k_incr), + NVSD_ATTRS_ENTRY(use_vpulse2), +#endif NULL, }; @@ -429,6 +451,14 @@ void nvsd_init(struct tegra_dc *dc, struct tegra_dc_sd_settings *settings) bw_idx = nvsd_get_bw_idx(settings); + /* Values of SD LUT & BL TF are different according to bin_width on T30 + * due to HW bug. Therefore we use bin_width to select the correct table + * on T30. 
*/ + +#ifdef CONFIG_TEGRA_SD_GEN2 + bw_idx = 0; +#endif + /* Write LUT */ if (!settings->cmd) { dev_dbg(&dc->ndev->dev, " LUT:\n"); @@ -504,6 +534,49 @@ void nvsd_init(struct tegra_dc *dc, struct tegra_dc_sd_settings *settings) tegra_dc_writel(dc, val, DC_DISP_SD_FLICKER_CONTROL); dev_dbg(&dc->ndev->dev, " FLICKER_CONTROL: 0x%08x\n", val); +#ifdef CONFIG_TEGRA_SD_GEN2 + /* Write K limit */ + if (settings->k_limit_enable) { + val = settings->k_limit; + if (val < 128) + val = 128; + else if (val > 255) + val = 255; + val = SD_K_LIMIT(val); + tegra_dc_writel(dc, val, DC_DISP_SD_K_LIMIT); + dev_dbg(&dc->ndev->dev, " K_LIMIT: 0x%08x\n", val); + } + + if (settings->sd_window_enable) { + /* Write sd window */ + val = SD_WIN_H_POSITION(settings->sd_window.h_position) | + SD_WIN_V_POSITION(settings->sd_window.v_position); + tegra_dc_writel(dc, val, DC_DISP_SD_WINDOW_POSITION); + dev_dbg(&dc->ndev->dev, " SD_WINDOW_POSITION: 0x%08x\n", val); + + val = SD_WIN_H_POSITION(settings->sd_window.h_size) | + SD_WIN_V_POSITION(settings->sd_window.v_size); + tegra_dc_writel(dc, val, DC_DISP_SD_WINDOW_SIZE); + dev_dbg(&dc->ndev->dev, " SD_WINDOW_SIZE: 0x%08x\n", val); + } + + if (settings->soft_clipping_enable) { + /* Write soft clipping */ + val = (64 * 1024) / (256 - settings->soft_clipping_threshold); + val = SD_SOFT_CLIPPING_RECIP(val) | + SD_SOFT_CLIPPING_THRESHOLD(settings->soft_clipping_threshold); + tegra_dc_writel(dc, val, DC_DISP_SD_SOFT_CLIPPING); + dev_dbg(&dc->ndev->dev, " SOFT_CLIPPING: 0x%08x\n", val); + } + + if (settings->smooth_k_enable) { + /* Write K incr value */ + val = SD_SMOOTH_K_INCR(settings->smooth_k_incr); + tegra_dc_writel(dc, val, DC_DISP_SD_SMOOTH_K); + dev_dbg(&dc->ndev->dev, " SMOOTH_K: 0x%08x\n", val); + } +#endif + /* Manage SD Control */ val = 0; /* Stay in manual correction mode until the next flip. */ @@ -520,6 +593,18 @@ void nvsd_init(struct tegra_dc *dc, struct tegra_dc_sd_settings *settings) val |= SD_AGGRESSIVENESS(settings->aggressiveness); /* Bin Width (value derived from bw_idx) */ val |= bw_idx << 3; +#ifdef CONFIG_TEGRA_SD_GEN2 + /* K limit enable */ + val |= (settings->k_limit_enable) ? SD_K_LIMIT_ENABLE : 0; + /* Programmable sd window enable */ + val |= (settings->sd_window_enable) ? SD_WINDOW_ENABLE : 0; + /* Soft clipping enable */ + val |= (settings->soft_clipping_enable) ? SD_SOFT_CLIPPING_ENABLE : 0; + /* Smooth K enable */ + val |= (settings->smooth_k_enable) ? SD_SMOOTH_K_ENABLE : 0; + /* SD proc control */ + val |= (settings->use_vpulse2) ? 
SD_VPULSE2 : SD_VSYNC; +#endif /* Finally, Write SD Control */ tegra_dc_writel(dc, val, DC_DISP_SD_CONTROL); dev_dbg(&dc->ndev->dev, " SD_CONTROL: 0x%08x\n", val); @@ -670,6 +755,39 @@ static ssize_t nvsd_settings_show(struct kobject *kobj, else if (IS_NVSD_ATTR(fc_threshold)) res = snprintf(buf, PAGE_SIZE, "%d\n", sd_settings->fc.threshold); +#ifdef CONFIG_TEGRA_SD_GEN2 + else if (IS_NVSD_ATTR(k_limit_enable)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->k_limit_enable); + else if (IS_NVSD_ATTR(k_limit)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->k_limit); + else if (IS_NVSD_ATTR(sd_window_enable)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->sd_window_enable); + else if (IS_NVSD_ATTR(sd_window)) + res = snprintf(buf, PAGE_SIZE, + "x: %d, y: %d, w: %d, h: %d\n", + sd_settings->sd_window.h_position, + sd_settings->sd_window.v_position, + sd_settings->sd_window.h_size, + sd_settings->sd_window.v_size); + else if (IS_NVSD_ATTR(soft_clipping_enable)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->soft_clipping_enable); + else if (IS_NVSD_ATTR(soft_clipping_threshold)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->soft_clipping_threshold); + else if (IS_NVSD_ATTR(smooth_k_enable)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->smooth_k_enable); + else if (IS_NVSD_ATTR(smooth_k_incr)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->smooth_k_incr); + else if (IS_NVSD_ATTR(use_vpulse2)) + res = snprintf(buf, PAGE_SIZE, "%d\n", + sd_settings->use_vpulse2); +#endif else if (IS_NVSD_ATTR(lut)) res = nvsd_lut_show(sd_settings, buf, res); else if (IS_NVSD_ATTR(bltf)) @@ -824,6 +942,37 @@ static ssize_t nvsd_settings_store(struct kobject *kobj, nvsd_check_and_update(0, 255, fc.time_limit); } else if (IS_NVSD_ATTR(fc_threshold)) { nvsd_check_and_update(0, 255, fc.threshold); +#ifdef CONFIG_TEGRA_SD_GEN2 + } else if (IS_NVSD_ATTR(k_limit_enable)) { + nvsd_check_and_update(0, 1, k_limit_enable); + } else if (IS_NVSD_ATTR(k_limit)) { + nvsd_check_and_update(128, 255, k_limit); + } else if (IS_NVSD_ATTR(sd_window_enable)) { + nvsd_check_and_update(0, 1, sd_window_enable); + } else if (IS_NVSD_ATTR(sd_window)) { + int ele[4], i = 0, num = 4; + nvsd_get_multi(ele, num, i, 0, LONG_MAX); + + if (i == num) { + sd_settings->sd_window.h_position = ele[0]; + sd_settings->sd_window.v_position = ele[1]; + sd_settings->sd_window.h_size = ele[2]; + sd_settings->sd_window.v_size = ele[3]; + settings_updated = true; + } else { + res = -EINVAL; + } + } else if (IS_NVSD_ATTR(soft_clipping_enable)) { + nvsd_check_and_update(0, 1, soft_clipping_enable); + } else if (IS_NVSD_ATTR(soft_clipping_threshold)) { + nvsd_check_and_update(0, 255, soft_clipping_threshold); + } else if (IS_NVSD_ATTR(smooth_k_enable)) { + nvsd_check_and_update(0, 1, smooth_k_enable); + } else if (IS_NVSD_ATTR(smooth_k_incr)) { + nvsd_check_and_update(0, 16320, smooth_k_incr); + } else if (IS_NVSD_ATTR(use_vpulse2)) { + nvsd_check_and_update(0, 1, use_vpulse2); +#endif } else if (IS_NVSD_ATTR(lut)) { if (nvsd_lut_store(sd_settings, buf)) res = -EINVAL; @@ -912,6 +1061,13 @@ static ssize_t nvsd_registers_show(struct kobject *kobj, NVSD_PRINT_REG(DC_DISP_SD_BL_CONTROL); NVSD_PRINT_REG(DC_DISP_SD_HW_K_VALUES); NVSD_PRINT_REG(DC_DISP_SD_MAN_K_VALUES); +#ifdef CONFIG_TEGRA_SD_GEN2 + NVSD_PRINT_REG(DC_DISP_SD_K_LIMIT); + NVSD_PRINT_REG(DC_DISP_SD_WINDOW_POSITION); + NVSD_PRINT_REG(DC_DISP_SD_WINDOW_SIZE); + NVSD_PRINT_REG(DC_DISP_SD_SOFT_CLIPPING); + NVSD_PRINT_REG(DC_DISP_SD_SMOOTH_K); 
+#endif return res; } From d87e7a26207038701eed147f3af0a05cec4cbaba Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 21 Oct 2013 17:47:52 -0400 Subject: [PATCH 582/678] mach-tegra: board-grouper-panel.c: updated for updated smartdimmer --- arch/arm/mach-tegra/board-grouper-panel.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/mach-tegra/board-grouper-panel.c b/arch/arm/mach-tegra/board-grouper-panel.c index 1566de965cb..a8a643b2668 100755 --- a/arch/arm/mach-tegra/board-grouper-panel.c +++ b/arch/arm/mach-tegra/board-grouper-panel.c @@ -446,6 +446,16 @@ static struct tegra_dc_sd_settings grouper_sd_settings = { .aggressiveness = 1, .phase_in_adjustments = true, .panel_min_brightness = 10, +#ifdef CONFIG_TEGRA_SD_GEN2 + .k_limit_enable = true, + .k_limit = 180, + .sd_window_enable = false, + .soft_clipping_enable = true, + /* Low soft clipping threshold to compensate for aggressive k_limit */ + .soft_clipping_threshold = 128, + .smooth_k_enable = true, + .smooth_k_incr = 4, +#endif .use_vid_luma = false, /* Default video coefficients */ .coeff = {5, 9, 2}, From 17d0bf5afd1927c0b2ef0495d8f529ce09821a2f Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 21 Oct 2013 17:52:29 -0400 Subject: [PATCH 583/678] defconfig: a63 --- arch/arm/configs/metallice_grouper_defconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 914188c1f7c..f9eb6cf11e2 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a62" +CONFIG_LOCALVERSION="-MKernel-a63" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -2154,6 +2154,7 @@ CONFIG_TEGRA_GRHOST=y CONFIG_TEGRA_DC=y CONFIG_FB_TEGRA=y CONFIG_TEGRA_DC_EXTENSIONS=y +CONFIG_TEGRA_SD_GEN2=y CONFIG_TEGRA_NVMAP=y CONFIG_NVMAP_RECLAIM_UNPINNED_VM=y CONFIG_NVMAP_ALLOW_SYSMEM=y From 2246255519edb53761f9dab18898067d3afcc1f6 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 8 Mar 2013 02:07:16 +0000 Subject: [PATCH 584/678] ipv6: introdcue __ipv6_addr_needs_scope_id and ipv6_iface_scope_id helper functions [net-next commit b7ef213ef65256168df83ddfbb8131ed9adc10f9] __ipv6_addr_needs_scope_id checks if an ipv6 address needs to supply a 'sin6_scope_id != 0'. 'sin6_scope_id != 0' was enforced in case of link-local addresses. To support interface-local multicast these checks had to be enhanced and are now consolidated into these new helper functions. v2: a) migrated to struct ipv6_addr_props v3: a) reverted changes for ipv6_addr_props b) test for address type instead of comparing scope v4: a) unchanged Change-Id: Id6fc54cec61f967928e08a9eba4f857157d973a3 Suggested-by: YOSHIFUJI Hideaki Cc: YOSHIFUJI Hideaki Acked-by: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/net/ipv6.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3b5ac1fbff3..b47e743b1d7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -285,6 +285,18 @@ static inline int ipv6_addr_src_scope(const struct in6_addr *addr) return __ipv6_addr_src_scope(__ipv6_addr_type(addr)); } +static inline bool __ipv6_addr_needs_scope_id(int type) +{ + return type & IPV6_ADDR_LINKLOCAL || + (type & IPV6_ADDR_MULTICAST && + (type & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL))); +} + +static inline __u32 ipv6_iface_scope_id(const struct in6_addr *addr, int iface) +{ + return __ipv6_addr_needs_scope_id(__ipv6_addr_type(addr)) ? iface : 0; +} + static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2) { return memcmp(a1, a2, sizeof(struct in6_addr)); From f105bf93fbac869b46b5e72a70c408a38654f998 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Jan 2013 22:09:49 +0000 Subject: [PATCH 585/678] net: ipv6: Add IPv6 support to the ping socket. [backport of net-next 6d0bfe22611602f36617bc7aa2ffa1bbb2f54c67] This adds the ability to send ICMPv6 echo requests without a raw socket. The equivalent ability for ICMPv4 was added in 2011. Instead of having separate code paths for IPv4 and IPv6, make most of the code in net/ipv4/ping.c dual-stack and only add a few IPv6-specific bits (like the protocol definition) to a new net/ipv6/ping.c. Hopefully this will reduce divergence and/or duplication of bugs in the future. Caveats: - Setting options via ancillary data (e.g., using IPV6_PKTINFO to specify the outgoing interface) is not yet supported. - There are no separate security settings for IPv4 and IPv6; everything is controlled by /proc/net/ipv4/ping_group_range. - The proc interface does not yet display IPv6 ping sockets properly. Tested with a patched copy of ping6 and using raw socket calls. Compiles and works with all of CONFIG_IPV6={n,m,y}. Change-Id: Ia359af556021344fc7f890c21383aadf950b6498 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. 
Miller [lorenzo@google.com: backported to 3.0] Signed-off-by: Lorenzo Colitti Conflicts: net/ipv4/ping.c --- include/net/ipv6.h | 8 + include/net/ping.h | 50 +++- include/net/transp_v6.h | 3 + net/ipv4/af_inet.c | 2 +- net/ipv4/icmp.c | 2 +- net/ipv4/ping.c | 564 ++++++++++++++++++++++++++++------------ net/ipv6/Makefile | 2 +- net/ipv6/af_inet6.c | 12 + net/ipv6/icmp.c | 27 +- net/ipv6/ping.c | 212 +++++++++++++++ 10 files changed, 710 insertions(+), 172 deletions(-) create mode 100644 net/ipv6/ping.c diff --git a/include/net/ipv6.h b/include/net/ipv6.h index b47e743b1d7..0bca86fc77d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -243,6 +243,14 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl) atomic_dec(&fl->users); } +extern void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); + +int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len); + +struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, + struct sock *sk, struct flowi6 *fl6); + extern int ip6_ra_control(struct sock *sk, int sel); extern int ipv6_parse_hopopts(struct sk_buff *skb); diff --git a/include/net/ping.h b/include/net/ping.h index 682b5ae9af5..c103135efe2 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -13,6 +13,7 @@ #ifndef _PING_H #define _PING_H +#include #include /* PING_HTABLE_SIZE must be power of 2 */ @@ -28,6 +29,18 @@ */ #define GID_T_MAX (((gid_t)~0U) >> 1) +/* Compatibility glue so we can support IPv6 when it's compiled as a module */ +struct pingv6_ops { + int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len); + int (*datagram_recv_ctl)(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb); + int (*icmpv6_err_convert)(u8 type, u8 code, int *err); + void (*ipv6_icmp_error)(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload); + int (*ipv6_chk_addr)(struct net *net, const struct in6_addr *addr, + struct net_device *dev, int strict); +}; + struct ping_table { struct hlist_nulls_head hash[PING_HTABLE_SIZE]; rwlock_t lock; @@ -39,10 +52,40 @@ struct ping_iter_state { }; extern struct proto ping_prot; +extern struct ping_table ping_table; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +extern struct pingv6_ops pingv6_ops; +#endif +struct pingfakehdr { + struct icmphdr icmph; + struct iovec *iov; + sa_family_t family; + __wsum wcheck; +}; -extern void ping_rcv(struct sk_buff *); -extern void ping_err(struct sk_buff *, u32 info); +int ping_get_port(struct sock *sk, unsigned short ident); +void ping_hash(struct sock *sk); +void ping_unhash(struct sock *sk); + +int ping_init_sock(struct sock *sk); +void ping_close(struct sock *sk, long timeout); +int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len); +void ping_err(struct sk_buff *skb, int offset, u32 info); +void ping_v4_err(struct sk_buff *skb, u32 info); +int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, + struct sk_buff *); + +int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len); +int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, + void *user_icmph, size_t icmph_len); +int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len); +int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len); +int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +void ping_rcv(struct sk_buff *skb); 
#ifdef CONFIG_PROC_FS extern int __init ping_proc_init(void); @@ -50,6 +93,7 @@ extern void ping_proc_exit(void); #endif void __init ping_init(void); - +int __init pingv6_init(void); +void pingv6_exit(void); #endif /* _PING_H */ diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 498433dd067..48b42ea9c2f 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -11,6 +11,7 @@ extern struct proto rawv6_prot; extern struct proto udpv6_prot; extern struct proto udplitev6_prot; extern struct proto tcpv6_prot; +extern struct proto pingv6_prot; struct flowi6; @@ -21,6 +22,8 @@ extern int ipv6_frag_init(void); extern void ipv6_frag_exit(void); /* transport protocols */ +extern int pingv6_init(void); +extern void pingv6_exit(void); extern int rawv6_init(void); extern void rawv6_exit(void); extern int udpv6_init(void); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index bf488051a8d..da5a884db47 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1559,7 +1559,7 @@ static const struct net_protocol udp_protocol = { static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, - .err_handler = ping_err, + .err_handler = ping_v4_err, .no_policy = 1, .netns_ok = 1, }; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 23ef31baa1a..cd9a67df0b5 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -790,7 +790,7 @@ static void icmp_redirect(struct sk_buff *skb) if (iph->protocol == IPPROTO_ICMP && iph->ihl >= 5 && pskb_may_pull(skb, (iph->ihl<<2)+8)) { - ping_err(skb, icmp_hdr(skb)->un.gateway); + ping_v4_err(skb, icmp_hdr(skb)->un.gateway); } out: diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 39b403f854c..290188ede55 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -46,8 +45,18 @@ #include #include +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#include +#include +#include +#include +#include +#endif -static struct ping_table ping_table; + +struct ping_table ping_table; +struct pingv6_ops pingv6_ops; +EXPORT_SYMBOL_GPL(pingv6_ops); static u16 ping_port_rover; @@ -57,6 +66,7 @@ static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask) pr_debug("hash(%d) = %d\n", num, res); return res; } +EXPORT_SYMBOL_GPL(ping_hash); static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, struct net *net, unsigned num) @@ -64,7 +74,7 @@ static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; } -static int ping_v4_get_port(struct sock *sk, unsigned short ident) +int ping_get_port(struct sock *sk, unsigned short ident) { struct hlist_nulls_node *node; struct hlist_nulls_head *hlist; @@ -102,6 +112,10 @@ static int ping_v4_get_port(struct sock *sk, unsigned short ident) ping_portaddr_for_each_entry(sk2, node, hlist) { isk2 = inet_sk(sk2); + /* BUG? Why is this reuse and not reuseaddr? ping.c + * doesn't turn off SO_REUSEADDR, and it doesn't expect + * that other ping processes can steal its packets. 
+ */ if ((isk2->inet_num == ident) && (sk2 != sk) && (!sk2->sk_reuse || !sk->sk_reuse)) @@ -124,17 +138,18 @@ static int ping_v4_get_port(struct sock *sk, unsigned short ident) write_unlock_bh(&ping_table.lock); return 1; } +EXPORT_SYMBOL_GPL(ping_get_port); -static void ping_v4_hash(struct sock *sk) +void ping_hash(struct sock *sk) { - pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); + pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); BUG(); /* "Please do not press this button again." */ } -static void ping_v4_unhash(struct sock *sk) +void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); - pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); if (sk_hashed(sk)) { write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); @@ -144,31 +159,61 @@ static void ping_v4_unhash(struct sock *sk) write_unlock_bh(&ping_table.lock); } } +EXPORT_SYMBOL_GPL(ping_unhash); -static struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr, - u16 ident, int dif) +static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) { struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; + int dif = skb->dev->ifindex; + + if (skb->protocol == htons(ETH_P_IP)) { + pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", + (int)ident, &ip_hdr(skb)->daddr, dif); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", + (int)ident, &ipv6_hdr(skb)->daddr, dif); +#endif + } - pr_debug("try to find: num = %d, daddr = %ld, dif = %d\n", - (int)ident, (unsigned long)daddr, dif); read_lock_bh(&ping_table.lock); ping_portaddr_for_each_entry(sk, hnode, hslot) { isk = inet_sk(sk); - pr_debug("found: %p: num = %d, daddr = %ld, dif = %d\n", sk, - (int)isk->inet_num, (unsigned long)isk->inet_rcv_saddr, - sk->sk_bound_dev_if); - pr_debug("iterate\n"); if (isk->inet_num != ident) continue; - if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr) - continue; + + if (skb->protocol == htons(ETH_P_IP) && + sk->sk_family == AF_INET) { + pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk, + (int) isk->inet_num, &isk->inet_rcv_saddr, + sk->sk_bound_dev_if); + + if (isk->inet_rcv_saddr && + isk->inet_rcv_saddr != ip_hdr(skb)->daddr) + continue; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (skb->protocol == htons(ETH_P_IPV6) && + sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, + (int) isk->inet_num, + &inet6_sk(sk)->rcv_saddr, + sk->sk_bound_dev_if); + + if (!ipv6_addr_any(&np->rcv_saddr) && + !ipv6_addr_equal(&np->rcv_saddr, + &ipv6_hdr(skb)->daddr)) + continue; +#endif + } + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; @@ -197,7 +242,7 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low, } -static int ping_init_sock(struct sock *sk) +int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); gid_t group = current_egid(); @@ -223,8 +268,9 @@ static int ping_init_sock(struct sock *sk) return -EACCES; } +EXPORT_SYMBOL_GPL(ping_init_sock); -static void ping_close(struct sock *sk, long timeout) +void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", inet_sk(sk), 
inet_sk(sk)->inet_num); @@ -232,36 +278,122 @@ static void ping_close(struct sock *sk, long timeout) sk_common_release(sk); } +EXPORT_SYMBOL_GPL(ping_close); + +/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ +int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, + struct sockaddr *uaddr, int addr_len) { + struct net *net = sock_net(sk); + if (sk->sk_family == AF_INET) { + struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + int chk_addr_ret; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", + sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); + + chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + + if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) + chk_addr_ret = RTN_LOCAL; + + if ((sysctl_ip_nonlocal_bind == 0 && + isk->freebind == 0 && isk->transparent == 0 && + chk_addr_ret != RTN_LOCAL) || + chk_addr_ret == RTN_MULTICAST || + chk_addr_ret == RTN_BROADCAST) + return -EADDRNOTAVAIL; + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (sk->sk_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; + int addr_type, scoped, has_addr; + struct net_device *dev = NULL; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", + sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); + + addr_type = ipv6_addr_type(&addr->sin6_addr); + scoped = __ipv6_addr_needs_scope_id(addr_type); + if ((addr_type != IPV6_ADDR_ANY && + !(addr_type & IPV6_ADDR_UNICAST)) || + (scoped && !addr->sin6_scope_id)) + return -EINVAL; + + rcu_read_lock(); + if (addr->sin6_scope_id) { + dev = dev_get_by_index_rcu(net, addr->sin6_scope_id); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + } + has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, + scoped); + rcu_read_unlock(); + + if (!(isk->freebind || isk->transparent || has_addr || + addr_type == IPV6_ADDR_ANY)) + return -EADDRNOTAVAIL; + + if (scoped) + sk->sk_bound_dev_if = addr->sin6_scope_id; +#endif + } else { + return -EAFNOSUPPORT; + } + return 0; +} +void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) +{ + if (saddr->sa_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + struct sockaddr_in *addr = (struct sockaddr_in *) saddr; + isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (saddr->sa_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; + struct ipv6_pinfo *np = inet6_sk(sk); + np->rcv_saddr = np->saddr = addr->sin6_addr; +#endif + } +} + +void ping_clear_saddr(struct sock *sk, int dif) +{ + sk->sk_bound_dev_if = dif; + if (sk->sk_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + isk->inet_rcv_saddr = isk->inet_saddr = 0; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&np->saddr, 0, sizeof(np->saddr)); +#endif + } +} /* * We need our own bind because there are no privileged id's == local ports. * Moreover, we don't allow binding to multi- and broadcast addresses. 
*/ -static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct inet_sock *isk = inet_sk(sk); unsigned short snum; - int chk_addr_ret; int err; + int dif = sk->sk_bound_dev_if; - if (addr_len < sizeof(struct sockaddr_in)) - return -EINVAL; - - pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n", - sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); - - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); - if (addr->sin_addr.s_addr == INADDR_ANY) - chk_addr_ret = RTN_LOCAL; - - if ((sysctl_ip_nonlocal_bind == 0 && - isk->freebind == 0 && isk->transparent == 0 && - chk_addr_ret != RTN_LOCAL) || - chk_addr_ret == RTN_MULTICAST || - chk_addr_ret == RTN_BROADCAST) - return -EADDRNOTAVAIL; + err = ping_check_bind_addr(sk, isk, uaddr, addr_len); + if (err) + return err; lock_sock(sk); @@ -270,42 +402,50 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto out; err = -EADDRINUSE; - isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; - snum = ntohs(addr->sin_port); - if (ping_v4_get_port(sk, snum) != 0) { - isk->inet_saddr = isk->inet_rcv_saddr = 0; + ping_set_saddr(sk, uaddr); + snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port); + if (ping_get_port(sk, snum) != 0) { + ping_clear_saddr(sk, dif); goto out; } - pr_debug("after bind(): num = %d, daddr = %ld, dif = %d\n", - (int)isk->inet_num, - (unsigned long) isk->inet_rcv_saddr, - (int)sk->sk_bound_dev_if); + pr_debug("after bind(): num = %d, dif = %d\n", + (int)isk->inet_num, + (int)sk->sk_bound_dev_if); err = 0; - if (isk->inet_rcv_saddr) + if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) || + (sk->sk_family == AF_INET6 && + !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr))) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; + if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; isk->inet_sport = htons(isk->inet_num); isk->inet_daddr = 0; isk->inet_dport = 0; + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if (sk->sk_family == AF_INET6) + memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr)); +#endif + sk_dst_reset(sk); out: release_sock(sk); pr_debug("ping_v4_bind -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_bind); /* * Is this a supported type of ICMP message? */ -static inline int ping_supported(int type, int code) +static inline int ping_supported(int family, int type, int code) { - if (type == ICMP_ECHO && code == 0) - return 1; - return 0; + return (family == AF_INET && type == ICMP_ECHO && code == 0) || + (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0); } /* @@ -313,30 +453,44 @@ static inline int ping_supported(int type, int code) * sort of error condition. 
*/ -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); - -void ping_err(struct sk_buff *skb, u32 info) +void ping_err(struct sk_buff *skb, int offset, u32 info) { - struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int family; + struct icmphdr *icmph; struct inet_sock *inet_sock; - int type = icmph->type; - int code = icmph->code; + int type; + int code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; int err; + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + offset = iph->ihl << 2; + family = AF_INET; + type = icmp_hdr(skb)->type; + code = icmp_hdr(skb)->code; + icmph = (struct icmphdr *)(skb->data + offset); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + family = AF_INET6; + type = icmp6_hdr(skb)->icmp6_type; + code = icmp6_hdr(skb)->icmp6_code; + icmph = (struct icmphdr *) (skb->data + offset); + } else { + BUG(); + } + /* We assume the packet has already been checked by icmp_unreach */ - if (!ping_supported(icmph->type, icmph->code)) + if (!ping_supported(family, icmph->type, icmph->code)) return; - pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type, - code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); + pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n", + skb->protocol, type, code, ntohs(icmph->un.echo.id), + ntohs(icmph->un.echo.sequence)); - sk = ping_v4_lookup(net, iph->daddr, iph->saddr, - ntohs(icmph->un.echo.id), skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk == NULL) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); pr_debug("no socket, dropping\n"); @@ -348,70 +502,85 @@ void ping_err(struct sk_buff *skb, u32 info) harderr = 0; inet_sock = inet_sk(sk); - switch (type) { - default: - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - case ICMP_SOURCE_QUENCH: - /* This is not a real error but ping wants to see it. - * Report it with some fake errno. */ - err = EREMOTEIO; - break; - case ICMP_PARAMETERPROB: - err = EPROTO; - harderr = 1; - break; - case ICMP_DEST_UNREACH: - if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { - err = EMSGSIZE; - harderr = 1; - break; + if (skb->protocol == htons(ETH_P_IP)) { + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + /* This is not a real error but ping wants to see it. + * Report it with some fake errno. */ + err = EREMOTEIO; + break; + case ICMP_PARAMETERPROB: + err = EPROTO; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { + err = EMSGSIZE; + harderr = 1; + break; + } + goto out; } - goto out; - } - err = EHOSTUNREACH; - if (code <= NR_ICMP_UNREACH) { - harderr = icmp_err_convert[code].fatal; - err = icmp_err_convert[code].errno; + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + case ICMP_REDIRECT: + /* See ICMP_SOURCE_QUENCH */ + err = EREMOTEIO; + break; } - break; - case ICMP_REDIRECT: - /* See ICMP_SOURCE_QUENCH */ - err = EREMOTEIO; - break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + harderr = pingv6_ops.icmpv6_err_convert(type, code, &err); +#endif } /* * RFC1122: OK. 
Passes ICMP errors back to application, as per * 4.1.3.3. */ - if (!inet_sock->recverr) { + if ((family == AF_INET && !inet_sock->recverr) || + (family == AF_INET6 && !inet6_sk(sk)->recverr)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { - ip_icmp_error(sk, skb, err, 0 /* no remote port */, - info, (u8 *)icmph); + if (family == AF_INET) { + ip_icmp_error(sk, skb, err, 0 /* no remote port */, + info, (u8 *)icmph); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (family == AF_INET6) { + pingv6_ops.ipv6_icmp_error(sk, skb, err, 0, + info, (u8 *)icmph); +#endif + } } sk->sk_err = err; sk->sk_error_report(sk); out: sock_put(sk); } +EXPORT_SYMBOL_GPL(ping_err); + +void ping_v4_err(struct sk_buff *skb, u32 info) +{ + ping_err(skb, 0, info); +} /* - * Copy and checksum an ICMP Echo packet from user space into a buffer. + * Copy and checksum an ICMP Echo packet from user space into a buffer + * starting from the payload. */ -struct pingfakehdr { - struct icmphdr icmph; - struct iovec *iov; - u32 wcheck; -}; - -static int ping_getfrag(void *from, char * to, - int offset, int fraglen, int odd, struct sk_buff *skb) +int ping_getfrag(void *from, char *to, + int offset, int fraglen, int odd, struct sk_buff *skb) { struct pingfakehdr *pfh = (struct pingfakehdr *)from; @@ -422,20 +591,33 @@ static int ping_getfrag(void *from, char * to, pfh->iov, 0, fraglen - sizeof(struct icmphdr), &pfh->wcheck)) return -EFAULT; + } else if (offset < sizeof(struct icmphdr)) { + BUG(); + } else { + if (csum_partial_copy_fromiovecend + (to, pfh->iov, offset - sizeof(struct icmphdr), + fraglen, &pfh->wcheck)) + return -EFAULT; + } - return 0; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + /* For IPv6, checksum each skb as we go along, as expected by + * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in + * wcheck, it will be finalized in ping_v4_push_pending_frames. + */ + if (pfh->family == AF_INET6) { + skb->csum = pfh->wcheck; + skb->ip_summed = CHECKSUM_NONE; + pfh->wcheck = 0; } - if (offset < sizeof(struct icmphdr)) - BUG(); - if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) - return -EFAULT; +#endif + return 0; } +EXPORT_SYMBOL_GPL(ping_getfrag); -static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, - struct flowi4 *fl4) +static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, + struct flowi4 *fl4) { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); @@ -447,24 +629,9 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, return ip_push_pending_frames(sk, fl4); } -static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) -{ - struct net *net = sock_net(sk); - struct flowi4 fl4; - struct inet_sock *inet = inet_sk(sk); - struct ipcm_cookie ipc; - struct icmphdr user_icmph; - struct pingfakehdr pfh; - struct rtable *rt = NULL; - struct ip_options_data opt_copy; - int free = 0; - u32 saddr, daddr, faddr; - u8 tos; - int err; - - pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); - +int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, + void *user_icmph, size_t icmph_len) { + u8 type, code; if (len > 0xFFFF) return -EMSGSIZE; @@ -479,15 +646,53 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* * Fetch the ICMP header provided by the userland. - * iovec is modified! + * iovec is modified! 
The ICMP header is consumed. */ - - if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov, - sizeof(struct icmphdr))) + if (memcpy_fromiovec(user_icmph, msg->msg_iov, icmph_len)) return -EFAULT; - if (!ping_supported(user_icmph.type, user_icmph.code)) + + if (family == AF_INET) { + type = ((struct icmphdr *) user_icmph)->type; + code = ((struct icmphdr *) user_icmph)->code; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (family == AF_INET6) { + type = ((struct icmp6hdr *) user_icmph)->icmp6_type; + code = ((struct icmp6hdr *) user_icmph)->icmp6_code; +#endif + } else { + BUG(); + } + + if (!ping_supported(family, type, code)) return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(ping_common_sendmsg); + +int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct net *net = sock_net(sk); + struct flowi4 fl4; + struct inet_sock *inet = inet_sk(sk); + struct ipcm_cookie ipc; + struct icmphdr user_icmph; + struct pingfakehdr pfh; + struct rtable *rt = NULL; + struct ip_options_data opt_copy; + int free = 0; + __be32 saddr, daddr, faddr; + u8 tos; + int err; + + pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + /* * Get and verify the address. */ @@ -592,13 +797,14 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; pfh.iov = msg->msg_iov; pfh.wcheck = 0; + pfh.family = AF_INET; err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else - err = ping_push_pending_frames(sk, &pfh, &fl4); + err = ping_v4_push_pending_frames(sk, &pfh, &fl4); release_sock(sk); out: @@ -619,11 +825,13 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; } -static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + int family = sk->sk_family; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; struct sk_buff *skb; int copied, err; @@ -632,11 +840,22 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); + if (addr_len) { + if (family == AF_INET) + *addr_len = sizeof(*sin); + else if (family == AF_INET6 && addr_len) + *addr_len = sizeof(*sin6); + } - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + if (flags & MSG_ERRQUEUE) { + if (family == AF_INET) { + return ip_recv_error(sk, msg, len); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (family == AF_INET6) { + return pingv6_ops.ipv6_recv_error(sk, msg, len); +#endif + } + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -655,15 +874,41 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sock_recv_timestamp(msg, sk, skb); - /* Copy the address. */ - if (sin) { + /* Copy the address and add cmsg data. 
*/ + if (family == AF_INET) { + sin = (struct sockaddr_in *) msg->msg_name; sin->sin_family = AF_INET; sin->sin_port = 0 /* skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + + if (isk->cmsg_flags) + ip_cmsg_recv(msg, skb); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6hdr *ip6 = ipv6_hdr(skb); + sin6 = (struct sockaddr_in6 *) msg->msg_name; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_addr = ip6->saddr; + + if (np->sndflow) + sin6->sin6_flowinfo = + *(__be32 *)ip6 & IPV6_FLOWINFO_MASK; + + if (__ipv6_addr_needs_scope_id( + ipv6_addr_type(&sin6->sin6_addr))) + sin6->sin6_scope_id = IP6CB(skb)->iif; + + if (inet6_sk(sk)->rxopt.all) + pingv6_ops.datagram_recv_ctl(sk, msg, skb); +#endif + } else { + BUG(); } - if (isk->cmsg_flags) - ip_cmsg_recv(msg, skb); + err = copied; done: @@ -672,8 +917,9 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pr_debug("ping_recvmsg -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_recvmsg); -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); @@ -685,6 +931,7 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } return 0; } +EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); /* @@ -695,10 +942,7 @@ void ping_rcv(struct sk_buff *skb) { struct sock *sk; struct net *net = dev_net(skb->dev); - struct iphdr *iph = ip_hdr(skb); struct icmphdr *icmph = icmp_hdr(skb); - u32 saddr = iph->saddr; - u32 daddr = iph->daddr; /* We assume the packet has already been checked by icmp_rcv */ @@ -708,8 +952,7 @@ void ping_rcv(struct sk_buff *skb) /* Push ICMP header back */ skb_push(skb, skb->data - (u8 *)icmph); - sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id), - skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk != NULL) { pr_debug("rcv on socket %p\n", sk); ping_queue_rcv_skb(sk, skb_get(skb)); @@ -720,6 +963,7 @@ void ping_rcv(struct sk_buff *skb) /* We're called from icmp_rcv(). kfree_skb() is done there. 
*/ } +EXPORT_SYMBOL_GPL(ping_rcv); struct proto ping_prot = { .name = "PING", @@ -730,13 +974,13 @@ struct proto ping_prot = { .disconnect = udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .sendmsg = ping_sendmsg, + .sendmsg = ping_v4_sendmsg, .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, - .hash = ping_v4_hash, - .unhash = ping_v4_unhash, - .get_port = ping_v4_get_port, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, .obj_size = sizeof(struct inet_sock), }; EXPORT_SYMBOL(ping_prot); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 686934acfac..753be5dd409 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ - raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4252b3cc183..4be7f253a5f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -1129,6 +1130,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_udplite_proto; + err = proto_register(&pingv6_prot, 1); + if (err) + goto out_unregister_ping_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. @@ -1222,6 +1226,10 @@ static int __init inet6_init(void) if (err) goto ipv6_packet_fail; + err = pingv6_init(); + if (err) + goto pingv6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -1234,6 +1242,8 @@ static int __init inet6_init(void) sysctl_fail: ipv6_packet_cleanup(); #endif +pingv6_fail: + pingv6_exit(); ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: @@ -1281,6 +1291,8 @@ static int __init inet6_init(void) rtnl_unregister_all(PF_INET6); out_sock_register_fail: rawv6_exit(); +out_unregister_ping_proto: + proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 11900417b1c..6a12eda8761 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -55,6 +55,7 @@ #include #include +#include #include #include #include @@ -80,10 +81,22 @@ static inline struct sock *icmpv6_sk(struct net *net) return net->ipv6.icmp_sk[smp_processor_id()]; } +static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ + struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); + + if (!(type & ICMPV6_INFOMSG_MASK)) + if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) + ping_err(skb, offset, info); +} + static int icmpv6_rcv(struct sk_buff *skb); static const struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, + .err_handler = icmpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; @@ -217,7 +230,8 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) +int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len) { struct 
sk_buff *skb; struct icmp6hdr *icmp6h; @@ -300,8 +314,8 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif -static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6) +struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, + struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; @@ -595,7 +609,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) icmpv6_xmit_unlock(sk); } -static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) +void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { const struct inet6_protocol *ipprot; int inner_offset; @@ -686,7 +700,8 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", + LIMIT_NETDEBUG(KERN_DEBUG + "ICMPv6 checksum failed [%pI6c > %pI6c]\n", saddr, daddr); goto discard_it; } @@ -707,7 +722,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - /* we couldn't care less */ + ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c new file mode 100644 index 00000000000..6472aca817b --- /dev/null +++ b/net/ipv6/ping.c @@ -0,0 +1,212 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * "Ping" sockets + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on ipv4/ping.c code. 
+ * + * Authors: Lorenzo Colitti (IPv6 support) + * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6), + * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32) + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .no_check = UDP_CSUM_DEFAULT, + .flags = INET_PROTOSW_REUSE, +}; + + +/* Compatibility glue so we can support IPv6 when it's compiled as a module */ +int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +{ + return -EAFNOSUPPORT; +} +int dummy_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} +int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) +{ + return -EAFNOSUPPORT; +} +void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) {} +int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev, int strict) +{ + return 0; +} + +int __init pingv6_init(void) +{ + pingv6_ops.ipv6_recv_error = ipv6_recv_error; + pingv6_ops.datagram_recv_ctl = datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; + return inet6_register_protosw(&pingv6_protosw); +} + +/* This never gets called because it's not possible to unload the ipv6 module, + * but just in case. 
+ */ +void pingv6_exit(void) +{ + pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; + pingv6_ops.datagram_recv_ctl = dummy_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; + inet6_unregister_protosw(&pingv6_protosw); +} + +int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct icmp6hdr user_icmph; + int addr_type; + struct in6_addr *daddr; + int iif = 0; + struct flowi6 fl6; + int err; + int hlimit; + struct dst_entry *dst; + struct rt6_info *rt; + struct pingfakehdr pfh; + + pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + + if (msg->msg_name) { + struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; + if (msg->msg_namelen < sizeof(struct sockaddr_in6) || + u->sin6_family != AF_INET6) { + return -EINVAL; + } + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != u->sin6_scope_id) { + return -EINVAL; + } + daddr = &(u->sin6_addr); + iif = u->sin6_scope_id; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = &np->daddr; + } + + if (!iif) + iif = sk->sk_bound_dev_if; + + addr_type = ipv6_addr_type(daddr); + if (__ipv6_addr_needs_scope_id(addr_type) && !iif) + return -EINVAL; + if (addr_type & IPV6_ADDR_MAPPED) + return -EINVAL; + + /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ + + memset(&fl6, 0, sizeof(fl6)); + + fl6.flowi6_proto = IPPROTO_ICMPV6; + fl6.saddr = np->saddr; + fl6.daddr = *daddr; + fl6.fl6_icmp_type = user_icmph.icmp6_type; + fl6.fl6_icmp_code = user_icmph.icmp6_code; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1); + if (IS_ERR(dst)) + return PTR_ERR(dst); + rt = (struct rt6_info *) dst; + + np = inet6_sk(sk); + if (!np) + return -EBADF; + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + + pfh.icmph.type = user_icmph.icmp6_type; + pfh.icmph.code = user_icmph.icmp6_code; + pfh.icmph.checksum = 0; + pfh.icmph.un.echo.id = inet->inet_sport; + pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; + pfh.iov = msg->msg_iov; + pfh.wcheck = 0; + pfh.family = AF_INET6; + + if (ipv6_addr_is_multicast(&fl6.daddr)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = ip6_dst_hoplimit(dst); + + err = ip6_append_data(sk, ping_getfrag, &pfh, len, + 0, hlimit, + np->tclass, NULL, &fl6, rt, + MSG_DONTWAIT, np->dontfrag); + + if (err) { + ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, + ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); + } else { + err = icmpv6_push_pending_frames(sk, &fl6, + (struct icmp6hdr *) &pfh.icmph, + len); + } + + return err; +} From eba41bba2c0b971332b464675e364258b068421f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Jun 2013 22:43:52 +0000 Subject: [PATCH 586/678] ping: always initialize ->sin6_scope_id and ->sin6_flowinfo [net-next commit c26d6b46da3ee86fa8a864347331e5513ca84c2b] If we don't need scope id, we should initialize it to zero. Same for ->sin6_flowinfo. Change-Id: I28e4bc9593e76fc3434052182466fab4bb8ccf3a Cc: Lorenzo Colitti Cc: David S. 
Miller Signed-off-by: Cong Wang Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/ping.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 290188ede55..a5d0173bf87 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -894,13 +894,13 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sin6->sin6_port = 0; sin6->sin6_addr = ip6->saddr; + sin6->sin6_flowinfo = 0; if (np->sndflow) sin6->sin6_flowinfo = *(__be32 *)ip6 & IPV6_FLOWINFO_MASK; - if (__ipv6_addr_needs_scope_id( - ipv6_addr_type(&sin6->sin6_addr))) - sin6->sin6_scope_id = IP6CB(skb)->iif; + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); if (inet6_sk(sk)->rxopt.all) pingv6_ops.datagram_recv_ctl(sk, msg, skb); From c441a2e328eb57476d0af0150e756d48c09d1d75 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 23 Jul 2013 11:14:31 -0700 Subject: [PATCH 587/678] net: wireless: bcmdhd: Enable p2p support bits for p2p device Change-Id: Ie3537aaeecdbbddb5219b41c42f2f6ac5d85f5b4 Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/wl_cfg80211.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index 99223be786a..89861ef9a0e 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -6799,11 +6799,10 @@ s32 wl_cfg80211_attach_post(struct net_device *ndev) if (wl && !wl_get_drv_status(wl, READY, ndev)) { if (wl->wdev && wl_cfgp2p_supported(wl, ndev)) { -#if !defined(WL_ENABLE_P2P_IF) wl->wdev->wiphy->interface_modes |= (BIT(NL80211_IFTYPE_P2P_CLIENT)| BIT(NL80211_IFTYPE_P2P_GO)); -#endif + if ((err = wl_cfgp2p_init_priv(wl)) != 0) goto fail; From 475c9cea9c9452264adc90949d8757d422a0a5bd Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 4 Jul 2013 00:52:49 +0900 Subject: [PATCH 588/678] net: ipv6: add missing lock in ping_v6_sendmsg [net-next commit a1bdc45580fc19e968b32ad27cd7e476a4aa58f6] Bug: 9469865 Change-Id: I7ae28d29c645c535e570eb8d12f45e8eafd9c70b Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv6/ping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 6472aca817b..c0f329be337 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -193,6 +193,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); + lock_sock(sk); err = ip6_append_data(sk, ping_getfrag, &pfh, len, 0, hlimit, np->tclass, NULL, &fl6, rt, @@ -207,6 +208,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, (struct icmp6hdr *) &pfh.icmph, len); } + release_sock(sk); return err; } From 95be724be07d65fda291cd4fa2d0a5a2b56db0f8 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 4 Jul 2013 00:12:40 +0900 Subject: [PATCH 589/678] net: ipv6: fix wrong ping_v6_sendmsg return value [net-next commit fbfe80c890a1dc521d0b629b870e32fcffff0da5] ping_v6_sendmsg currently returns 0 on success. It should return the number of bytes written instead. Bug: 9469865 Change-Id: I86eb936e06bf8582975d59597e48e2bcc53b958d Signed-off-by: Lorenzo Colitti Signed-off-by: David S. 
Miller --- net/ipv6/ping.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index c0f329be337..bc30c4b4e48 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -210,5 +210,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } release_sock(sk); - return err; + if (err) + return err; + + return len; } From 12bfe6e37ac462abde3fafe3f3db468b0d4c4934 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 21 Aug 2013 11:11:33 -0700 Subject: [PATCH 590/678] net: wireless: bcmdhd: Fix roaming to hidden AP Change-Id: Id64d12962049833e19705fbe109ef04b60014079 Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/wl_cfg80211.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index 89861ef9a0e..176cdb34f97 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -289,6 +289,7 @@ static void wl_ch_to_chanspec(int ch, */ static void wl_rst_ie(struct wl_priv *wl); static __used s32 wl_add_ie(struct wl_priv *wl, u8 t, u8 l, u8 *v); +static void wl_update_hidden_ap_ie(struct wl_bss_info *bi, u8 *ie_stream, u16 ie_size); static s32 wl_mrg_ie(struct wl_priv *wl, u8 *ie_stream, u16 ie_size); static s32 wl_cp_ie(struct wl_priv *wl, u8 *dst, u16 dst_size); static u32 wl_get_ielen(struct wl_priv *wl); @@ -4846,6 +4847,7 @@ static s32 wl_inform_single_bss(struct wl_priv *wl, struct wl_bss_info *bi) beacon_proberesp->capab_info = cpu_to_le16(bi->capability); wl_rst_ie(wl); + wl_update_hidden_ap_ie(bi, ((u8 *) bi) + bi->ie_offset, bi->ie_length); wl_mrg_ie(wl, ((u8 *) bi) + bi->ie_offset, bi->ie_length); wl_cp_ie(wl, beacon_proberesp->variable, WL_BSS_INFO_MAX - offsetof(struct wl_cfg80211_bss_info, frame_buf)); @@ -5443,6 +5445,7 @@ static s32 wl_update_bss_info(struct wl_priv *wl, struct net_device *ndev) } bi = (struct wl_bss_info *)(wl->extra_buf + 4); if (memcmp(bi->BSSID.octet, curbssid, ETHER_ADDR_LEN)) { + WL_ERR(("Bssid doesn't match\n")); err = -EIO; goto update_bss_info_out; } @@ -5482,6 +5485,9 @@ static s32 wl_update_bss_info(struct wl_priv *wl, struct net_device *ndev) wl_update_prof(wl, ndev, NULL, &dtim_period, WL_PROF_DTIMPERIOD); update_bss_info_out: + if (unlikely(err)) { + WL_ERR(("Failed with error %d\n", err)); + } mutex_unlock(&wl->usr_sync); return err; } @@ -7659,6 +7665,22 @@ static __used s32 wl_add_ie(struct wl_priv *wl, u8 t, u8 l, u8 *v) return err; } +static void wl_update_hidden_ap_ie(struct wl_bss_info *bi, u8 *ie_stream, u16 ie_size) +{ + const u8 *ssidie; + + ssidie = cfg80211_find_ie(WLAN_EID_SSID, ie_stream, ie_size); + if (!ssidie) + return; + if (ssidie[1] != bi->SSID_len) { + WL_ERR(("%s: Wrong SSID len: %d != %d\n", __func__, ssidie[1], bi->SSID_len)); + return; + } + if (*(ssidie + 2) == '\0') + memcpy((void *)(ssidie + 2), bi->SSID, bi->SSID_len); + return; +} + static s32 wl_mrg_ie(struct wl_priv *wl, u8 *ie_stream, u16 ie_size) { struct wl_ie *ie = wl_to_ie(wl); From 6e6eb45dd8758b22288a0ee72d27b6e6f1278956 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Thu, 22 Aug 2013 09:37:57 -0700 Subject: [PATCH 591/678] net: wireless: bcmdhd: Inject EID value if it has 0 length Change-Id: Ifd102bee45e1a1e04ba015c39631a741ca74d6ee Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/wl_cfg80211.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git 
a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index 176cdb34f97..d8ad4ef2a2a 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -289,7 +289,7 @@ static void wl_ch_to_chanspec(int ch, */ static void wl_rst_ie(struct wl_priv *wl); static __used s32 wl_add_ie(struct wl_priv *wl, u8 t, u8 l, u8 *v); -static void wl_update_hidden_ap_ie(struct wl_bss_info *bi, u8 *ie_stream, u16 ie_size); +static void wl_update_hidden_ap_ie(struct wl_bss_info *bi, u8 *ie_stream, u32 *ie_size); static s32 wl_mrg_ie(struct wl_priv *wl, u8 *ie_stream, u16 ie_size); static s32 wl_cp_ie(struct wl_priv *wl, u8 *dst, u16 dst_size); static u32 wl_get_ielen(struct wl_priv *wl); @@ -4847,7 +4847,7 @@ static s32 wl_inform_single_bss(struct wl_priv *wl, struct wl_bss_info *bi) beacon_proberesp->capab_info = cpu_to_le16(bi->capability); wl_rst_ie(wl); - wl_update_hidden_ap_ie(bi, ((u8 *) bi) + bi->ie_offset, bi->ie_length); + wl_update_hidden_ap_ie(bi, ((u8 *) bi) + bi->ie_offset, &bi->ie_length); wl_mrg_ie(wl, ((u8 *) bi) + bi->ie_offset, bi->ie_length); wl_cp_ie(wl, beacon_proberesp->variable, WL_BSS_INFO_MAX - offsetof(struct wl_cfg80211_bss_info, frame_buf)); @@ -7665,19 +7665,26 @@ static __used s32 wl_add_ie(struct wl_priv *wl, u8 t, u8 l, u8 *v) return err; } -static void wl_update_hidden_ap_ie(struct wl_bss_info *bi, u8 *ie_stream, u16 ie_size) +static void wl_update_hidden_ap_ie(struct wl_bss_info *bi, u8 *ie_stream, u32 *ie_size) { - const u8 *ssidie; + u8 *ssidie; - ssidie = cfg80211_find_ie(WLAN_EID_SSID, ie_stream, ie_size); + ssidie = (u8 *)cfg80211_find_ie(WLAN_EID_SSID, ie_stream, *ie_size); if (!ssidie) return; if (ssidie[1] != bi->SSID_len) { - WL_ERR(("%s: Wrong SSID len: %d != %d\n", __func__, ssidie[1], bi->SSID_len)); + if (ssidie[1]) { + WL_ERR(("%s: Wrong SSID len: %d != %d\n", __func__, ssidie[1], bi->SSID_len)); + return; + } + memmove(ssidie + bi->SSID_len + 2, ssidie + 2, *ie_size - (ssidie + 2 - ie_stream)); + memcpy(ssidie + 2, bi->SSID, bi->SSID_len); + *ie_size = *ie_size + bi->SSID_len; + ssidie[1] = bi->SSID_len; return; } if (*(ssidie + 2) == '\0') - memcpy((void *)(ssidie + 2), bi->SSID, bi->SSID_len); + memcpy(ssidie + 2, bi->SSID, bi->SSID_len); return; } From 2a8dd77ca0237b5faa6f09cd49cf123e44ebeb24 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 4 Sep 2013 13:39:49 -0700 Subject: [PATCH 592/678] net: wireless: bcmdhd: Clean scan status if request is empty Change-Id: I1c00b5a84846faf316305f57a6a74ded2288a6fd Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/wl_cfg80211.c | 9 ++++++-- drivers/net/wireless/bcmdhd/wl_cfg80211.h | 25 +++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index d8ad4ef2a2a..3148ef43f27 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -1572,8 +1572,13 @@ __wl_cfg80211_scan(struct wiphy *wiphy, struct net_device *ndev, WL_DBG(("Enter wiphy (%p)\n", wiphy)); if (wl_get_drv_status_all(wl, SCANNING)) { - WL_ERR(("Scanning already\n")); - return -EAGAIN; + if (wl->scan_request == NULL) { + wl_clr_drv_status_all(wl, SCANNING); + WL_DBG(("<<<<<<<<<<>>>>>>>>>>\n")); + } else { + WL_ERR(("Scanning already\n")); + return -EAGAIN; + } } if (wl_get_drv_status(wl, SCAN_ABORTING, ndev)) { WL_ERR(("Scanning being aborted\n")); diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.h 
b/drivers/net/wireless/bcmdhd/wl_cfg80211.h index 6d237eee2cc..b53bdd7e048 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.h +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.h @@ -549,6 +549,29 @@ wl_get_status_all(struct wl_priv *wl, s32 status) return cnt? true: false; } + +static inline void +wl_set_status_all(struct wl_priv *wl, s32 status, u32 op) +{ + struct net_info *_net_info, *next; + + list_for_each_entry_safe(_net_info, next, &wl->net_list, list) { + switch (op) { + case 1: + return; /* set all status is not allowed */ + case 2: + clear_bit(status, &_net_info->sme_state); + break; + case 4: + return; /* change all status is not allowed */ + default: + return; /* unknown operation */ + } + } + +} + + static inline void wl_set_status_by_netdev(struct wl_priv *wl, s32 status, struct net_device *ndev, u32 op) @@ -639,6 +662,8 @@ wl_get_profile_by_netdev(struct wl_priv *wl, struct net_device *ndev) (wl_set_status_by_netdev(wl, WL_STATUS_ ## stat, ndev, 1)) #define wl_clr_drv_status(wl, stat, ndev) \ (wl_set_status_by_netdev(wl, WL_STATUS_ ## stat, ndev, 2)) +#define wl_clr_drv_status_all(wl, stat) \ + (wl_set_status_all(wl, WL_STATUS_ ## stat, 2)) #define wl_chg_drv_status(wl, stat, ndev) \ (wl_set_status_by_netdev(wl, WL_STATUS_ ## stat, ndev, 4)) From 3f1af28114b1ca711000e79b942c2af390885575 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Fri, 17 May 2013 14:28:21 -0700 Subject: [PATCH 593/678] net: wireless: bcmdhd: Fix sched scan processing Change-Id: I24eebe2723478b902ed4fb2e1f518994f41297f8 Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/wl_cfg80211.c | 63 +++++++++++++---------- drivers/net/wireless/bcmdhd/wl_cfg80211.h | 2 +- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index 3148ef43f27..9f46c7765b0 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -4543,8 +4543,8 @@ wl_cfg80211_add_set_beacon(struct wiphy *wiphy, struct net_device *dev, } #ifdef WL_SCHED_SCAN -#define PNO_TIME 30 -#define PNO_REPEAT 4 +#define PNO_TIME 30 +#define PNO_REPEAT 4 #define PNO_FREQ_EXPO_MAX 2 int wl_cfg80211_sched_scan_start(struct wiphy *wiphy, struct net_device *dev, @@ -4560,9 +4560,11 @@ int wl_cfg80211_sched_scan_start(struct wiphy *wiphy, int i; int ret = 0; - WL_DBG(("Enter n_match_sets:%d n_ssids:%d \n", + WL_DBG(("Enter \n")); + WL_PNO((">>> SCHED SCAN START\n")); + WL_PNO(("Enter n_match_sets:%d n_ssids:%d \n", request->n_match_sets, request->n_ssids)); - WL_DBG(("ssids:%d pno_time:%d pno_repeat:%d pno_freq:%d \n", + WL_PNO(("ssids:%d pno_time:%d pno_repeat:%d pno_freq:%d \n", request->n_ssids, pno_time, pno_repeat, pno_freq_expo_max)); #if defined(WL_ENABLE_P2P_IF) @@ -4585,7 +4587,7 @@ int wl_cfg80211_sched_scan_start(struct wiphy *wiphy, ssid = &request->match_sets[i].ssid; memcpy(ssids_local[i].SSID, ssid->ssid, ssid->ssid_len); ssids_local[i].SSID_len = ssid->ssid_len; - WL_DBG((">>> PNO filter set for ssid (%s) \n", ssid->ssid)); + WL_PNO((">>> PNO filter set for ssid (%s) \n", ssid->ssid)); ssid_count++; } } @@ -4593,7 +4595,7 @@ int wl_cfg80211_sched_scan_start(struct wiphy *wiphy, if (request->n_ssids > 0) { for (i = 0; i < request->n_ssids; i++) { /* Active scan req for ssids */ - WL_DBG((">>> Active scan req for ssid (%s) \n", request->ssids[i].ssid)); + WL_PNO((">>> Active scan req for ssid (%s) \n", request->ssids[i].ssid)); /* match_set ssids is a supert set of n_ssid list, so we need * not add 
these set seperately @@ -4626,6 +4628,7 @@ int wl_cfg80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev) struct wl_priv *wl = wiphy_priv(wiphy); WL_DBG(("Enter \n")); + WL_PNO((">>> SCHED SCAN STOP\n")); if (dhd_dev_pno_enable(dev, 0) < 0) WL_ERR(("PNO disable failed")); @@ -4634,6 +4637,7 @@ int wl_cfg80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev) WL_ERR(("PNO reset failed")); if (wl->scan_request && wl->sched_scan_running) { + WL_PNO((">>> Sched scan running. Aborting it..\n")); wl_notify_escan_complete(wl, dev, true, true); } @@ -5626,19 +5630,19 @@ static s32 wl_notify_pfn_status(struct wl_priv *wl, struct net_device *ndev, const wl_event_msg_t *e, void *data) { - WL_ERR((" PNO Event\n")); + WL_ERR((">>> PNO Event\n")); - mutex_lock(&wl->usr_sync); #ifndef WL_SCHED_SCAN + mutex_lock(&wl->usr_sync); /* TODO: Use cfg80211_sched_scan_results(wiphy); */ cfg80211_disconnected(ndev, 0, NULL, 0, GFP_KERNEL); + mutex_unlock(&wl->usr_sync); #else /* If cfg80211 scheduled scan is supported, report the pno results via sched * scan results */ wl_notify_sched_scan_results(wl, ndev, e, data); #endif /* WL_SCHED_SCAN */ - mutex_unlock(&wl->usr_sync); return 0; } #endif /* PNO_SUPPORT */ @@ -5695,14 +5699,15 @@ wl_notify_scan_status(struct wl_priv *wl, struct net_device *ndev, del_timer_sync(&wl->scan_timeout); spin_lock_irqsave(&wl->cfgdrv_lock, flags); if (wl->scan_request) { - WL_DBG(("cfg80211_scan_done\n")); cfg80211_scan_done(wl->scan_request, false); wl->scan_request = NULL; } spin_unlock_irqrestore(&wl->cfgdrv_lock, flags); + WL_DBG(("cfg80211_scan_done\n")); mutex_unlock(&wl->usr_sync); return err; } + static s32 wl_frame_get_mgmt(u16 fc, const struct ether_addr *da, const struct ether_addr *sa, const struct ether_addr *bssid, @@ -5865,7 +5870,7 @@ wl_notify_rx_mgmt_frame(struct wl_priv *wl, struct net_device *ndev, /* If target scan is not reliable, set the below define to "1" to do a * full escan */ -#define FULL_ESCAN_ON_PFN_NET_FOUND 0 +#define FULL_ESCAN_ON_PFN_NET_FOUND 1 static s32 wl_notify_sched_scan_results(struct wl_priv *wl, struct net_device *ndev, const wl_event_msg_t *e, void *data) @@ -5883,10 +5888,10 @@ wl_notify_sched_scan_results(struct wl_priv *wl, struct net_device *ndev, WL_DBG(("Enter\n")); if (e->event_type == WLC_E_PFN_NET_LOST) { - WL_DBG(("PFN NET LOST event. Do Nothing \n")); + WL_PNO(("PFN NET LOST event. Do Nothing \n")); return 0; } - WL_DBG(("PFN NET FOUND event. count:%d \n", pfn_result->count)); + WL_PNO((">>> PFN NET FOUND event. count:%d \n", pfn_result->count)); if (pfn_result->count > 0) { int i; @@ -5912,7 +5917,7 @@ wl_notify_sched_scan_results(struct wl_priv *wl, struct net_device *ndev, err = -EINVAL; goto out_err; } - WL_DBG(("SSID:%s Channel:%d \n", + WL_PNO((">>> SSID:%s Channel:%d \n", netinfo->pfnsubnet.SSID, netinfo->pfnsubnet.channel)); /* PFN result doesn't have all the info which are required by the supplicant * (For e.g IEs) Do a target Escan so that sched scan results are reported @@ -5945,6 +5950,7 @@ wl_notify_sched_scan_results(struct wl_priv *wl, struct net_device *ndev, } if (wl_get_p2p_status(wl, DISCOVERY_ON)) { + WL_PNO((">>> P2P discovery was ON. 
Disabling it\n")); err = wl_cfgp2p_discover_enable_search(wl, false); if (unlikely(err)) { wl_clr_drv_status(wl, SCANNING, ndev); @@ -5954,8 +5960,10 @@ wl_notify_sched_scan_results(struct wl_priv *wl, struct net_device *ndev, wl_set_drv_status(wl, SCANNING, ndev); #if FULL_ESCAN_ON_PFN_NET_FOUND + WL_PNO((">>> Doing Full ESCAN on PNO event\n")); err = wl_do_escan(wl, wiphy, ndev, NULL); #else + WL_PNO((">>> Doing targeted ESCAN on PNO event\n")); err = wl_do_escan(wl, wiphy, ndev, &request); #endif if (err) { @@ -6443,9 +6451,9 @@ static s32 wl_notify_escan_complete(struct wl_priv *wl, dev = wl->scan_request->dev; } else { - WL_ERR(("wl->scan_request is NULL may be internal scan." - "doing scan_abort for ndev %p primary %p p2p_net %p", - ndev, wl_to_prmry_ndev(wl), wl->p2p_net)); + WL_DBG(("wl->scan_request is NULL may be internal scan." + "doing scan_abort for ndev %p primary %p", + ndev, wl_to_prmry_ndev(wl))); dev = ndev; } if (fw_abort && !in_atomic()) { @@ -6465,19 +6473,15 @@ static s32 wl_notify_escan_complete(struct wl_priv *wl, if (timer_pending(&wl->scan_timeout)) del_timer_sync(&wl->scan_timeout); spin_lock_irqsave(&wl->cfgdrv_lock, flags); - #ifdef WL_SCHED_SCAN if (wl->sched_scan_req && !wl->scan_request) { - WL_DBG((" REPORTING SCHED SCAN RESULTS \n")); - if (aborted) - cfg80211_sched_scan_stopped(wl->sched_scan_req->wiphy); - else + WL_PNO((">>> REPORTING SCHED SCAN RESULTS \n")); + if (!aborted) cfg80211_sched_scan_results(wl->sched_scan_req->wiphy); wl->sched_scan_running = FALSE; wl->sched_scan_req = NULL; } #endif /* WL_SCHED_SCAN */ - if (likely(wl->scan_request)) { cfg80211_scan_done(wl->scan_request, aborted); wl->scan_request = NULL; @@ -6502,9 +6506,9 @@ static s32 wl_escan_handler(struct wl_priv *wl, wl_escan_result_t *escan_result; wl_bss_info_t *bss = NULL; wl_scan_results_t *list; + wifi_p2p_ie_t * p2p_ie; u32 bi_length; u32 i; - wifi_p2p_ie_t * p2p_ie; u8 *p2p_dev_addr = NULL; WL_DBG((" enter event type : %d, status : %d \n", @@ -6520,15 +6524,18 @@ static s32 wl_escan_handler(struct wl_priv *wl, } if (!ndev || !wl->escan_on || - !wl_get_drv_status(wl, SCANNING, ndev)) { - WL_ERR(("escan is not ready ndev %p wl->escan_on %d drv_status 0x%x\n", - ndev, wl->escan_on, wl_get_drv_status(wl, SCANNING, ndev))); + (!wl_get_drv_status(wl, SCANNING, ndev) && + !wl->sched_scan_running)) { + WL_ERR(("escan is not ready ndev %p wl->escan_on %d" + " drv_status 0x%x e_type %d e_states %d\n", + ndev, wl->escan_on, wl_get_drv_status(wl, SCANNING, ndev), + ntoh32(e->event_type), ntoh32(e->status))); goto exit; } + escan_result = (wl_escan_result_t *)data; if (status == WLC_E_STATUS_PARTIAL) { WL_INFO(("WLC_E_STATUS_PARTIAL \n")); - escan_result = (wl_escan_result_t *) data; if (!escan_result) { WL_ERR(("Invalid escan result (NULL pointer)\n")); goto exit; diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.h b/drivers/net/wireless/bcmdhd/wl_cfg80211.h index b53bdd7e048..aeb63674b83 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.h +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.h @@ -120,7 +120,7 @@ do { \ #else /* !(WL_DBG_LEVEL > 0) */ #define WL_DBG(args) #endif /* (WL_DBG_LEVEL > 0) */ - +#define WL_PNO(args) #define WL_SCAN_RETRY_MAX 3 #define WL_NUM_PMKIDS_MAX MAXPMKID From 053b1b5113b33e429c6be98271a9d77d16ea75d4 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 25 Jan 2012 11:38:13 +0100 Subject: [PATCH 594/678] ARM: 7301/1: Rename the T() macro to TUSER() to avoid namespace conflicts This macro is used to generate unprivileged accesses (LDRT/STRT) to user 
space. Signed-off-by: Catalin Marinas Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 4 +- arch/arm/include/asm/domain.h | 8 ++-- arch/arm/include/asm/futex.h | 8 ++-- arch/arm/include/asm/uaccess.h | 16 +++---- arch/arm/lib/getuser.S | 12 ++--- arch/arm/lib/putuser.S | 28 +++++------ arch/arm/lib/uaccess.S | 82 ++++++++++++++++---------------- 7 files changed, 79 insertions(+), 79 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 7bb8bf972c0..fed335efda2 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -231,7 +231,7 @@ */ #ifdef CONFIG_THUMB2_KERNEL - .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T() + .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=TUSER() 9999: .if \inc == 1 \instr\cond\()b\()\t\().w \reg, [\ptr, #\off] @@ -271,7 +271,7 @@ #else /* !CONFIG_THUMB2_KERNEL */ - .macro usracc, instr, reg, ptr, inc, cond, rept, abort, t=T() + .macro usracc, instr, reg, ptr, inc, cond, rept, abort, t=TUSER() .rept \rept 9999: .if \inc == 1 diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index af18ceaacf5..b5dc173d336 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -83,9 +83,9 @@ * instructions (inline assembly) */ #ifdef CONFIG_CPU_USE_DOMAINS -#define T(instr) #instr "t" +#define TUSER(instr) #instr "t" #else -#define T(instr) #instr +#define TUSER(instr) #instr #endif #else /* __ASSEMBLY__ */ @@ -95,9 +95,9 @@ * instructions */ #ifdef CONFIG_CPU_USE_DOMAINS -#define T(instr) instr ## t +#define TUSER(instr) instr ## t #else -#define T(instr) instr +#define TUSER(instr) instr #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 253cc86318b..7be54690aee 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -75,9 +75,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ __asm__ __volatile__( \ - "1: " T(ldr) " %1, [%3]\n" \ + "1: " TUSER(ldr) " %1, [%3]\n" \ " " insn "\n" \ - "2: " T(str) " %0, [%3]\n" \ + "2: " TUSER(str) " %0, [%3]\n" \ " mov %0, #0\n" \ __futex_atomic_ex_table("%5") \ : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ @@ -95,10 +95,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" - "1: " T(ldr) " %1, [%4]\n" + "1: " TUSER(ldr) " %1, [%4]\n" " teq %1, %2\n" " it eq @ explicit IT needed for the 2b label\n" - "2: " T(streq) " %3, [%4]\n" + "2: " TUSER(streq) " %3, [%4]\n" __futex_atomic_ex_table("%5") : "+r" (ret), "=&r" (val) : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index b293616a1a1..2958976d867 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -227,7 +227,7 @@ do { \ #define __get_user_asm_byte(x,addr,err) \ __asm__ __volatile__( \ - "1: " T(ldrb) " %1,[%2],#0\n" \ + "1: " TUSER(ldrb) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -263,7 +263,7 @@ do { \ #define __get_user_asm_word(x,addr,err) \ __asm__ __volatile__( \ - "1: " T(ldr) " %1,[%2],#0\n" \ + "1: " TUSER(ldr) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -308,7 +308,7 @@ do { \ #define __put_user_asm_byte(x,__pu_addr,err) \ __asm__ __volatile__( \ - "1: " T(strb) " 
%1,[%2],#0\n" \ + "1: " TUSER(strb) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -341,7 +341,7 @@ do { \ #define __put_user_asm_word(x,__pu_addr,err) \ __asm__ __volatile__( \ - "1: " T(str) " %1,[%2],#0\n" \ + "1: " TUSER(str) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -366,10 +366,10 @@ do { \ #define __put_user_asm_dword(x,__pu_addr,err) \ __asm__ __volatile__( \ - ARM( "1: " T(str) " " __reg_oper1 ", [%1], #4\n" ) \ - ARM( "2: " T(str) " " __reg_oper0 ", [%1]\n" ) \ - THUMB( "1: " T(str) " " __reg_oper1 ", [%1]\n" ) \ - THUMB( "2: " T(str) " " __reg_oper0 ", [%1, #4]\n" ) \ + ARM( "1: " TUSER(str) " " __reg_oper1 ", [%1], #4\n" ) \ + ARM( "2: " TUSER(str) " " __reg_oper0 ", [%1]\n" ) \ + THUMB( "1: " TUSER(str) " " __reg_oper1 ", [%1]\n" ) \ + THUMB( "2: " TUSER(str) " " __reg_oper0 ", [%1, #4]\n" ) \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 1b049cd7a49..11093a7c3e3 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -31,18 +31,18 @@ #include ENTRY(__get_user_1) -1: T(ldrb) r2, [r0] +1: TUSER(ldrb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_1) ENTRY(__get_user_2) #ifdef CONFIG_THUMB2_KERNEL -2: T(ldrb) r2, [r0] -3: T(ldrb) r3, [r0, #1] +2: TUSER(ldrb) r2, [r0] +3: TUSER(ldrb) r3, [r0, #1] #else -2: T(ldrb) r2, [r0], #1 -3: T(ldrb) r3, [r0] +2: TUSER(ldrb) r2, [r0], #1 +3: TUSER(ldrb) r3, [r0] #endif #ifndef __ARMEB__ orr r2, r2, r3, lsl #8 @@ -54,7 +54,7 @@ ENTRY(__get_user_2) ENDPROC(__get_user_2) ENTRY(__get_user_4) -4: T(ldr) r2, [r0] +4: TUSER(ldr) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_4) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index c023fc11e86..7db25990c58 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -31,7 +31,7 @@ #include ENTRY(__put_user_1) -1: T(strb) r2, [r0] +1: TUSER(strb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_1) @@ -40,19 +40,19 @@ ENTRY(__put_user_2) mov ip, r2, lsr #8 #ifdef CONFIG_THUMB2_KERNEL #ifndef __ARMEB__ -2: T(strb) r2, [r0] -3: T(strb) ip, [r0, #1] +2: TUSER(strb) r2, [r0] +3: TUSER(strb) ip, [r0, #1] #else -2: T(strb) ip, [r0] -3: T(strb) r2, [r0, #1] +2: TUSER(strb) ip, [r0] +3: TUSER(strb) r2, [r0, #1] #endif #else /* !CONFIG_THUMB2_KERNEL */ #ifndef __ARMEB__ -2: T(strb) r2, [r0], #1 -3: T(strb) ip, [r0] +2: TUSER(strb) r2, [r0], #1 +3: TUSER(strb) ip, [r0] #else -2: T(strb) ip, [r0], #1 -3: T(strb) r2, [r0] +2: TUSER(strb) ip, [r0], #1 +3: TUSER(strb) r2, [r0] #endif #endif /* CONFIG_THUMB2_KERNEL */ mov r0, #0 @@ -60,18 +60,18 @@ ENTRY(__put_user_2) ENDPROC(__put_user_2) ENTRY(__put_user_4) -4: T(str) r2, [r0] +4: TUSER(str) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_4) ENTRY(__put_user_8) #ifdef CONFIG_THUMB2_KERNEL -5: T(str) r2, [r0] -6: T(str) r3, [r0, #4] +5: TUSER(str) r2, [r0] +6: TUSER(str) r3, [r0, #4] #else -5: T(str) r2, [r0], #4 -6: T(str) r3, [r0] +5: TUSER(str) r2, [r0], #4 +6: TUSER(str) r3, [r0] #endif mov r0, #0 mov pc, lr diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index d0ece2aeb70..5c908b1cb8e 100644 --- a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -32,11 +32,11 @@ rsb ip, ip, #4 cmp ip, #2 ldrb r3, [r1], #1 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( T(strgtb) r3, [r0], #1) @ 
May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault sub r2, r2, ip b .Lc2u_dest_aligned @@ -59,7 +59,7 @@ ENTRY(__copy_to_user) addmi ip, r2, #4 bmi .Lc2u_0nowords ldr r3, [r1], #4 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -88,18 +88,18 @@ USER( T(str) r3, [r0], #4) @ May fault stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 ldrne r3, [r1], #4 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_0fupi .Lc2u_0nowords: teq ip, #0 beq .Lc2u_finished .Lc2u_nowords: cmp ip, #2 ldrb r3, [r1], #1 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_not_enough: @@ -120,7 +120,7 @@ USER( T(strgtb) r3, [r0], #1) @ May fault mov r3, r7, pull #8 ldr r7, [r1], #4 orr r3, r3, r7, push #24 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -155,18 +155,18 @@ USER( T(str) r3, [r0], #4) @ May fault movne r3, r7, pull #8 ldrne r7, [r1], #4 orrne r3, r3, r7, push #24 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_1fupi .Lc2u_1nowords: mov r3, r7, get_byte_1 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault movge r3, r7, get_byte_2 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault movgt r3, r7, get_byte_3 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_2fupi: subs r2, r2, #4 @@ -175,7 +175,7 @@ USER( T(strgtb) r3, [r0], #1) @ May fault mov r3, r7, pull #16 ldr r7, [r1], #4 orr r3, r3, r7, push #16 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -210,18 +210,18 @@ USER( T(str) r3, [r0], #4) @ May fault movne r3, r7, pull #16 ldrne r7, [r1], #4 orrne r3, r3, r7, push #16 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_2fupi .Lc2u_2nowords: mov r3, r7, get_byte_2 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault movge r3, r7, get_byte_3 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_3fupi: subs r2, r2, #4 @@ -230,7 +230,7 @@ USER( T(strgtb) r3, [r0], #1) @ May fault mov r3, r7, pull #24 ldr r7, [r1], #4 orr r3, r3, r7, push #8 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -265,18 +265,18 @@ USER( T(str) r3, [r0], #4) @ May fault movne r3, r7, pull #24 ldrne r7, [r1], #4 orrne r3, r3, r7, push #8 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault 
ands ip, ip, #3 beq .Lc2u_3fupi .Lc2u_3nowords: mov r3, r7, get_byte_3 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished ENDPROC(__copy_to_user) @@ -295,11 +295,11 @@ ENDPROC(__copy_to_user) .Lcfu_dest_not_aligned: rsb ip, ip, #4 cmp ip, #2 -USER( T(ldrb) r3, [r1], #1) @ May fault +USER( TUSER( ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 sub r2, r2, ip b .Lcfu_dest_aligned @@ -322,7 +322,7 @@ ENTRY(__copy_from_user) .Lcfu_0fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_0nowords -USER( T(ldr) r3, [r1], #4) +USER( TUSER( ldr) r3, [r1], #4) str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 @@ -351,18 +351,18 @@ USER( T(ldr) r3, [r1], #4) ldmneia r1!, {r3 - r4} @ Shouldnt fault stmneia r0!, {r3 - r4} tst ip, #4 - T(ldrne) r3, [r1], #4 @ Shouldnt fault + TUSER( ldrne) r3, [r1], #4 @ Shouldnt fault strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_0fupi .Lcfu_0nowords: teq ip, #0 beq .Lcfu_finished .Lcfu_nowords: cmp ip, #2 -USER( T(ldrb) r3, [r1], #1) @ May fault +USER( TUSER( ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished @@ -375,7 +375,7 @@ USER( T(ldrgtb) r3, [r1], #1) @ May fault .Lcfu_src_not_aligned: bic r1, r1, #3 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault cmp ip, #2 bgt .Lcfu_3fupi beq .Lcfu_2fupi @@ -383,7 +383,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault addmi ip, r2, #4 bmi .Lcfu_1nowords mov r3, r7, pull #8 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #24 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -418,7 +418,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #8 -USER( T(ldrne) r7, [r1], #4) @ May fault +USER( TUSER( ldrne) r7, [r1], #4) @ May fault orrne r3, r3, r7, push #24 strne r3, [r0], #4 ands ip, ip, #3 @@ -438,7 +438,7 @@ USER( T(ldrne) r7, [r1], #4) @ May fault addmi ip, r2, #4 bmi .Lcfu_2nowords mov r3, r7, pull #16 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #16 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -474,7 +474,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #16 -USER( T(ldrne) r7, [r1], #4) @ May fault +USER( TUSER( ldrne) r7, [r1], #4) @ May fault orrne r3, r3, r7, push #16 strne r3, [r0], #4 ands ip, ip, #3 @@ -486,7 +486,7 @@ USER( T(ldrne) r7, [r1], #4) @ May fault strb r3, [r0], #1 movge r3, r7, get_byte_3 strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #0) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished @@ -494,7 +494,7 @@ USER( T(ldrgtb) r3, [r1], #0) @ May fault addmi ip, r2, #4 bmi .Lcfu_3nowords mov r3, r7, 
pull #24 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #8 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -529,7 +529,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #24 -USER( T(ldrne) r7, [r1], #4) @ May fault +USER( TUSER( ldrne) r7, [r1], #4) @ May fault orrne r3, r3, r7, push #8 strne r3, [r0], #4 ands ip, ip, #3 @@ -539,9 +539,9 @@ USER( T(ldrne) r7, [r1], #4) @ May fault beq .Lcfu_finished cmp ip, #2 strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished ENDPROC(__copy_from_user) From e3a3a0e3893eb3017b2378ab0184a348444db2d8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 7 Sep 2012 18:22:28 +0100 Subject: [PATCH 595/678] ARM: 7527/1: uaccess: explicitly check __user pointer when !CPU_USE_DOMAINS The {get,put}_user macros don't perform range checking on the provided __user address when !CPU_HAS_DOMAINS. This patch reworks the out-of-line assembly accessors to check the user address against a specified limit, returning -EFAULT if it is out of range. [will: changed get_user register allocation to match put_user] [rmk: fixed building on older ARM architectures] Reported-by: Catalin Marinas Signed-off-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 8 +++++++ arch/arm/include/asm/uaccess.h | 40 +++++++++++++++++++++----------- arch/arm/lib/getuser.S | 23 +++++++++++------- arch/arm/lib/putuser.S | 6 +++++ 4 files changed, 56 insertions(+), 21 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index fed335efda2..4b0c4eddbcb 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -307,4 +307,12 @@ .size \name , . - \name .endm + .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req +#ifndef CONFIG_CPU_USE_DOMAINS + adds \tmp, \addr, #\size - 1 + sbcccs \tmp, \tmp, \limit + bcs \bad +#endif + .endm + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 2958976d867..5b1a81ebfe5 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -101,28 +101,39 @@ extern int __get_user_1(void *); extern int __get_user_2(void *); extern int __get_user_4(void *); -#define __get_user_x(__r2,__p,__e,__s,__i...) 
\ +#define __GUP_CLOBBER_1 "lr", "cc" +#ifdef CONFIG_CPU_USE_DOMAINS +#define __GUP_CLOBBER_2 "ip", "lr", "cc" +#else +#define __GUP_CLOBBER_2 "lr", "cc" +#endif +#define __GUP_CLOBBER_4 "lr", "cc" + +#define __get_user_x(__r2,__p,__e,__l,__s) \ __asm__ __volatile__ ( \ __asmeq("%0", "r0") __asmeq("%1", "r2") \ + __asmeq("%3", "r1") \ "bl __get_user_" #__s \ : "=&r" (__e), "=r" (__r2) \ - : "0" (__p) \ - : __i, "cc") + : "0" (__p), "r" (__l) \ + : __GUP_CLOBBER_##__s) #define get_user(x,p) \ ({ \ + unsigned long __limit = current_thread_info()->addr_limit - 1; \ register const typeof(*(p)) __user *__p asm("r0") = (p);\ register unsigned long __r2 asm("r2"); \ + register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ case 1: \ - __get_user_x(__r2, __p, __e, 1, "lr"); \ - break; \ + __get_user_x(__r2, __p, __e, __l, 1); \ + break; \ case 2: \ - __get_user_x(__r2, __p, __e, 2, "r3", "lr"); \ + __get_user_x(__r2, __p, __e, __l, 2); \ break; \ case 4: \ - __get_user_x(__r2, __p, __e, 4, "lr"); \ + __get_user_x(__r2, __p, __e, __l, 4); \ break; \ default: __e = __get_user_bad(); break; \ } \ @@ -135,31 +146,34 @@ extern int __put_user_2(void *, unsigned int); extern int __put_user_4(void *, unsigned int); extern int __put_user_8(void *, unsigned long long); -#define __put_user_x(__r2,__p,__e,__s) \ +#define __put_user_x(__r2,__p,__e,__l,__s) \ __asm__ __volatile__ ( \ __asmeq("%0", "r0") __asmeq("%2", "r2") \ + __asmeq("%3", "r1") \ "bl __put_user_" #__s \ : "=&r" (__e) \ - : "0" (__p), "r" (__r2) \ + : "0" (__p), "r" (__r2), "r" (__l) \ : "ip", "lr", "cc") #define put_user(x,p) \ ({ \ + unsigned long __limit = current_thread_info()->addr_limit - 1; \ register const typeof(*(p)) __r2 asm("r2") = (x); \ register const typeof(*(p)) __user *__p asm("r0") = (p);\ + register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ case 1: \ - __put_user_x(__r2, __p, __e, 1); \ + __put_user_x(__r2, __p, __e, __l, 1); \ break; \ case 2: \ - __put_user_x(__r2, __p, __e, 2); \ + __put_user_x(__r2, __p, __e, __l, 2); \ break; \ case 4: \ - __put_user_x(__r2, __p, __e, 4); \ + __put_user_x(__r2, __p, __e, __l, 4); \ break; \ case 8: \ - __put_user_x(__r2, __p, __e, 8); \ + __put_user_x(__r2, __p, __e, __l, 8); \ break; \ default: __e = __put_user_bad(); break; \ } \ diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 11093a7c3e3..9b06bb41fca 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -16,8 +16,9 @@ * __get_user_X * * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved * Outputs: r0 is the error code - * r2, r3 contains the zero-extended value + * r2 contains the zero-extended value * lr corrupted * * No other registers must be altered. (see @@ -27,33 +28,39 @@ * Note also that it is intended that __get_user_bad is not global. 
*/ #include +#include #include #include ENTRY(__get_user_1) + check_uaccess r0, 1, r1, r2, __get_user_bad 1: TUSER(ldrb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_1) ENTRY(__get_user_2) -#ifdef CONFIG_THUMB2_KERNEL -2: TUSER(ldrb) r2, [r0] -3: TUSER(ldrb) r3, [r0, #1] + check_uaccess r0, 2, r1, r2, __get_user_bad +#ifdef CONFIG_CPU_USE_DOMAINS +rb .req ip +2: ldrbt r2, [r0], #1 +3: ldrbt rb, [r0], #0 #else -2: TUSER(ldrb) r2, [r0], #1 -3: TUSER(ldrb) r3, [r0] +rb .req r0 +2: ldrb r2, [r0] +3: ldrb rb, [r0, #1] #endif #ifndef __ARMEB__ - orr r2, r2, r3, lsl #8 + orr r2, r2, rb, lsl #8 #else - orr r2, r3, r2, lsl #8 + orr r2, rb, r2, lsl #8 #endif mov r0, #0 mov pc, lr ENDPROC(__get_user_2) ENTRY(__get_user_4) + check_uaccess r0, 4, r1, r2, __get_user_bad 4: TUSER(ldr) r2, [r0] mov r0, #0 mov pc, lr diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 7db25990c58..3d73dcb959b 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -16,6 +16,7 @@ * __put_user_X * * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved * r2, r3 contains the value * Outputs: r0 is the error code * lr corrupted @@ -27,16 +28,19 @@ * Note also that it is intended that __put_user_bad is not global. */ #include +#include #include #include ENTRY(__put_user_1) + check_uaccess r0, 1, r1, ip, __put_user_bad 1: TUSER(strb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_1) ENTRY(__put_user_2) + check_uaccess r0, 2, r1, ip, __put_user_bad mov ip, r2, lsr #8 #ifdef CONFIG_THUMB2_KERNEL #ifndef __ARMEB__ @@ -60,12 +64,14 @@ ENTRY(__put_user_2) ENDPROC(__put_user_2) ENTRY(__put_user_4) + check_uaccess r0, 4, r1, ip, __put_user_bad 4: TUSER(str) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_4) ENTRY(__put_user_8) + check_uaccess r0, 8, r1, ip, __put_user_bad #ifdef CONFIG_THUMB2_KERNEL 5: TUSER(str) r2, [r0] 6: TUSER(str) r3, [r0, #4] From de622ec812165ffc64fd13b5eb444c4818066d94 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 19 Nov 2013 23:11:19 -0500 Subject: [PATCH 596/678] defconfig: a64 for 4.4 --- arch/arm/configs/metallice_grouper_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index f9eb6cf11e2..6216f452a8d 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a63" +CONFIG_LOCALVERSION="-MKernel-a64" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y From 79c55b913004c348f0218259e28cf6f29e0ef9d4 Mon Sep 17 00:00:00 2001 From: Francisco Franco Date: Thu, 6 Jun 2013 06:01:42 +0100 Subject: [PATCH 597/678] block: cfq: winning values. 
Signed-off-by: Francisco Franco --- block/cfq-iosched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 682463dbfc7..0f60ba0ad87 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -21,13 +21,13 @@ */ /* max queue in one round of service */ static const int cfq_quantum = 4; -static const int cfq_fifo_expire[2] = { 42, 11}; +static const int cfq_fifo_expire[2] = { 33, 8}; /* maximum backwards seek, in KiB */ static const int cfq_back_max = 12582912; /* penalty of a backwards seek */ static const int cfq_back_penalty = 1; -static const int cfq_slice_sync = 8; -static int cfq_slice_async = 7; +static const int cfq_slice_sync = 6; +static int cfq_slice_async = 5; static const int cfq_slice_async_rq = 2; static int cfq_slice_idle = 0; static int cfq_group_idle = 0; From b5b462bf79531c9964782bfc061fdc44d896e89d Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Nov 2013 15:11:52 -0500 Subject: [PATCH 598/678] block: deadline: rework franco's way is easier for the long run, even if there's no functional difference. --- block/deadline-iosched.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 04440a601ab..e39de38f4b5 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -17,11 +17,12 @@ /* * See Documentation/block/deadline-iosched.txt */ -static const int read_expire = 250; /* max time before a read is submitted. */ -static const int write_expire = 2500; /* ditto for writes, these limits are SOFT! */ -static const int writes_starved = 2; /* max times reads can starve a write */ +static const int read_expire = 25; /* max time before a read is submitted. */ +static const int write_expire = 250; /* ditto for writes, these limits are SOFT! */ +static const int writes_starved = 1; /* max times reads can starve a write */ static const int fifo_batch = 8; /* # of sequential requests treated as one - by the above parameters. For throughput. */ + by the above parameters. For throughput. 
*/ +static const int front_merges = 1; struct deadline_data { /* @@ -352,7 +353,7 @@ static void *deadline_init_queue(struct request_queue *q) dd->fifo_expire[READ] = read_expire; dd->fifo_expire[WRITE] = write_expire; dd->writes_starved = writes_starved; - dd->front_merges = 1; + dd->front_merges = front_merges; dd->fifo_batch = fifo_batch; return dd; } @@ -385,8 +386,8 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \ __data = jiffies_to_msecs(__data); \ return deadline_var_show(__data, (page)); \ } -SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 0); -SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 0); +SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1); +SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1); SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0); SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0); SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0); @@ -408,8 +409,8 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) *(__PTR) = __data; \ return ret; \ } -STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 0); -STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 0); +STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); +STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0); STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0); @@ -464,3 +465,4 @@ module_exit(deadline_exit); MODULE_AUTHOR("Jens Axboe"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("deadline IO scheduler"); + From 70c6b5d379fd00ff7e72abffc6962b20a9dfa60f Mon Sep 17 00:00:00 2001 From: Boy Petersen Date: Fri, 28 Jun 2013 16:02:22 +0200 Subject: [PATCH 599/678] [PATCH, v9] cgroups: introduce timer slack controller Changelog: v9: - update documentation - minor cleanup v8: - change hiearchy rules - introduce timer_slack.effective_slack_ns - get_task_timer_slack() -> task_get_effective_timer_slack() - task_get_effective_timer_slack() splited in separate patch - implement PR_GET_EFFECTIVE_TIMERSLACK v7: - totally reworked interface and rewritten from scratch (See Documentation/cgroups/timer_slack.txt for more information) v6: - add documentation - use notifier_call_chain() instead of check hook - fix validate_change() - cleanup v5: - -EBUSY on writing to timer_slack.min_slack_ns/max_slack_ns if a child has wider min-max range v4: - hierarchy support - drop dummy_timer_slack_check() - workaround lockdep false (?) positive - allow 0 as timer slack value v3: - rework interface - s/EXPORT_SYMBOL/EXPORT_SYMBOL_GPL/ v2: - fixed with CONFIG_CGROUP_TIMER_SLACK=y v1: - initial revision --- [PATCH, v9 1/3] hrtimer: introduce effective timer slack task_get_effective_timer_slack() returns timer slack value to be used to configure per-task timers. It can be equal or higher than task's timer slack value. For now task_get_effective_timer_slack() returns timer_slack_ns of the task. Timer slack cgroup controller will implement a bit more sophisticated logic. Signed-off-by: Kirill A. Shutemov --- [PATCH, v9 2/3] hrtimer: implement PR_GET_EFFECTIVE_TIMERSLACK PR_GET_EFFECTIVE_TIMERSLACK allows process to know its effective timer slack value. Signed-off-by: Kirill A. 
Shutemov --- [PATCH, v9 3/3] cgroups: introduce timer slack controller Every task_struct has timer_slack_ns value. This value uses to round up poll() and select() timeout values. This feature can be useful in mobile environment where combined wakeups are desired. Originally, prctl() was the only way to change timer slack value of a process. So you was not able change timer slack value of another process. cgroup subsys "timer_slack" implements timer slack controller. It provides a way to set minimal timer slack value for a group of tasks. If a task belongs to a cgroup with minimal timer slack value higher than task's value, cgroup's value will be applied. Timer slack controller allows to implement setting timer slack value of a process based on a policy. For example, you can create foreground and background cgroups and move tasks between them based on system state. Idea-by: Jacob Pan Signed-off-by: Kirill A. Shutemov --- kernel/cgroup_timer_slack.c: fix compile error kernel/cgroup_timer_slack.c:41: error: implicit declaration of function 'ERR_PTR' Signed-off-by: Boy Petersen Signed-off-by: Francisco Franco Conflicts: include/linux/cgroup_subsys.h --- Documentation/cgroups/timer_slack.txt | 72 +++++++++++++++ fs/select.c | 7 +- include/linux/cgroup_subsys.h | 6 ++ include/linux/prctl.h | 6 ++ include/linux/sched.h | 10 +++ init/Kconfig | 8 ++ kernel/Makefile | 1 + kernel/cgroup_timer_slack.c | 125 ++++++++++++++++++++++++++ kernel/fork.c | 4 + kernel/futex.c | 4 +- kernel/hrtimer.c | 2 +- kernel/sys.c | 3 + 12 files changed, 240 insertions(+), 8 deletions(-) create mode 100644 Documentation/cgroups/timer_slack.txt create mode 100644 kernel/cgroup_timer_slack.c diff --git a/Documentation/cgroups/timer_slack.txt b/Documentation/cgroups/timer_slack.txt new file mode 100644 index 00000000000..4006eabac1c --- /dev/null +++ b/Documentation/cgroups/timer_slack.txt @@ -0,0 +1,72 @@ +Timer Slack Controller +====================== + +Overview +-------- + +Every task_struct has timer_slack_ns value. This value uses to round up +poll() and select() timeout values. This feature can be useful in +mobile environment where combined wakeups are desired. + +Originally, prctl() was the only way to change timer slack value of +a process. So you was not able change timer slack value of another +process. + +cgroup subsys "timer_slack" implements timer slack controller. It +provides a way to set minimal timer slack value for a group of tasks. +If a task belongs to a cgroup with minimal timer slack value higher than +task's value, cgroup's value will be applied. + +Timer slack controller allows to implement setting timer slack value of +a process based on a policy. For example, you can create foreground and +background cgroups and move tasks between them based on system state. + +User interface +-------------- + +To get timer slack controller functionality you need to enable it in +kernel configuration: + +CONFIG_CGROUP_TIMER_SLACK=y + +The controller provides two files: + +# mount -t cgroup -o timer_slack none /sys/fs/cgroup +# ls /sys/fs/cgroup/timer_slack.* +/sys/fs/cgroup/timer_slack.effective_slack_ns +/sys/fs/cgroup/timer_slack.min_slack_ns + +By default timer_slack.min_slack_ns is 0: + +# cat /sys/fs/cgroup/timer_slack.min_slack_ns +0 + +You can set it to some value: + +# echo 50000 > /sys/fs/cgroup/timer_slack.min_slack_ns +# cat /sys/fs/cgroup/timer_slack.min_slack_ns +50000 + +Tasks still can set task's value below 50000 using prctl(), but in this +case cgroup's value will be applied. 
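(Editorial sketch, not part of the patch: a task can compare its own slack with the effective value the kernel will actually apply by using the PR_GET_EFFECTIVE_TIMERSLACK prctl option (35) introduced by this series; the minimal program below assumes ordinary glibc headers and defines the new option itself in case the installed headers predate it.)

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_GET_EFFECTIVE_TIMERSLACK
#define PR_GET_EFFECTIVE_TIMERSLACK 35  /* added by this patch set */
#endif

int main(void)
{
        long own = prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0);
        long eff = prctl(PR_GET_EFFECTIVE_TIMERSLACK, 0, 0, 0, 0);

        /* eff >= own whenever the task sits in a cgroup whose
         * timer_slack.min_slack_ns exceeds the task's own value. */
        printf("own slack: %ld ns, effective slack: %ld ns\n", own, eff);
        return 0;
}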
+ +Timer slack controller supports hierarchical groups. + +# mkdir /sys/fs/cgroup/a +# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns +50000 +# echo 70000 > /sys/fs/cgroup/a/timer_slack.min_slack_ns +# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns +70000 + +You can set any value you want, but effective value will the highest value +up by hierarchy. You can see effective timer slack value for the cgroup from +timer_slack.effective_slack_ns file: + +# cat /sys/fs/cgroup/a/timer_slack.effective_slack_ns +70000 +# echo 100000 > /sys/fs/cgroup/timer_slack.min_slack_ns +# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns +70000 +# cat /sys/fs/cgroup/a/timer_slack.effective_slack_ns +100000 diff --git a/fs/select.c b/fs/select.c index d33418fdc85..049c8ce78ef 100644 --- a/fs/select.c +++ b/fs/select.c @@ -69,7 +69,6 @@ static long __estimate_accuracy(struct timespec *tv) long select_estimate_accuracy(struct timespec *tv) { - unsigned long ret; struct timespec now; /* @@ -81,10 +80,8 @@ long select_estimate_accuracy(struct timespec *tv) ktime_get_ts(&now); now = timespec_sub(*tv, now); - ret = __estimate_accuracy(&now); - if (ret < current->timer_slack_ns) - return current->timer_slack_ns; - return ret; + return min_t(long, __estimate_accuracy(&now), + task_get_effective_timer_slack(current)); } diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index c966638399e..ae2ef66217b 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -70,3 +70,9 @@ SUBSYS(bfqio) #endif /* */ + +#ifdef CONFIG_CGROUP_TIMER_SLACK +SUBSYS(timer_slack) +#endif + +/* */ \ No newline at end of file diff --git a/include/linux/prctl.h b/include/linux/prctl.h index a3baeb2c216..31994587485 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -102,4 +102,10 @@ #define PR_MCE_KILL_GET 34 +/* + * Get effective timerslack value for the process. + * It can be higher than PR_GET_TIMERSLACK. + */ +#define PR_GET_EFFECTIVE_TIMERSLACK 35 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2a5e0244a24..370b48247dc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2741,6 +2741,16 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } +#ifdef CONFIG_CGROUP_TIMER_SLACK +extern unsigned long task_get_effective_timer_slack(struct task_struct *tsk); +#else +static inline unsigned long task_get_effective_timer_slack( + struct task_struct *tsk) +{ + return tsk->timer_slack_ns; +} +#endif + #endif /* __KERNEL__ */ #endif diff --git a/init/Kconfig b/init/Kconfig index 75611da2802..a49af148c28 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -614,6 +614,14 @@ config CGROUP_FREEZER Provides a way to freeze and unfreeze all tasks in a cgroup. +config CGROUP_TIMER_SLACK + bool "Timer slack cgroup controller" + help + Provides a way to set minimal timer slack value for tasks in + a cgroup. + It's useful in mobile devices where certain background apps + are attached to a cgroup and combined wakeups are desired. 
+ config CGROUP_DEVICE bool "Device controller for cgroups" help diff --git a/kernel/Makefile b/kernel/Makefile index 7595272d146..9e8629bb223 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup.o obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o +obj-$(CONFIG_CGROUP_TIMER_SLACK) += cgroup_timer_slack.o obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_UTS_NS) += utsname.o obj-$(CONFIG_USER_NS) += user_namespace.o diff --git a/kernel/cgroup_timer_slack.c b/kernel/cgroup_timer_slack.c new file mode 100644 index 00000000000..808f2500879 --- /dev/null +++ b/kernel/cgroup_timer_slack.c @@ -0,0 +1,125 @@ +/* + * cgroup_timer_slack.c - control group timer slack subsystem + * + * Copyright Nokia Corparation, 2011 + * Author: Kirill A. Shutemov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include +#include +#include + +struct cgroup_subsys timer_slack_subsys; +struct tslack_cgroup { + struct cgroup_subsys_state css; + unsigned long min_slack_ns; +}; + +static struct tslack_cgroup *cgroup_to_tslack(struct cgroup *cgroup) +{ + struct cgroup_subsys_state *css; + + css = cgroup_subsys_state(cgroup, timer_slack_subsys.subsys_id); + return container_of(css, struct tslack_cgroup, css); +} + +static struct cgroup_subsys_state *tslack_create(struct cgroup_subsys *subsys, + struct cgroup *cgroup) +{ + struct tslack_cgroup *tslack_cgroup; + + tslack_cgroup = kmalloc(sizeof(*tslack_cgroup), GFP_KERNEL); + if (!tslack_cgroup) + return ERR_PTR(-ENOMEM); + + if (cgroup->parent) { + struct tslack_cgroup *parent; + + parent = cgroup_to_tslack(cgroup->parent); + tslack_cgroup->min_slack_ns = parent->min_slack_ns; + } else + tslack_cgroup->min_slack_ns = 0UL; + + return &tslack_cgroup->css; +} + +static void tslack_destroy(struct cgroup_subsys *tslack_cgroup, + struct cgroup *cgroup) +{ + kfree(cgroup_to_tslack(cgroup)); +} + +static u64 tslack_read_min(struct cgroup *cgroup, struct cftype *cft) +{ + return cgroup_to_tslack(cgroup)->min_slack_ns; +} + +static int tslack_write_min(struct cgroup *cgroup, struct cftype *cft, u64 val) +{ + if (val > ULONG_MAX) + return -EINVAL; + + cgroup_to_tslack(cgroup)->min_slack_ns = val; + + return 0; +} + +static u64 tslack_read_effective(struct cgroup *cgroup, struct cftype *cft) +{ + unsigned long min; + + min = cgroup_to_tslack(cgroup)->min_slack_ns; + while (cgroup->parent) { + cgroup = cgroup->parent; + min = max(cgroup_to_tslack(cgroup)->min_slack_ns, min); + } + + return min; +} + +static struct cftype files[] = { + { + .name = "min_slack_ns", + .read_u64 = tslack_read_min, + .write_u64 = tslack_write_min, + }, + { + .name = "effective_slack_ns", + .read_u64 = tslack_read_effective, + }, +}; + +static int tslack_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + return cgroup_add_files(cgroup, subsys, files, ARRAY_SIZE(files)); +} + +struct cgroup_subsys timer_slack_subsys = { + .name = "timer_slack", + .subsys_id = timer_slack_subsys_id, + .create = 
tslack_create, + .destroy = tslack_destroy, + .populate = tslack_populate, +}; + +unsigned long task_get_effective_timer_slack(struct task_struct *tsk) +{ + struct cgroup *cgroup; + unsigned long slack; + + rcu_read_lock(); + cgroup = task_cgroup(tsk, timer_slack_subsys.subsys_id); + slack = tslack_read_effective(cgroup, NULL); + rcu_read_unlock(); + + return max(tsk->timer_slack_ns, slack); +} diff --git a/kernel/fork.c b/kernel/fork.c index f65fa0627c0..50a68c44a60 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1169,6 +1169,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif + /* + * Save current task's (not effective) timer slack value as default + * timer slack value for new task. + */ p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); diff --git a/kernel/futex.c b/kernel/futex.c index e6160fa842e..12082519313 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1887,7 +1887,7 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); hrtimer_set_expires_range_ns(&to->timer, *abs_time, - current->timer_slack_ns); + task_get_effective_timer_slack(current)); } retry: @@ -2281,7 +2281,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); hrtimer_set_expires_range_ns(&to->timer, *abs_time, - current->timer_slack_ns); + task_get_effective_timer_slack(current)); } /* diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 2043c08d36c..2bb6b7f4a32 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1564,7 +1564,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, int ret = 0; unsigned long slack; - slack = current->timer_slack_ns; + slack = task_get_effective_timer_slack(current); if (rt_task(current)) slack = 0; diff --git a/kernel/sys.c b/kernel/sys.c index d21bdc4fb26..c97500887c0 100755 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1819,6 +1819,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_TIMERSLACK: error = current->timer_slack_ns; break; + case PR_GET_EFFECTIVE_TIMERSLACK: + error = task_get_effective_timer_slack(current); + break; case PR_SET_TIMERSLACK: if (arg2 <= 0) current->timer_slack_ns = From e55d6fd06f522e462230207634f81bccb4e3a8f8 Mon Sep 17 00:00:00 2001 From: Boy Petersen Date: Mon, 1 Jul 2013 12:15:10 +0200 Subject: [PATCH 600/678] cgroup/timer_slack: fix cgroup permission errors Since the common mounting of cpu and timer_slack into /dev/cpuctl in commit https://github.com/boype/kernel_tuna_jb42/commit/f23ab456002c2d3b416260862dbfdb82b031a450 the Android OS cannot move processes across cgoups anymore: W/SchedPolicy( 1180): add_tid_to_cgroup failed to write '5949' (Permission denied); policy=0 Fix this by implementing an 'allow_attach' handler, according to this commit: https://github.com/boype/kernel_tuna_jb42/commit/bb5b603036b99d80527b222dc98eba4ee8341020 Signed-off-by: Boy Petersen Signed-off-by: Francisco Franco --- kernel/cgroup_timer_slack.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/cgroup_timer_slack.c b/kernel/cgroup_timer_slack.c index 808f2500879..3226deb86c5 100644 --- a/kernel/cgroup_timer_slack.c +++ b/kernel/cgroup_timer_slack.c @@ -58,6 +58,19 @@ static void tslack_destroy(struct cgroup_subsys *tslack_cgroup, kfree(cgroup_to_tslack(cgroup)); } +static int tslack_allow_attach(struct cgroup *cgrp, 
struct task_struct *tsk) +{ + const struct cred *cred = current_cred(), *tcred; + + tcred = __task_cred(tsk); + + if ((current != tsk) && !capable(CAP_SYS_NICE) && + cred->euid != tcred->uid && cred->euid != tcred->suid) + return -EACCES; + + return 0; +} + static u64 tslack_read_min(struct cgroup *cgroup, struct cftype *cft) { return cgroup_to_tslack(cgroup)->min_slack_ns; @@ -108,6 +121,7 @@ struct cgroup_subsys timer_slack_subsys = { .subsys_id = timer_slack_subsys_id, .create = tslack_create, .destroy = tslack_destroy, + .allow_attach = tslack_allow_attach, .populate = tslack_populate, }; From d9f23713da802982506b2938129c40b67d0ad75d Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Fri, 11 Nov 2011 23:04:09 -0800 Subject: [PATCH 601/678] Revert "block: Switch from BFQ-v5 for 3.1.0 to BFQ-v5r1 for 3.1.0." This reverts commit f298239a4a165775654ba2627f6869bb0c79b8b1. --- block/bfq-cgroup.c | 5 ----- block/bfq-ioc.c | 2 -- block/bfq-iosched.c | 11 +---------- block/bfq-sched.c | 2 -- block/bfq.h | 4 +--- 5 files changed, 2 insertions(+), 22 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 0e575c54f88..74ae73b91e1 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -7,8 +7,6 @@ * Copyright (C) 2008 Fabio Checconi * Paolo Valente * - * Copyright (C) 2010 Paolo Valente - * * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. */ @@ -278,9 +276,6 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (busy && resume) bfq_activate_bfqq(bfqd, bfqq); - - if (bfqd->active_queue == NULL && !bfqd->rq_in_driver) - bfq_schedule_dispatch(bfqd); } /** diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c index c0671a6b650..8f2b6c61d3f 100644 --- a/block/bfq-ioc.c +++ b/block/bfq-ioc.c @@ -6,8 +6,6 @@ * * Copyright (C) 2008 Fabio Checconi * Paolo Valente - * - * Copyright (C) 2010 Paolo Valente */ /** diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f067270eb1f..576cd03a28c 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7,8 +7,6 @@ * Copyright (C) 2008 Fabio Checconi * Paolo Valente * - * Copyright (C) 2010 Paolo Valente - * * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. 
* * BFQ is a proportional share disk scheduling algorithm based on the @@ -144,8 +142,6 @@ static DEFINE_IDA(cic_index_ida); ((struct cfq_io_context *) (rq)->elevator_private[0]) #define RQ_BFQQ(rq) ((rq)->elevator_private[1]) -static inline void bfq_schedule_dispatch(struct bfq_data *bfqd); - #include "bfq-ioc.c" #include "bfq-sched.c" #include "bfq-cgroup.c" @@ -857,7 +853,7 @@ static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) { if (bfqd->budgets_assigned < 194) - return bfq_default_max_budget / 32; + return bfq_default_max_budget; else return bfqd->bfq_max_budget / 32; } @@ -2767,8 +2763,6 @@ static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) struct bfq_data *bfqd = e->elevator_data; ssize_t num_char = 0; - spin_lock_irq(bfqd->queue->queue_lock); - num_char += sprintf(page + num_char, "Active:\n"); list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { num_char += sprintf(page + num_char, @@ -2789,9 +2783,6 @@ static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) bfqq->last_rais_start_finish), jiffies_to_msecs(bfqq->raising_cur_max_time)); } - - spin_unlock_irq(bfqd->queue->queue_lock); - return num_char; } diff --git a/block/bfq-sched.c b/block/bfq-sched.c index a0051489bef..fd50b7fd130 100644 --- a/block/bfq-sched.c +++ b/block/bfq-sched.c @@ -6,8 +6,6 @@ * * Copyright (C) 2008 Fabio Checconi * Paolo Valente - * - * Copyright (C) 2010 Paolo Valente */ #ifdef CONFIG_CGROUP_BFQIO diff --git a/block/bfq.h b/block/bfq.h index 4f67daae407..be2c572978c 100644 --- a/block/bfq.h +++ b/block/bfq.h @@ -1,13 +1,11 @@ /* - * BFQ-v5r1 for 3.1.0: data structures and common functions prototypes. + * BFQ-v5 for 3.1.0: data structures and common functions prototypes. * * Based on ideas and code from CFQ: * Copyright (C) 2003 Jens Axboe * * Copyright (C) 2008 Fabio Checconi * Paolo Valente - * - * Copyright (C) 2010 Paolo Valente */ #ifndef _BFQ_H From fe069ffc6068b7a9dc694908062d39198073ff98 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Nov 2013 15:46:43 -0500 Subject: [PATCH 602/678] Revert "Block: Add the BFQ-v5 I/O scheduler to 3.1" This reverts commit 364dcd13849acaa7f04cca6fbb992c0cd3961388. Conflicts: include/linux/cgroup_subsys.h --- block/Kconfig.iosched | 26 - block/Makefile | 1 - block/bfq-cgroup.c | 831 --------- block/bfq-ioc.c | 380 ----- block/bfq-iosched.c | 3021 --------------------------------- block/bfq-sched.c | 1066 ------------ block/bfq.h | 593 ------- block/blk-ioc.c | 30 +- block/cfq-iosched.c | 10 +- fs/ioprio.c | 9 +- include/linux/cgroup_subsys.h | 6 - include/linux/iocontext.h | 18 +- 12 files changed, 21 insertions(+), 5970 deletions(-) delete mode 100644 block/bfq-cgroup.c delete mode 100644 block/bfq-ioc.c delete mode 100644 block/bfq-iosched.c delete mode 100644 block/bfq-sched.c delete mode 100644 block/bfq.h diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 06ec27e59a0..8201a45cd26 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -71,28 +71,6 @@ config IOSCHED_VR Requests are chosen according to SSTF with a penalty of rev_penalty for switching head direction. -config IOSCHED_BFQ - tristate "BFQ I/O scheduler" - depends on EXPERIMENTAL - default n - ---help--- - The BFQ I/O scheduler tries to distribute bandwidth among - all processes according to their weights. - It aims at distributing the bandwidth as desired, independently of - the disk parameters and with any workload. 
It also tries to - guarantee low latency to interactive and soft real-time - applications. If compiled built-in (saying Y here), BFQ can - be configured to support hierarchical scheduling. - -config CGROUP_BFQIO - bool "BFQ hierarchical scheduling support" - depends on CGROUPS && IOSCHED_BFQ=y - default n - ---help--- - Enable hierarchical scheduling in BFQ, using the cgroups - filesystem interface. The name of the subsystem will be - bfqio. - choice prompt "Default I/O scheduler" default DEFAULT_CFQ @@ -116,9 +94,6 @@ choice config DEFAULT_CFQ bool "CFQ" if IOSCHED_CFQ=y - config DEFAULT_BFQ - bool "BFQ" if IOSCHED_BFQ=y - config DEFAULT_NOOP bool "No-op" @@ -135,7 +110,6 @@ config DEFAULT_IOSCHED default "deadline" if DEFAULT_DEADLINE default "row" if DEFAULT_ROW default "cfq" if DEFAULT_CFQ - default "bfq" if DEFAULT_BFQ default "noop" if DEFAULT_NOOP default "sio" if DEFAULT_SIO default "vr" if DEFAULT_VR diff --git a/block/Makefile b/block/Makefile index 760d8f3ff2e..eb332a2d98c 100644 --- a/block/Makefile +++ b/block/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_ROW) += row-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o -obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c deleted file mode 100644 index 74ae73b91e1..00000000000 --- a/block/bfq-cgroup.c +++ /dev/null @@ -1,831 +0,0 @@ -/* - * BFQ: CGROUPS support. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe - * - * Copyright (C) 2008 Fabio Checconi - * Paolo Valente - * - * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. - */ - -#ifdef CONFIG_CGROUP_BFQIO -static struct bfqio_cgroup bfqio_root_cgroup = { - .weight = BFQ_DEFAULT_GRP_WEIGHT, - .ioprio = BFQ_DEFAULT_GRP_IOPRIO, - .ioprio_class = BFQ_DEFAULT_GRP_CLASS, -}; - -static inline void bfq_init_entity(struct bfq_entity *entity, - struct bfq_group *bfqg) -{ - entity->weight = entity->new_weight; - entity->orig_weight = entity->new_weight; - entity->ioprio = entity->new_ioprio; - entity->ioprio_class = entity->new_ioprio_class; - entity->parent = bfqg->my_entity; - entity->sched_data = &bfqg->sched_data; -} - -static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup) -{ - return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id), - struct bfqio_cgroup, css); -} - -/* - * Search the bfq_group for bfqd into the hash table (by now only a list) - * of bgrp. Must be called under rcu_read_lock(). 
- */ -static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, - struct bfq_data *bfqd) -{ - struct bfq_group *bfqg; - struct hlist_node *n; - void *key; - - hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) { - key = rcu_dereference(bfqg->bfqd); - if (key == bfqd) - return bfqg; - } - - return NULL; -} - -static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, - struct bfq_group *bfqg) -{ - struct bfq_entity *entity = &bfqg->entity; - - entity->weight = entity->new_weight = bgrp->weight; - entity->orig_weight = entity->new_weight; - entity->ioprio = entity->new_ioprio = bgrp->ioprio; - entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; - entity->ioprio_changed = 1; - entity->my_sched_data = &bfqg->sched_data; -} - -static inline void bfq_group_set_parent(struct bfq_group *bfqg, - struct bfq_group *parent) -{ - struct bfq_entity *entity; - - BUG_ON(parent == NULL); - BUG_ON(bfqg == NULL); - - entity = &bfqg->entity; - entity->parent = parent->my_entity; - entity->sched_data = &parent->sched_data; -} - -/** - * bfq_group_chain_alloc - allocate a chain of groups. - * @bfqd: queue descriptor. - * @cgroup: the leaf cgroup this chain starts from. - * - * Allocate a chain of groups starting from the one belonging to - * @cgroup up to the root cgroup. Stop if a cgroup on the chain - * to the root has already an allocated group on @bfqd. - */ -static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, - struct cgroup *cgroup) -{ - struct bfqio_cgroup *bgrp; - struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; - - for (; cgroup != NULL; cgroup = cgroup->parent) { - bgrp = cgroup_to_bfqio(cgroup); - - bfqg = bfqio_lookup_group(bgrp, bfqd); - if (bfqg != NULL) { - /* - * All the cgroups in the path from there to the - * root must have a bfq_group for bfqd, so we don't - * need any more allocations. - */ - break; - } - - bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); - if (bfqg == NULL) - goto cleanup; - - bfq_group_init_entity(bgrp, bfqg); - bfqg->my_entity = &bfqg->entity; - - if (leaf == NULL) { - leaf = bfqg; - prev = leaf; - } else { - bfq_group_set_parent(prev, bfqg); - /* - * Build a list of allocated nodes using the bfqd - * filed, that is still unused and will be initialized - * only after the node will be connected. - */ - prev->bfqd = bfqg; - prev = bfqg; - } - } - - return leaf; - -cleanup: - while (leaf != NULL) { - prev = leaf; - leaf = leaf->bfqd; - kfree(prev); - } - - return NULL; -} - -/** - * bfq_group_chain_link - link an allocatd group chain to a cgroup hierarchy. - * @bfqd: the queue descriptor. - * @cgroup: the leaf cgroup to start from. - * @leaf: the leaf group (to be associated to @cgroup). - * - * Try to link a chain of groups to a cgroup hierarchy, connecting the - * nodes bottom-up, so we can be sure that when we find a cgroup in the - * hierarchy that already as a group associated to @bfqd all the nodes - * in the path to the root cgroup have one too. - * - * On locking: the queue lock protects the hierarchy (there is a hierarchy - * per device) while the bfqio_cgroup lock protects the list of groups - * belonging to the same cgroup. 
- */ -static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup, - struct bfq_group *leaf) -{ - struct bfqio_cgroup *bgrp; - struct bfq_group *bfqg, *next, *prev = NULL; - unsigned long flags; - - assert_spin_locked(bfqd->queue->queue_lock); - - for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) { - bgrp = cgroup_to_bfqio(cgroup); - next = leaf->bfqd; - - bfqg = bfqio_lookup_group(bgrp, bfqd); - BUG_ON(bfqg != NULL); - - spin_lock_irqsave(&bgrp->lock, flags); - - rcu_assign_pointer(leaf->bfqd, bfqd); - hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); - hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); - - spin_unlock_irqrestore(&bgrp->lock, flags); - - prev = leaf; - leaf = next; - } - - BUG_ON(cgroup == NULL && leaf != NULL); - if (cgroup != NULL && prev != NULL) { - bgrp = cgroup_to_bfqio(cgroup); - bfqg = bfqio_lookup_group(bgrp, bfqd); - bfq_group_set_parent(prev, bfqg); - } -} - -/** - * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. - * @bfqd: queue descriptor. - * @cgroup: cgroup being searched for. - * - * Return a group associated to @bfqd in @cgroup, allocating one if - * necessary. When a group is returned all the cgroups in the path - * to the root have a group associated to @bfqd. - * - * If the allocation fails, return the root group: this breaks guarantees - * but is a safe fallbak. If this loss becames a problem it can be - * mitigated using the equivalent weight (given by the product of the - * weights of the groups in the path from @group to the root) in the - * root scheduler. - * - * We allocate all the missing nodes in the path from the leaf cgroup - * to the root and we connect the nodes only after all the allocations - * have been successful. - */ -static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, - struct cgroup *cgroup) -{ - struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); - struct bfq_group *bfqg; - - bfqg = bfqio_lookup_group(bgrp, bfqd); - if (bfqg != NULL) - return bfqg; - - bfqg = bfq_group_chain_alloc(bfqd, cgroup); - if (bfqg != NULL) - bfq_group_chain_link(bfqd, cgroup, bfqg); - else - bfqg = bfqd->root_group; - - return bfqg; -} - -/** - * bfq_bfqq_move - migrate @bfqq to @bfqg. - * @bfqd: queue descriptor. - * @bfqq: the queue to move. - * @entity: @bfqq's entity. - * @bfqg: the group to move to. - * - * Move @bfqq to @bfqg, deactivating it from its old group and reactivating - * it on the new one. Avoid putting the entity on the old group idle tree. - * - * Must be called under the queue lock; the cgroup owning @bfqg must - * not disappear (by now this just means that we are called under - * rcu_read_lock()). - */ -static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, - struct bfq_entity *entity, struct bfq_group *bfqg) -{ - int busy, resume; - - busy = bfq_bfqq_busy(bfqq); - resume = !RB_EMPTY_ROOT(&bfqq->sort_list); - - BUG_ON(resume && !entity->on_st); - BUG_ON(busy && !resume && entity->on_st && bfqq != bfqd->active_queue); - - if (busy) { - BUG_ON(atomic_read(&bfqq->ref) < 2); - - if (!resume) - bfq_del_bfqq_busy(bfqd, bfqq, 0); - else - bfq_deactivate_bfqq(bfqd, bfqq, 0); - } else if (entity->on_st) - bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); - - /* - * Here we use a reference to bfqg. We don't need a refcounter - * as the cgroup reference will not be dropped, so that its - * destroy() callback will not be invoked. 
- */ - entity->parent = bfqg->my_entity; - entity->sched_data = &bfqg->sched_data; - - if (busy && resume) - bfq_activate_bfqq(bfqd, bfqq); -} - -/** - * __bfq_cic_change_cgroup - move @cic to @cgroup. - * @bfqd: the queue descriptor. - * @cic: the cic to move. - * @cgroup: the cgroup to move to. - * - * Move cic to cgroup, assuming that bfqd->queue is locked; the caller - * has to make sure that the reference to cgroup is valid across the call. - * - * NOTE: an alternative approach might have been to store the current - * cgroup in bfqq and getting a reference to it, reducing the lookup - * time here, at the price of slightly more complex code. - */ -static struct bfq_group *__bfq_cic_change_cgroup(struct bfq_data *bfqd, - struct cfq_io_context *cic, - struct cgroup *cgroup) -{ - struct bfq_queue *async_bfqq = cic_to_bfqq(cic, 0); - struct bfq_queue *sync_bfqq = cic_to_bfqq(cic, 1); - struct bfq_entity *entity; - struct bfq_group *bfqg; - - bfqg = bfq_find_alloc_group(bfqd, cgroup); - if (async_bfqq != NULL) { - entity = &async_bfqq->entity; - - if (entity->sched_data != &bfqg->sched_data) { - cic_set_bfqq(cic, NULL, 0); - bfq_log_bfqq(bfqd, async_bfqq, - "cic_change_group: %p %d", - async_bfqq, atomic_read(&async_bfqq->ref)); - bfq_put_queue(async_bfqq); - } - } - - if (sync_bfqq != NULL) { - entity = &sync_bfqq->entity; - if (entity->sched_data != &bfqg->sched_data) - bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); - } - - return bfqg; -} - -/** - * bfq_cic_change_cgroup - move @cic to @cgroup. - * @cic: the cic being migrated. - * @cgroup: the destination cgroup. - * - * When the task owning @cic is moved to @cgroup, @cic is immediately - * moved into its new parent group. - */ -static void bfq_cic_change_cgroup(struct cfq_io_context *cic, - struct cgroup *cgroup) -{ - struct bfq_data *bfqd; - unsigned long uninitialized_var(flags); - - bfqd = bfq_get_bfqd_locked(&cic->key, &flags); - if (bfqd != NULL && - !strncmp(bfqd->queue->elevator->elevator_type->elevator_name, - "bfq", ELV_NAME_MAX)) { - __bfq_cic_change_cgroup(bfqd, cic, cgroup); - bfq_put_bfqd_unlock(bfqd, &flags); - } -} - -/** - * bfq_cic_update_cgroup - update the cgroup of @cic. - * @cic: the @cic to update. - * - * Make sure that @cic is enqueued in the cgroup of the current task. - * We need this in addition to moving cics during the cgroup attach - * phase because the task owning @cic could be at its first disk - * access or we may end up in the root cgroup as the result of a - * memory allocation failure and here we try to move to the right - * group. - * - * Must be called under the queue lock. It is safe to use the returned - * value even after the rcu_read_unlock() as the migration/destruction - * paths act under the queue lock too. IOW it is impossible to race with - * group migration/destruction and end up with an invalid group as: - * a) here cgroup has not yet been destroyed, nor its destroy callback - * has started execution, as current holds a reference to it, - * b) if it is destroyed after rcu_read_unlock() [after current is - * migrated to a different cgroup] its attach() callback will have - * taken care of remove all the references to the old cgroup data. 
- */ -static struct bfq_group *bfq_cic_update_cgroup(struct cfq_io_context *cic) -{ - struct bfq_data *bfqd = cic->key; - struct bfq_group *bfqg; - struct cgroup *cgroup; - - BUG_ON(bfqd == NULL); - - rcu_read_lock(); - cgroup = task_cgroup(current, bfqio_subsys_id); - bfqg = __bfq_cic_change_cgroup(bfqd, cic, cgroup); - rcu_read_unlock(); - - return bfqg; -} - -/** - * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. - * @st: the service tree being flushed. - */ -static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) -{ - struct bfq_entity *entity = st->first_idle; - - for (; entity != NULL; entity = st->first_idle) - __bfq_deactivate_entity(entity, 0); -} - -/** - * bfq_reparent_leaf_entity - move leaf entity to the root_group. - * @bfqd: the device data structure with the root group. - * @entity: the entity to move. - */ -static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - BUG_ON(bfqq == NULL); - bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); - return; -} - -/** - * bfq_reparent_active_entities - move to the root group all active entities. - * @bfqd: the device data structure with the root group. - * @bfqg: the group to move from. - * @st: the service tree with the entities. - * - * Needs queue_lock to be taken and reference to be valid over the call. - */ -static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, - struct bfq_group *bfqg, - struct bfq_service_tree *st) -{ - struct rb_root *active = &st->active; - struct bfq_entity *entity = NULL; - - if (!RB_EMPTY_ROOT(&st->active)) - entity = bfq_entity_of(rb_first(active)); - - for (; entity != NULL ; entity = bfq_entity_of(rb_first(active))) - bfq_reparent_leaf_entity(bfqd, entity); - - if (bfqg->sched_data.active_entity != NULL) - bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.active_entity); - - return; -} - -/** - * bfq_destroy_group - destroy @bfqg. - * @bgrp: the bfqio_cgroup containing @bfqg. - * @bfqg: the group being destroyed. - * - * Destroy @bfqg, making sure that it is not referenced from its parent. - */ -static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) -{ - struct bfq_data *bfqd; - struct bfq_service_tree *st; - struct bfq_entity *entity = bfqg->my_entity; - unsigned long uninitialized_var(flags); - int i; - - hlist_del(&bfqg->group_node); - - /* - * Empty all service_trees belonging to this group before deactivating - * the group itself. - */ - for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { - st = bfqg->sched_data.service_tree + i; - - /* - * The idle tree may still contain bfq_queues belonging - * to exited task because they never migrated to a different - * cgroup from the one being destroyed now. Noone else - * can access them so it's safe to act without any lock. - */ - bfq_flush_idle_tree(st); - - /* - * It may happen that some queues are still active - * (busy) upon group destruction (if the corresponding - * processes have been forced to terminate). We move - * all the leaf entities corresponding to these queues - * to the root_group. - * Also, it may happen that the group has an entity - * under service, which is disconnected from the active - * tree: it must be moved, too. - * There is no need to put the sync queues, as the - * scheduler has taken no reference. 
- */ - bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); - if (bfqd != NULL) { - bfq_reparent_active_entities(bfqd, bfqg, st); - bfq_put_bfqd_unlock(bfqd, &flags); - } - BUG_ON(!RB_EMPTY_ROOT(&st->active)); - BUG_ON(!RB_EMPTY_ROOT(&st->idle)); - } - BUG_ON(bfqg->sched_data.next_active != NULL); - BUG_ON(bfqg->sched_data.active_entity != NULL); - - /* - * We may race with device destruction, take extra care when - * dereferencing bfqg->bfqd. - */ - bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); - if (bfqd != NULL) { - hlist_del(&bfqg->bfqd_node); - __bfq_deactivate_entity(entity, 0); - bfq_put_async_queues(bfqd, bfqg); - bfq_put_bfqd_unlock(bfqd, &flags); - } - BUG_ON(entity->tree != NULL); - - /* - * No need to defer the kfree() to the end of the RCU grace - * period: we are called from the destroy() callback of our - * cgroup, so we can be sure that noone is a) still using - * this cgroup or b) doing lookups in it. - */ - kfree(bfqg); -} - -/** - * bfq_disconnect_groups - diconnect @bfqd from all its groups. - * @bfqd: the device descriptor being exited. - * - * When the device exits we just make sure that no lookup can return - * the now unused group structures. They will be deallocated on cgroup - * destruction. - */ -static void bfq_disconnect_groups(struct bfq_data *bfqd) -{ - struct hlist_node *pos, *n; - struct bfq_group *bfqg; - - bfq_log(bfqd, "disconnect_groups beginning") ; - hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) { - hlist_del(&bfqg->bfqd_node); - - __bfq_deactivate_entity(bfqg->my_entity, 0); - - /* - * Don't remove from the group hash, just set an - * invalid key. No lookups can race with the - * assignment as bfqd is being destroyed; this - * implies also that new elements cannot be added - * to the list. - */ - rcu_assign_pointer(bfqg->bfqd, NULL); - - bfq_log(bfqd, "disconnect_groups: put async for group %p", - bfqg) ; - bfq_put_async_queues(bfqd, bfqg); - } -} - -static inline void bfq_free_root_group(struct bfq_data *bfqd) -{ - struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; - struct bfq_group *bfqg = bfqd->root_group; - - bfq_put_async_queues(bfqd, bfqg); - - spin_lock_irq(&bgrp->lock); - hlist_del_rcu(&bfqg->group_node); - spin_unlock_irq(&bgrp->lock); - - /* - * No need to synchronize_rcu() here: since the device is gone - * there cannot be any read-side access to its root_group. 
- */ - kfree(bfqg); -} - -static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) -{ - struct bfq_group *bfqg; - struct bfqio_cgroup *bgrp; - int i; - - bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); - if (bfqg == NULL) - return NULL; - - bfqg->entity.parent = NULL; - for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) - bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; - - bgrp = &bfqio_root_cgroup; - spin_lock_irq(&bgrp->lock); - rcu_assign_pointer(bfqg->bfqd, bfqd); - hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); - spin_unlock_irq(&bgrp->lock); - - return bfqg; -} - -#define SHOW_FUNCTION(__VAR) \ -static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \ - struct cftype *cftype) \ -{ \ - struct bfqio_cgroup *bgrp; \ - u64 ret; \ - \ - if (!cgroup_lock_live_group(cgroup)) \ - return -ENODEV; \ - \ - bgrp = cgroup_to_bfqio(cgroup); \ - spin_lock_irq(&bgrp->lock); \ - ret = bgrp->__VAR; \ - spin_unlock_irq(&bgrp->lock); \ - \ - cgroup_unlock(); \ - \ - return ret; \ -} - -SHOW_FUNCTION(weight); -SHOW_FUNCTION(ioprio); -SHOW_FUNCTION(ioprio_class); -#undef SHOW_FUNCTION - -#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ -static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \ - struct cftype *cftype, \ - u64 val) \ -{ \ - struct bfqio_cgroup *bgrp; \ - struct bfq_group *bfqg; \ - struct hlist_node *n; \ - \ - if (val < (__MIN) || val > (__MAX)) \ - return -EINVAL; \ - \ - if (!cgroup_lock_live_group(cgroup)) \ - return -ENODEV; \ - \ - bgrp = cgroup_to_bfqio(cgroup); \ - \ - spin_lock_irq(&bgrp->lock); \ - bgrp->__VAR = (unsigned short)val; \ - hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \ - bfqg->entity.new_##__VAR = (unsigned short)val; \ - smp_wmb(); \ - bfqg->entity.ioprio_changed = 1; \ - } \ - spin_unlock_irq(&bgrp->lock); \ - \ - cgroup_unlock(); \ - \ - return 0; \ -} - -STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); -STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); -STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); -#undef STORE_FUNCTION - -static struct cftype bfqio_files[] = { - { - .name = "weight", - .read_u64 = bfqio_cgroup_weight_read, - .write_u64 = bfqio_cgroup_weight_write, - }, - { - .name = "ioprio", - .read_u64 = bfqio_cgroup_ioprio_read, - .write_u64 = bfqio_cgroup_ioprio_write, - }, - { - .name = "ioprio_class", - .read_u64 = bfqio_cgroup_ioprio_class_read, - .write_u64 = bfqio_cgroup_ioprio_class_write, - }, -}; - -static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) -{ - return cgroup_add_files(cgroup, subsys, bfqio_files, - ARRAY_SIZE(bfqio_files)); -} - -static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys *subsys, - struct cgroup *cgroup) -{ - struct bfqio_cgroup *bgrp; - - if (cgroup->parent != NULL) { - bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); - if (bgrp == NULL) - return ERR_PTR(-ENOMEM); - } else - bgrp = &bfqio_root_cgroup; - - spin_lock_init(&bgrp->lock); - INIT_HLIST_HEAD(&bgrp->group_data); - bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; - bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; - - return &bgrp->css; -} - -/* - * We cannot support shared io contexts, as we have no mean to support - * two tasks with the same ioc in two different groups without major rework - * of the main cic/bfqq data structures. 
By now we allow a task to change - * its cgroup only if it's the only owner of its ioc; the drawback of this - * behavior is that a group containing a task that forked using CLONE_IO - * will not be destroyed until the tasks sharing the ioc die. - */ -static int bfqio_can_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, - struct task_struct *tsk) -{ - struct io_context *ioc; - int ret = 0; - - /* task_lock() is needed to avoid races with exit_io_context() */ - task_lock(tsk); - ioc = tsk->io_context; - if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) - /* - * ioc == NULL means that the task is either too young or - * exiting: if it has still no ioc the ioc can't be shared, - * if the task is exiting the attach will fail anyway, no - * matter what we return here. - */ - ret = -EINVAL; - task_unlock(tsk); - - return ret; -} - -static void bfqio_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, - struct cgroup *prev, struct task_struct *tsk) -{ - struct io_context *ioc; - struct cfq_io_context *cic; - struct hlist_node *n; - - task_lock(tsk); - ioc = tsk->io_context; - if (ioc != NULL) { - BUG_ON(atomic_long_read(&ioc->refcount) == 0); - atomic_long_inc(&ioc->refcount); - } - task_unlock(tsk); - - if (ioc == NULL) - return; - - rcu_read_lock(); - hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) - bfq_cic_change_cgroup(cic, cgroup); - rcu_read_unlock(); - - put_io_context(ioc); -} - -static void bfqio_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) -{ - struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); - struct hlist_node *n, *tmp; - struct bfq_group *bfqg; - - /* - * Since we are destroying the cgroup, there are no more tasks - * referencing it, and all the RCU grace periods that may have - * referenced it are ended (as the destruction of the parent - * cgroup is RCU-safe); bgrp->group_data will not be accessed by - * anything else and we don't need any synchronization. 
- */ - hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node) - bfq_destroy_group(bgrp, bfqg); - - BUG_ON(!hlist_empty(&bgrp->group_data)); - - kfree(bgrp); -} - -struct cgroup_subsys bfqio_subsys = { - .name = "bfqio", - .create = bfqio_create, - .can_attach = bfqio_can_attach, - .attach = bfqio_attach, - .destroy = bfqio_destroy, - .populate = bfqio_populate, - .subsys_id = bfqio_subsys_id, -}; -#else -static inline void bfq_init_entity(struct bfq_entity *entity, - struct bfq_group *bfqg) -{ - entity->weight = entity->new_weight; - entity->orig_weight = entity->new_weight; - entity->ioprio = entity->new_ioprio; - entity->ioprio_class = entity->new_ioprio_class; - entity->sched_data = &bfqg->sched_data; -} - -static inline struct bfq_group * -bfq_cic_update_cgroup(struct cfq_io_context *cic) -{ - struct bfq_data *bfqd = cic->key; - return bfqd->root_group; -} - -static inline void bfq_bfqq_move(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct bfq_entity *entity, - struct bfq_group *bfqg) -{ -} - -static inline void bfq_disconnect_groups(struct bfq_data *bfqd) -{ - bfq_put_async_queues(bfqd, bfqd->root_group); -} - -static inline void bfq_free_root_group(struct bfq_data *bfqd) -{ - kfree(bfqd->root_group); -} - -static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) -{ - struct bfq_group *bfqg; - int i; - - bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); - if (bfqg == NULL) - return NULL; - - for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) - bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; - - return bfqg; -} -#endif diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c deleted file mode 100644 index 8f2b6c61d3f..00000000000 --- a/block/bfq-ioc.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * BFQ: I/O context handling. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe - * - * Copyright (C) 2008 Fabio Checconi - * Paolo Valente - */ - -/** - * bfq_cic_free_rcu - deferred cic freeing. - * @head: RCU head of the cic to free. - * - * Free the cic containing @head and, if it was the last one and - * the module is exiting wake up anyone waiting for its deallocation - * (see bfq_exit()). - */ -static void bfq_cic_free_rcu(struct rcu_head *head) -{ - struct cfq_io_context *cic; - - cic = container_of(head, struct cfq_io_context, rcu_head); - - kmem_cache_free(bfq_ioc_pool, cic); - elv_ioc_count_dec(bfq_ioc_count); - - if (bfq_ioc_gone != NULL) { - spin_lock(&bfq_ioc_gone_lock); - if (bfq_ioc_gone != NULL && - !elv_ioc_count_read(bfq_ioc_count)) { - complete(bfq_ioc_gone); - bfq_ioc_gone = NULL; - } - spin_unlock(&bfq_ioc_gone_lock); - } -} - -static void bfq_cic_free(struct cfq_io_context *cic) -{ - call_rcu(&cic->rcu_head, bfq_cic_free_rcu); -} - -/** - * cic_free_func - disconnect a cic ready to be freed. - * @ioc: the io_context @cic belongs to. - * @cic: the cic to be freed. - * - * Remove @cic from the @ioc radix tree hash and from its cic list, - * deferring the deallocation of @cic to the end of the current RCU - * grace period. This assumes that __bfq_exit_single_io_context() - * has already been called for @cic. 
- */ -static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) -{ - unsigned long flags; - unsigned long dead_key = (unsigned long) cic->key; - - BUG_ON(!(dead_key & CIC_DEAD_KEY)); - - spin_lock_irqsave(&ioc->lock, flags); - radix_tree_delete(&ioc->bfq_radix_root, - dead_key >> CIC_DEAD_INDEX_SHIFT); - hlist_del_init_rcu(&cic->cic_list); - spin_unlock_irqrestore(&ioc->lock, flags); - - bfq_cic_free(cic); -} - -static void bfq_free_io_context(struct io_context *ioc) -{ - /* - * ioc->refcount is zero here, or we are called from elv_unregister(), - * so no more cic's are allowed to be linked into this ioc. So it - * should be ok to iterate over the known list, we will see all cic's - * since no new ones are added. - */ - call_for_each_cic(ioc, cic_free_func); -} - -/** - * __bfq_exit_single_io_context - deassociate @cic from any running task. - * @bfqd: bfq_data on which @cic is valid. - * @cic: the cic being exited. - * - * Whenever no more tasks are using @cic or @bfqd is deallocated we - * need to invalidate its entry in the radix tree hash table and to - * release the queues it refers to. - * - * Called under the queue lock. - */ -static void __bfq_exit_single_io_context(struct bfq_data *bfqd, - struct cfq_io_context *cic) -{ - struct io_context *ioc = cic->ioc; - - list_del_init(&cic->queue_list); - - /* - * Make sure dead mark is seen for dead queues - */ - smp_wmb(); - rcu_assign_pointer(cic->key, bfqd_dead_key(bfqd)); - - /* - * No write-side locking as no task is using @ioc (they're exited - * or bfqd is being deallocated. - */ - rcu_read_lock(); - if (rcu_dereference(ioc->ioc_data) == cic) { - rcu_read_unlock(); - spin_lock(&ioc->lock); - rcu_assign_pointer(ioc->ioc_data, NULL); - spin_unlock(&ioc->lock); - } else - rcu_read_unlock(); - - if (cic->cfqq[BLK_RW_ASYNC] != NULL) { - bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_ASYNC]); - cic->cfqq[BLK_RW_ASYNC] = NULL; - } - - if (cic->cfqq[BLK_RW_SYNC] != NULL) { - bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_SYNC]); - cic->cfqq[BLK_RW_SYNC] = NULL; - } -} - -/** - * bfq_exit_single_io_context - deassociate @cic from @ioc (unlocked version). - * @ioc: the io_context @cic belongs to. - * @cic: the cic being exited. - * - * Take the queue lock and call __bfq_exit_single_io_context() to do the - * rest of the work. We take care of possible races with bfq_exit_queue() - * using bfq_get_bfqd_locked() (and abusing a little bit the RCU mechanism). - */ -static void bfq_exit_single_io_context(struct io_context *ioc, - struct cfq_io_context *cic) -{ - struct bfq_data *bfqd; - unsigned long uninitialized_var(flags); - - bfqd = bfq_get_bfqd_locked(&cic->key, &flags); - if (bfqd != NULL) { - __bfq_exit_single_io_context(bfqd, cic); - bfq_put_bfqd_unlock(bfqd, &flags); - } -} - -/** - * bfq_exit_io_context - deassociate @ioc from all cics it owns. - * @ioc: the @ioc being exited. - * - * No more processes are using @ioc we need to clean up and put the - * internal structures we have that belongs to that process. Loop - * through all its cics, locking their queues and exiting them. 
- */ -static void bfq_exit_io_context(struct io_context *ioc) -{ - call_for_each_cic(ioc, bfq_exit_single_io_context); -} - -static struct cfq_io_context *bfq_alloc_io_context(struct bfq_data *bfqd, - gfp_t gfp_mask) -{ - struct cfq_io_context *cic; - - cic = kmem_cache_alloc_node(bfq_ioc_pool, gfp_mask | __GFP_ZERO, - bfqd->queue->node); - if (cic != NULL) { - cic->ttime.last_end_request = jiffies; - INIT_LIST_HEAD(&cic->queue_list); - INIT_HLIST_NODE(&cic->cic_list); - cic->dtor = bfq_free_io_context; - cic->exit = bfq_exit_io_context; - elv_ioc_count_inc(bfq_ioc_count); - } - - return cic; -} - -/** - * bfq_drop_dead_cic - free an exited cic. - * @bfqd: bfq data for the device in use. - * @ioc: io_context owning @cic. - * @cic: the @cic to free. - * - * We drop cfq io contexts lazily, so we may find a dead one. - */ -static void bfq_drop_dead_cic(struct bfq_data *bfqd, struct io_context *ioc, - struct cfq_io_context *cic) -{ - unsigned long flags; - - WARN_ON(!list_empty(&cic->queue_list)); - BUG_ON(cic->key != bfqd_dead_key(bfqd)); - - spin_lock_irqsave(&ioc->lock, flags); - - BUG_ON(ioc->ioc_data == cic); - - /* - * With shared I/O contexts two lookups may race and drop the - * same cic more than one time: RCU guarantees that the storage - * will not be freed too early, here we make sure that we do - * not try to remove the cic from the hashing structures multiple - * times. - */ - if (!hlist_unhashed(&cic->cic_list)) { - radix_tree_delete(&ioc->bfq_radix_root, bfqd->cic_index); - hlist_del_init_rcu(&cic->cic_list); - bfq_cic_free(cic); - } - - spin_unlock_irqrestore(&ioc->lock, flags); -} - -/** - * bfq_cic_lookup - search into @ioc a cic associated to @bfqd. - * @bfqd: the lookup key. - * @ioc: the io_context of the process doing I/O. - * - * If @ioc already has a cic associated to @bfqd return it, return %NULL - * otherwise. - */ -static struct cfq_io_context *bfq_cic_lookup(struct bfq_data *bfqd, - struct io_context *ioc) -{ - struct cfq_io_context *cic; - unsigned long flags; - void *k; - - if (unlikely(ioc == NULL)) - return NULL; - - rcu_read_lock(); - - /* We maintain a last-hit cache, to avoid browsing over the tree. */ - cic = rcu_dereference(ioc->ioc_data); - if (cic != NULL) { - k = rcu_dereference(cic->key); - if (k == bfqd) - goto out; - } - - do { - cic = radix_tree_lookup(&ioc->bfq_radix_root, - bfqd->cic_index); - if (cic == NULL) - goto out; - - k = rcu_dereference(cic->key); - if (unlikely(k != bfqd)) { - rcu_read_unlock(); - bfq_drop_dead_cic(bfqd, ioc, cic); - rcu_read_lock(); - continue; - } - - spin_lock_irqsave(&ioc->lock, flags); - rcu_assign_pointer(ioc->ioc_data, cic); - spin_unlock_irqrestore(&ioc->lock, flags); - break; - } while (1); - -out: - rcu_read_unlock(); - - return cic; -} - -/** - * bfq_cic_link - add @cic to @ioc. - * @bfqd: bfq_data @cic refers to. - * @ioc: io_context @cic belongs to. - * @cic: the cic to link. - * @gfp_mask: the mask to use for radix tree preallocations. - * - * Add @cic to @ioc, using @bfqd as the search key. This enables us to - * lookup the process specific cfq io context when entered from the block - * layer. Also adds @cic to a per-bfqd list, used when this queue is - * removed. - */ -static int bfq_cic_link(struct bfq_data *bfqd, struct io_context *ioc, - struct cfq_io_context *cic, gfp_t gfp_mask) -{ - unsigned long flags; - int ret; - - ret = radix_tree_preload(gfp_mask); - if (ret == 0) { - cic->ioc = ioc; - - /* No write-side locking, cic is not published yet. 
*/ - rcu_assign_pointer(cic->key, bfqd); - - spin_lock_irqsave(&ioc->lock, flags); - ret = radix_tree_insert(&ioc->bfq_radix_root, - bfqd->cic_index, cic); - if (ret == 0) - hlist_add_head_rcu(&cic->cic_list, &ioc->bfq_cic_list); - spin_unlock_irqrestore(&ioc->lock, flags); - - radix_tree_preload_end(); - - if (ret == 0) { - spin_lock_irqsave(bfqd->queue->queue_lock, flags); - list_add(&cic->queue_list, &bfqd->cic_list); - spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); - } - } - - if (ret != 0) - printk(KERN_ERR "bfq: cic link failed!\n"); - - return ret; -} - -/** - * bfq_ioc_set_ioprio - signal a priority change to the cics belonging to @ioc. - * @ioc: the io_context changing its priority. - */ -static inline void bfq_ioc_set_ioprio(struct io_context *ioc) -{ - call_for_each_cic(ioc, bfq_changed_ioprio); -} - -/** - * bfq_get_io_context - return the @cic associated to @bfqd in @ioc. - * @bfqd: the search key. - * @gfp_mask: the mask to use for cic allocation. - * - * Setup general io context and cfq io context. There can be several cfq - * io contexts per general io context, if this process is doing io to more - * than one device managed by cfq. - */ -static struct cfq_io_context *bfq_get_io_context(struct bfq_data *bfqd, - gfp_t gfp_mask) -{ - struct io_context *ioc = NULL; - struct cfq_io_context *cic; - - might_sleep_if(gfp_mask & __GFP_WAIT); - - ioc = get_io_context(gfp_mask, bfqd->queue->node); - if (ioc == NULL) - return NULL; - - /* Lookup for an existing cic. */ - cic = bfq_cic_lookup(bfqd, ioc); - if (cic != NULL) - goto out; - - /* Alloc one if needed. */ - cic = bfq_alloc_io_context(bfqd, gfp_mask); - if (cic == NULL) - goto err; - - /* Link it into the ioc's radix tree and cic list. */ - if (bfq_cic_link(bfqd, ioc, cic, gfp_mask) != 0) - goto err_free; - -out: - /* - * test_and_clear_bit() implies a memory barrier, paired with - * the wmb() in fs/ioprio.c, so the value seen for ioprio is the - * new one. - */ - if (unlikely(test_and_clear_bit(IOC_BFQ_IOPRIO_CHANGED, - ioc->ioprio_changed))) - bfq_ioc_set_ioprio(ioc); - - return cic; -err_free: - bfq_cic_free(cic); -err: - put_io_context(ioc); - return NULL; -} diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c deleted file mode 100644 index 576cd03a28c..00000000000 --- a/block/bfq-iosched.c +++ /dev/null @@ -1,3021 +0,0 @@ -/* - * BFQ, or Budget Fair Queueing, disk scheduler. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe - * - * Copyright (C) 2008 Fabio Checconi - * Paolo Valente - * - * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. - * - * BFQ is a proportional share disk scheduling algorithm based on the - * slice-by-slice service scheme of CFQ. But BFQ assigns budgets, - * measured in number of sectors, to tasks instead of time slices. - * The disk is not granted to the active task for a given time slice, - * but until it has exahusted its assigned budget. This change from - * the time to the service domain allows BFQ to distribute the disk - * bandwidth among tasks as desired, without any distortion due to - * ZBR, workload fluctuations or other factors. BFQ uses an ad hoc - * internal scheduler, called B-WF2Q+, to schedule tasks according to - * their budgets. Thanks to this accurate scheduler, BFQ can afford - * to assign high budgets to disk-bound non-seeky tasks (to boost the - * throughput), and yet guarantee low latencies to interactive and - * soft real-time applications. 
- * - * BFQ has been introduced in [1], where the interested reader can - * find an accurate description of the algorithm, the bandwidth - * distribution and latency guarantees it provides, plus formal proofs - * of all the properties. With respect to the algorithm presented in - * the paper, this implementation adds several little heuristics, and - * a hierarchical extension, based on H-WF2Q+. - * - * B-WF2Q+ is based on WF2Q+, that is described in [2], together with - * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) - * complexity derives from the one introduced with EEVDF in [3]. - * - * [1] P. Valente and F. Checconi, ``High Throughput Disk Scheduling - * with Deterministic Guarantees on Bandwidth Distribution,'', - * IEEE Transactions on Computer, May 2010. - * - * http://algo.ing.unimo.it/people/paolo/disk_sched/bfq-techreport.pdf - * - * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing - * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, - * Oct 1997. - * - * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz - * - * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline - * First: A Flexible and Accurate Mechanism for Proportional Share - * Resource Allocation,'' technical report. - * - * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "bfq.h" - -/* Max number of dispatches in one round of service. */ -static const int bfq_quantum = 4; - -/* Expiration time of sync (0) and async (1) requests, in jiffies. */ -static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; - -/* Maximum backwards seek, in KiB. */ -static const int bfq_back_max = 16 * 1024; - -/* Penalty of a backwards seek, in number of sectors. */ -static const int bfq_back_penalty = 2; - -/* Idling period duration, in jiffies. */ -static int bfq_slice_idle = HZ / 125; - -/* Default maximum budget values, in sectors and number of requests. */ -static const int bfq_default_max_budget = 16 * 1024; -static const int bfq_max_budget_async_rq = 4; - -/* - * Async to sync throughput distribution is controlled as follows: - * when an async request is served, the entity is charged the number - * of sectors of the request, multipled by the factor below - */ -static const int bfq_async_charge_factor = 10; - -/* Default timeout values, in jiffies, approximating CFQ defaults. */ -static const int bfq_timeout_sync = HZ / 8; -static int bfq_timeout_async = HZ / 25; - -struct kmem_cache *bfq_pool; -struct kmem_cache *bfq_ioc_pool; - -static DEFINE_PER_CPU(unsigned long, bfq_ioc_count); -static struct completion *bfq_ioc_gone; -static DEFINE_SPINLOCK(bfq_ioc_gone_lock); - -static DEFINE_SPINLOCK(cic_index_lock); -static DEFINE_IDA(cic_index_ida); - -/* Below this threshold (in ms), we consider thinktime immediate. */ -#define BFQ_MIN_TT 2 - -/* hw_tag detection: parallel requests threshold and min samples needed. */ -#define BFQ_HW_QUEUE_THRESHOLD 4 -#define BFQ_HW_QUEUE_SAMPLES 32 - -#define BFQQ_SEEK_THR (sector_t)(8 * 1024) -#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) - -/* Min samples used for peak rate estimation (for autotuning). */ -#define BFQ_PEAK_RATE_SAMPLES 32 - -/* Shift used for peak rate fixed precision calculations. 
*/ -#define BFQ_RATE_SHIFT 16 - -/* - * The duration of the weight raising for interactive applications is - * computed automatically (as default behaviour), using the following - * formula: duration = (R / r) * T, where r is the peak rate of the - * disk, and R and T are two reference parameters. In particular, R is - * the peak rate of a reference disk, and T is about the maximum time - * for starting popular large applications on that disk, under BFQ and - * while reading two files in parallel. Finally, BFQ uses two - * different pairs (R, T) depending on whether the disk is rotational - * or non-rotational. - */ -#define T_rot (msecs_to_jiffies(5500)) -#define T_nonrot (msecs_to_jiffies(2000)) -/* Next two quantities are in sectors/usec, left-shifted by BFQ_RATE_SHIFT */ -#define R_rot 17415 -#define R_nonrot 34791 - -#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ - { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) - -#define RQ_CIC(rq) \ - ((struct cfq_io_context *) (rq)->elevator_private[0]) -#define RQ_BFQQ(rq) ((rq)->elevator_private[1]) - -#include "bfq-ioc.c" -#include "bfq-sched.c" -#include "bfq-cgroup.c" - -#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ - IOPRIO_CLASS_IDLE) -#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ - IOPRIO_CLASS_RT) - -#define bfq_sample_valid(samples) ((samples) > 80) - -/* - * We regard a request as SYNC, if either it's a read or has the SYNC bit - * set (in which case it could also be a direct WRITE). - */ -static inline int bfq_bio_sync(struct bio *bio) -{ - if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) - return 1; - - return 0; -} - -/* - * Scheduler run of queue, if there are requests pending and no one in the - * driver that will restart queueing. - */ -static inline void bfq_schedule_dispatch(struct bfq_data *bfqd) -{ - if (bfqd->queued != 0) { - bfq_log(bfqd, "schedule dispatch"); - kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work); - } -} - -/* - * Lifted from AS - choose which of rq1 and rq2 that is best served now. - * We choose the request that is closesr to the head right now. Distance - * behind the head is penalized and only allowed to a certain extent. - */ -static struct request *bfq_choose_req(struct bfq_data *bfqd, - struct request *rq1, - struct request *rq2, - sector_t last) -{ - sector_t s1, s2, d1 = 0, d2 = 0; - unsigned long back_max; -#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ -#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ - unsigned wrap = 0; /* bit mask: requests behind the disk head? */ - - if (rq1 == NULL || rq1 == rq2) - return rq2; - if (rq2 == NULL) - return rq1; - - if (rq_is_sync(rq1) && !rq_is_sync(rq2)) - return rq1; - else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) - return rq2; - if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) - return rq1; - else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) - return rq2; - - s1 = blk_rq_pos(rq1); - s2 = blk_rq_pos(rq2); - - /* - * By definition, 1KiB is 2 sectors. - */ - back_max = bfqd->bfq_back_max * 2; - - /* - * Strict one way elevator _except_ in the case where we allow - * short backward seeks which are biased as twice the cost of a - * similar forward seek. 
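For illustration, a minimal standalone sketch of the distance rule described above: a forward distance is taken as-is, a backward seek is allowed only within back_max and charged back_penalty times the distance, and anything further behind the head counts as wrapping. The seek_cost() helper, the sector values and the main() driver are hypothetical; back_max and back_penalty mirror the defaults quoted earlier (16*1024 KiB, i.e. 32768 sectors, and 2).

#include <stdio.h>

typedef unsigned long long sector_t;

/* Penalized distance of a request at pos from the head at last, or -1
 * if the request lies more than back_max sectors behind the head. */
static long long seek_cost(sector_t pos, sector_t last,
			   sector_t back_max, unsigned back_penalty)
{
	if (pos >= last)		/* forward seek: plain distance */
		return (long long)(pos - last);
	if (pos + back_max >= last)	/* short backward seek: penalized */
		return (long long)(last - pos) * back_penalty;
	return -1;			/* wraps behind the head */
}

int main(void)
{
	sector_t last = 10000;

	printf("%lld\n", seek_cost(10100, last, 32 * 1024, 2)); /* 100 */
	printf("%lld\n", seek_cost( 9900, last, 32 * 1024, 2)); /* 200 */
	printf("%lld\n", seek_cost(  100, last, 32 * 1024, 2)); /* 19800 */
	return 0;
}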
- */ - if (s1 >= last) - d1 = s1 - last; - else if (s1 + back_max >= last) - d1 = (last - s1) * bfqd->bfq_back_penalty; - else - wrap |= BFQ_RQ1_WRAP; - - if (s2 >= last) - d2 = s2 - last; - else if (s2 + back_max >= last) - d2 = (last - s2) * bfqd->bfq_back_penalty; - else - wrap |= BFQ_RQ2_WRAP; - - /* Found required data */ - - /* - * By doing switch() on the bit mask "wrap" we avoid having to - * check two variables for all permutations: --> faster! - */ - switch (wrap) { - case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ - if (d1 < d2) - return rq1; - else if (d2 < d1) - return rq2; - else { - if (s1 >= s2) - return rq1; - else - return rq2; - } - - case BFQ_RQ2_WRAP: - return rq1; - case BFQ_RQ1_WRAP: - return rq2; - case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ - default: - /* - * Since both rqs are wrapped, - * start with the one that's further behind head - * (--> only *one* back seek required), - * since back seek takes more time than forward. - */ - if (s1 <= s2) - return rq1; - else - return rq2; - } -} - -static struct bfq_queue * -bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, - sector_t sector, struct rb_node **ret_parent, - struct rb_node ***rb_link) -{ - struct rb_node **p, *parent; - struct bfq_queue *bfqq = NULL; - - parent = NULL; - p = &root->rb_node; - while (*p) { - struct rb_node **n; - - parent = *p; - bfqq = rb_entry(parent, struct bfq_queue, pos_node); - - /* - * Sort strictly based on sector. Smallest to the left, - * largest to the right. - */ - if (sector > blk_rq_pos(bfqq->next_rq)) - n = &(*p)->rb_right; - else if (sector < blk_rq_pos(bfqq->next_rq)) - n = &(*p)->rb_left; - else - break; - p = n; - bfqq = NULL; - } - - *ret_parent = parent; - if (rb_link) - *rb_link = p; - - bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", - (long long unsigned)sector, - bfqq != NULL ? 
bfqq->pid : 0); - - return bfqq; -} - -static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) -{ - struct rb_node **p, *parent; - struct bfq_queue *__bfqq; - - if (bfqq->pos_root != NULL) { - rb_erase(&bfqq->pos_node, bfqq->pos_root); - bfqq->pos_root = NULL; - } - - if (bfq_class_idle(bfqq)) - return; - if (!bfqq->next_rq) - return; - - bfqq->pos_root = &bfqd->rq_pos_tree; - __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, - blk_rq_pos(bfqq->next_rq), &parent, &p); - if (__bfqq == NULL) { - rb_link_node(&bfqq->pos_node, parent, p); - rb_insert_color(&bfqq->pos_node, bfqq->pos_root); - } else - bfqq->pos_root = NULL; -} - -static struct request *bfq_find_next_rq(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct request *last) -{ - struct rb_node *rbnext = rb_next(&last->rb_node); - struct rb_node *rbprev = rb_prev(&last->rb_node); - struct request *next = NULL, *prev = NULL; - - BUG_ON(RB_EMPTY_NODE(&last->rb_node)); - - if (rbprev != NULL) - prev = rb_entry_rq(rbprev); - - if (rbnext != NULL) - next = rb_entry_rq(rbnext); - else { - rbnext = rb_first(&bfqq->sort_list); - if (rbnext && rbnext != &last->rb_node) - next = rb_entry_rq(rbnext); - } - - return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); -} - -static void bfq_del_rq_rb(struct request *rq) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - struct bfq_data *bfqd = bfqq->bfqd; - const int sync = rq_is_sync(rq); - - BUG_ON(bfqq->queued[sync] == 0); - bfqq->queued[sync]--; - bfqd->queued--; - - elv_rb_del(&bfqq->sort_list, rq); - - if (RB_EMPTY_ROOT(&bfqq->sort_list)) { - if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->active_queue) - bfq_del_bfqq_busy(bfqd, bfqq, 1); - /* - * Remove queue from request-position tree as it is empty. - */ - if (bfqq->pos_root != NULL) { - rb_erase(&bfqq->pos_node, bfqq->pos_root); - bfqq->pos_root = NULL; - } - } -} - -/* see the definition of bfq_async_charge_factor for details */ -static inline unsigned long bfq_serv_to_charge(struct request *rq, - struct bfq_queue *bfqq) -{ - return blk_rq_sectors(rq) * - (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->raising_coeff == 1) * - bfq_async_charge_factor)); -} - -/** - * bfq_updated_next_req - update the queue after a new next_rq selection. - * @bfqd: the device data the queue belongs to. - * @bfqq: the queue to update. - * - * If the first request of a queue changes we make sure that the queue - * has enough budget to serve at least its first request (if the - * request has grown). We do this because if the queue has not enough - * budget for its first request, it has to go through two dispatch - * rounds to actually get it dispatched. - */ -static void bfq_updated_next_req(struct bfq_data *bfqd, - struct bfq_queue *bfqq) -{ - struct bfq_entity *entity = &bfqq->entity; - struct bfq_service_tree *st = bfq_entity_service_tree(entity); - struct request *next_rq = bfqq->next_rq; - unsigned long new_budget; - - if (next_rq == NULL) - return; - - if (bfqq == bfqd->active_queue) - /* - * In order not to break guarantees, budgets cannot be - * changed after an entity has been selected. 
- */ - return; - - BUG_ON(entity->tree != &st->active); - BUG_ON(entity == entity->sched_data->active_entity); - - new_budget = max_t(unsigned long, bfqq->max_budget, - bfq_serv_to_charge(next_rq, bfqq)); - entity->budget = new_budget; - bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", new_budget); - bfq_activate_bfqq(bfqd, bfqq); -} - -static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd) -{ - u64 dur; - - if (bfqd->bfq_raising_max_time > 0) - return bfqd->bfq_raising_max_time; - - dur = bfqd->RT_prod; - do_div(dur, bfqd->peak_rate); - - return dur; -} - -static void bfq_add_rq_rb(struct request *rq) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - struct bfq_entity *entity = &bfqq->entity; - struct bfq_data *bfqd = bfqq->bfqd; - struct request *next_rq, *prev; - unsigned long old_raising_coeff = bfqq->raising_coeff; - int idle_for_long_time = bfqq->budget_timeout + - bfqd->bfq_raising_min_idle_time < jiffies; - - bfq_log_bfqq(bfqd, bfqq, "add_rq_rb %d", rq_is_sync(rq)); - bfqq->queued[rq_is_sync(rq)]++; - bfqd->queued++; - - elv_rb_add(&bfqq->sort_list, rq); - - /* - * Check if this request is a better next-serve candidate. - */ - prev = bfqq->next_rq; - next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); - BUG_ON(next_rq == NULL); - bfqq->next_rq = next_rq; - - /* - * Adjust priority tree position, if next_rq changes. - */ - if (prev != bfqq->next_rq) - bfq_rq_pos_tree_add(bfqd, bfqq); - - if (!bfq_bfqq_busy(bfqq)) { - int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && - bfqq->soft_rt_next_start < jiffies; - entity->budget = max_t(unsigned long, bfqq->max_budget, - bfq_serv_to_charge(next_rq, bfqq)); - - if (! bfqd->low_latency) - goto add_bfqq_busy; - - /* - * If the queue is not being boosted and has been idle - * for enough time, start a weight-raising period - */ - if(old_raising_coeff == 1 && (idle_for_long_time || soft_rt)) { - bfqq->raising_coeff = bfqd->bfq_raising_coeff; - if (idle_for_long_time) - bfqq->raising_cur_max_time = - bfq_wrais_duration(bfqd); - else - bfqq->raising_cur_max_time = - bfqd->bfq_raising_rt_max_time; - bfq_log_bfqq(bfqd, bfqq, - "wrais starting at %llu msec," - "rais_max_time %u", - bfqq->last_rais_start_finish, - jiffies_to_msecs(bfqq-> - raising_cur_max_time)); - } else if (old_raising_coeff > 1) { - if (idle_for_long_time) - bfqq->raising_cur_max_time = - bfq_wrais_duration(bfqd); - else if (bfqq->raising_cur_max_time == - bfqd->bfq_raising_rt_max_time && - !soft_rt) { - bfqq->raising_coeff = 1; - bfq_log_bfqq(bfqd, bfqq, - "wrais ending at %llu msec," - "rais_max_time %u", - bfqq->last_rais_start_finish, - jiffies_to_msecs(bfqq-> - raising_cur_max_time)); - } - } - if (old_raising_coeff != bfqq->raising_coeff) - entity->ioprio_changed = 1; -add_bfqq_busy: - bfq_add_bfqq_busy(bfqd, bfqq); - } else { - if(bfqd->low_latency && old_raising_coeff == 1 && - !rq_is_sync(rq) && - bfqq->last_rais_start_finish + - bfqd->bfq_raising_min_inter_arr_async < jiffies) { - bfqq->raising_coeff = bfqd->bfq_raising_coeff; - bfqq->raising_cur_max_time = bfq_wrais_duration(bfqd); - - entity->ioprio_changed = 1; - bfq_log_bfqq(bfqd, bfqq, - "non-idle wrais starting at %llu msec," - "rais_max_time %u", - bfqq->last_rais_start_finish, - jiffies_to_msecs(bfqq-> - raising_cur_max_time)); - } - bfq_updated_next_req(bfqd, bfqq); - } - - if(bfqd->low_latency && - (old_raising_coeff == 1 || bfqq->raising_coeff == 1 || - idle_for_long_time)) - bfqq->last_rais_start_finish = jiffies; -} - -static void bfq_reposition_rq_rb(struct 
bfq_queue *bfqq, struct request *rq) -{ - elv_rb_del(&bfqq->sort_list, rq); - bfqq->queued[rq_is_sync(rq)]--; - bfqq->bfqd->queued--; - bfq_add_rq_rb(rq); -} - -static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, - struct bio *bio) -{ - struct task_struct *tsk = current; - struct cfq_io_context *cic; - struct bfq_queue *bfqq; - - cic = bfq_cic_lookup(bfqd, tsk->io_context); - if (cic == NULL) - return NULL; - - bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); - if (bfqq != NULL) { - sector_t sector = bio->bi_sector + bio_sectors(bio); - - return elv_rb_find(&bfqq->sort_list, sector); - } - - return NULL; -} - -static void bfq_activate_request(struct request_queue *q, struct request *rq) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - - bfqd->rq_in_driver++; - bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); - bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", - (long long unsigned)bfqd->last_position); -} - -static void bfq_deactivate_request(struct request_queue *q, struct request *rq) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - - WARN_ON(bfqd->rq_in_driver == 0); - bfqd->rq_in_driver--; -} - -static void bfq_remove_request(struct request *rq) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - struct bfq_data *bfqd = bfqq->bfqd; - - if (bfqq->next_rq == rq) { - bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); - bfq_updated_next_req(bfqd, bfqq); - } - - list_del_init(&rq->queuelist); - bfq_del_rq_rb(rq); - - if (rq->cmd_flags & REQ_META) { - WARN_ON(bfqq->meta_pending == 0); - bfqq->meta_pending--; - } -} - -static int bfq_merge(struct request_queue *q, struct request **req, - struct bio *bio) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct request *__rq; - - __rq = bfq_find_rq_fmerge(bfqd, bio); - if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { - *req = __rq; - return ELEVATOR_FRONT_MERGE; - } - - return ELEVATOR_NO_MERGE; -} - -static void bfq_merged_request(struct request_queue *q, struct request *req, - int type) -{ - if (type == ELEVATOR_FRONT_MERGE) { - struct bfq_queue *bfqq = RQ_BFQQ(req); - - bfq_reposition_rq_rb(bfqq, req); - } -} - -static void bfq_merged_requests(struct request_queue *q, struct request *rq, - struct request *next) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - - /* - * Reposition in fifo if next is older than rq. - */ - if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && - time_before(rq_fifo_time(next), rq_fifo_time(rq))) { - list_move(&rq->queuelist, &next->queuelist); - rq_set_fifo_time(rq, rq_fifo_time(next)); - } - - if (bfqq->next_rq == next) - bfqq->next_rq = rq; - - bfq_remove_request(next); -} - -static int bfq_allow_merge(struct request_queue *q, struct request *rq, - struct bio *bio) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct cfq_io_context *cic; - struct bfq_queue *bfqq; - - /* Disallow merge of a sync bio into an async request. */ - if (bfq_bio_sync(bio) && !rq_is_sync(rq)) - return 0; - - /* - * Lookup the bfqq that this bio will be queued with. Allow - * merge only if rq is queued there. 
- */ - cic = bfq_cic_lookup(bfqd, current->io_context); - if (cic == NULL) - return 0; - - bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); - return bfqq == RQ_BFQQ(rq); -} - -static void __bfq_set_active_queue(struct bfq_data *bfqd, - struct bfq_queue *bfqq) -{ - if (bfqq != NULL) { - bfq_mark_bfqq_must_alloc(bfqq); - bfq_mark_bfqq_budget_new(bfqq); - bfq_clear_bfqq_fifo_expire(bfqq); - - bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; - - bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu", - bfqq->entity.budget); - } - - bfqd->active_queue = bfqq; -} - -/* - * Get and set a new active queue for service. - */ -static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd, - struct bfq_queue *bfqq) -{ - if (!bfqq) - bfqq = bfq_get_next_queue(bfqd); - else - bfq_get_next_queue_forced(bfqd, bfqq); - - __bfq_set_active_queue(bfqd, bfqq); - return bfqq; -} - -static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd, - struct request *rq) -{ - if (blk_rq_pos(rq) >= bfqd->last_position) - return blk_rq_pos(rq) - bfqd->last_position; - else - return bfqd->last_position - blk_rq_pos(rq); -} - -/* - * Return true if bfqq has no request pending and rq is close enough to - * bfqd->last_position, or if rq is closer to bfqd->last_position than - * bfqq->next_rq - */ -static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq) -{ - return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR; -} - -static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) -{ - struct rb_root *root = &bfqd->rq_pos_tree; - struct rb_node *parent, *node; - struct bfq_queue *__bfqq; - sector_t sector = bfqd->last_position; - - if (RB_EMPTY_ROOT(root)) - return NULL; - - /* - * First, if we find a request starting at the end of the last - * request, choose it. - */ - __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); - if (__bfqq != NULL) - return __bfqq; - - /* - * If the exact sector wasn't found, the parent of the NULL leaf - * will contain the closest sector (rq_pos_tree sorted by next_request - * position). - */ - __bfqq = rb_entry(parent, struct bfq_queue, pos_node); - if (bfq_rq_close(bfqd, __bfqq->next_rq)) - return __bfqq; - - if (blk_rq_pos(__bfqq->next_rq) < sector) - node = rb_next(&__bfqq->pos_node); - else - node = rb_prev(&__bfqq->pos_node); - if (node == NULL) - return NULL; - - __bfqq = rb_entry(node, struct bfq_queue, pos_node); - if (bfq_rq_close(bfqd, __bfqq->next_rq)) - return __bfqq; - - return NULL; -} - -/* - * bfqd - obvious - * cur_bfqq - passed in so that we don't decide that the current queue - * is closely cooperating with itself. - * - * We are assuming that cur_bfqq has dispatched at least one request, - * and that bfqd->last_position reflects a position on the disk associated - * with the I/O issued by cur_bfqq. - */ -static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, - struct bfq_queue *cur_bfqq) -{ - struct bfq_queue *bfqq; - - if (bfq_class_idle(cur_bfqq)) - return NULL; - if (!bfq_bfqq_sync(cur_bfqq)) - return NULL; - if (BFQQ_SEEKY(cur_bfqq)) - return NULL; - - /* If device has only one backlogged bfq_queue, don't search. */ - if (bfqd->busy_queues == 1) - return NULL; - - /* - * We should notice if some of the queues are cooperating, e.g. - * working closely on the same area of the disk. In that case, - * we can group them together and don't waste time idling. - */ - bfqq = bfqq_close(bfqd); - if (bfqq == NULL || bfqq == cur_bfqq) - return NULL; - - /* - * Do not merge queues from different bfq_groups. 
- */ - if (bfqq->entity.parent != cur_bfqq->entity.parent) - return NULL; - - /* - * It only makes sense to merge sync queues. - */ - if (!bfq_bfqq_sync(bfqq)) - return NULL; - if (BFQQ_SEEKY(bfqq)) - return NULL; - - /* - * Do not merge queues of different priority classes. - */ - if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) - return NULL; - - return bfqq; -} - -/* - * If enough samples have been computed, return the current max budget - * stored in bfqd, which is dynamically updated according to the - * estimated disk peak rate; otherwise return the default max budget - */ -static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) -{ - if (bfqd->budgets_assigned < 194) - return bfq_default_max_budget; - else - return bfqd->bfq_max_budget; -} - -/* - * Return min budget, which is a fraction of the current or default - * max budget (trying with 1/32) - */ -static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) -{ - if (bfqd->budgets_assigned < 194) - return bfq_default_max_budget; - else - return bfqd->bfq_max_budget / 32; -} - -/* - * Decides whether idling should be done for given device and - * given active queue. - */ -static inline bool bfq_queue_nonrot_noidle(struct bfq_data *bfqd, - struct bfq_queue *active_bfqq) -{ - if (active_bfqq == NULL) - return false; - /* - * If device is SSD it has no seek penalty, disable idling; but - * do so only if: - * - device does not support queuing, otherwise we still have - * a problem with sync vs async workloads; - * - the queue is not weight-raised, to preserve guarantees. - */ - return (blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag && - active_bfqq->raising_coeff == 1); -} - -static void bfq_arm_slice_timer(struct bfq_data *bfqd) -{ - struct bfq_queue *bfqq = bfqd->active_queue; - struct cfq_io_context *cic; - unsigned long sl; - - WARN_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); - - if (bfq_queue_nonrot_noidle(bfqd, bfqq)) - return; - - /* Idling is disabled, either manually or by past process history. */ - if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_idle_window(bfqq)) - return; - - /* Tasks have exited, don't wait. */ - cic = bfqd->active_cic; - if (cic == NULL || atomic_read(&cic->ioc->nr_tasks) == 0) - return; - - bfq_mark_bfqq_wait_request(bfqq); - - /* - * We don't want to idle for seeks, but we do want to allow - * fair distribution of slice time for a process doing back-to-back - * seeks. So allow a little bit of time for him to submit a new rq. - * - * To prevent processes with (partly) seeky workloads from - * being too ill-treated, grant them a small fraction of the - * assigned budget before reducing the waiting time to - * BFQ_MIN_TT. This happened to help reduce latency. - */ - sl = bfqd->bfq_slice_idle; - if (bfq_sample_valid(bfqq->seek_samples) && BFQQ_SEEKY(bfqq) && - bfqq->entity.service > bfq_max_budget(bfqd) / 8 && - bfqq->raising_coeff == 1) - sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); - else if (bfqq->raising_coeff > 1) - sl = sl * 3; - bfqd->last_idling_start = ktime_get(); - mod_timer(&bfqd->idle_slice_timer, jiffies + sl); - bfq_log(bfqd, "arm idle: %u/%u ms", - jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); -} - -/* - * Set the maximum time for the active queue to consume its - * budget. This prevents seeky processes from lowering the disk - * throughput (always guaranteed with a time slice scheme as in CFQ). 
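To make the rule below concrete: bfq_set_budget_timeout() scales the base sync timeout (HZ/8, per the defaults above) by the ratio entity.weight / entity.orig_weight, so a weight-raised queue gets proportionally more time to consume its budget, except during soft real-time raising where the coefficient stays 1. A small standalone sketch, assuming HZ = 100 and a hypothetical raising factor of 10:

#include <stdio.h>

#define HZ 100				/* assumed tick rate for the example */

/* Sync budget timeout: base HZ/8 scaled by weight/orig_weight, with the
 * coefficient forced to 1 while the queue is in soft real-time raising. */
static unsigned long sync_budget_timeout(unsigned long now,
					 unsigned weight, unsigned orig_weight,
					 int soft_rt_raising)
{
	unsigned coeff = soft_rt_raising ? 1 : weight / orig_weight;

	return now + (HZ / 8) * coeff;
}

int main(void)
{
	/* Plain queue: expires HZ/8 = 12 jiffies (120 ms at HZ=100) later. */
	printf("%lu\n", sync_budget_timeout(1000, 100, 100, 0));	/* 1012 */
	/* Queue whose weight was raised tenfold: 120 jiffies later. */
	printf("%lu\n", sync_budget_timeout(1000, 1000, 100, 0));	/* 1120 */
	return 0;
}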
- */ -static void bfq_set_budget_timeout(struct bfq_data *bfqd) -{ - struct bfq_queue *bfqq = bfqd->active_queue; - unsigned int timeout_coeff; - if (bfqq->raising_cur_max_time == bfqd->bfq_raising_rt_max_time) - timeout_coeff = 1; - else - timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; - - bfqd->last_budget_start = ktime_get(); - - bfq_clear_bfqq_budget_new(bfqq); - bfqq->budget_timeout = jiffies + - bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; - - bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", - jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * - timeout_coeff)); -} - -/* - * Move request from internal lists to the request queue dispatch list. - */ -static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct bfq_queue *bfqq = RQ_BFQQ(rq); - - bfq_remove_request(rq); - bfqq->dispatched++; - elv_dispatch_sort(q, rq); - - if (bfq_bfqq_sync(bfqq)) - bfqd->sync_flight++; -} - -/* - * Return expired entry, or NULL to just start from scratch in rbtree. - */ -static struct request *bfq_check_fifo(struct bfq_queue *bfqq) -{ - struct request *rq = NULL; - - if (bfq_bfqq_fifo_expire(bfqq)) - return NULL; - - bfq_mark_bfqq_fifo_expire(bfqq); - - if (list_empty(&bfqq->fifo)) - return NULL; - - rq = rq_entry_fifo(bfqq->fifo.next); - - if (time_before(jiffies, rq_fifo_time(rq))) - return NULL; - - return rq; -} - -/* - * Must be called with the queue_lock held. - */ -static int bfqq_process_refs(struct bfq_queue *bfqq) -{ - int process_refs, io_refs; - - io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; - process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; - BUG_ON(process_refs < 0); - return process_refs; -} - -static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) -{ - int process_refs, new_process_refs; - struct bfq_queue *__bfqq; - - /* - * If there are no process references on the new_bfqq, then it is - * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain - * may have dropped their last reference (not just their last process - * reference). - */ - if (!bfqq_process_refs(new_bfqq)) - return; - - /* Avoid a circular list and skip interim queue merges. */ - while ((__bfqq = new_bfqq->new_bfqq)) { - if (__bfqq == bfqq) - return; - new_bfqq = __bfqq; - } - - process_refs = bfqq_process_refs(bfqq); - new_process_refs = bfqq_process_refs(new_bfqq); - /* - * If the process for the bfqq has gone away, there is no - * sense in merging the queues. - */ - if (process_refs == 0 || new_process_refs == 0) - return; - - /* - * Merge in the direction of the lesser amount of work. 
- */ - if (new_process_refs >= process_refs) { - bfqq->new_bfqq = new_bfqq; - atomic_add(process_refs, &new_bfqq->ref); - } else { - new_bfqq->new_bfqq = bfqq; - atomic_add(new_process_refs, &bfqq->ref); - } - bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", - new_bfqq->pid); -} - -static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) -{ - struct bfq_entity *entity = &bfqq->entity; - return entity->budget - entity->service; -} - -static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) -{ - BUG_ON(bfqq != bfqd->active_queue); - - __bfq_bfqd_reset_active(bfqd); - - if (RB_EMPTY_ROOT(&bfqq->sort_list)) { - bfq_del_bfqq_busy(bfqd, bfqq, 1); - /* - * overloading budget_timeout field to store when - * the queue remains with no backlog, used by - * the weight-raising mechanism - */ - bfqq->budget_timeout = jiffies ; - } - else { - bfq_activate_bfqq(bfqd, bfqq); - /* - * Resort priority tree of potential close cooperators. - */ - bfq_rq_pos_tree_add(bfqd, bfqq); - } - - /* - * If this bfqq is shared between multiple processes, check - * to make sure that those processes are still issuing I/Os - * within the mean seek distance. If not, it may be time to - * break the queues apart again. - */ - if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) - bfq_mark_bfqq_split_coop(bfqq); -} - -/** - * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. - * @bfqd: device data. - * @bfqq: queue to update. - * @reason: reason for expiration. - * - * Handle the feedback on @bfqq budget. See the body for detailed - * comments. - */ -static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - enum bfqq_expiration reason) -{ - struct request *next_rq; - unsigned long budget, min_budget; - - budget = bfqq->max_budget; - min_budget = bfq_min_budget(bfqd); - - BUG_ON(bfqq != bfqd->active_queue); - - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", - bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", - budget, bfq_min_budget(bfqd)); - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", - bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->active_queue)); - - if (bfq_bfqq_sync(bfqq)) { - switch (reason) { - /* - * Caveat: in all the following cases we trade latency - * for throughput. - */ - case BFQ_BFQQ_TOO_IDLE: - /* - * This is the only case where we may reduce - * the budget: if there is no requets of the - * process still waiting for completion, then - * we assume (tentatively) that the timer has - * expired because the batch of requests of - * the process could have been served with a - * smaller budget. Hence, betting that - * process will behave in the same way when it - * becomes backlogged again, we reduce its - * next budget. As long as we guess right, - * this budget cut reduces the latency - * experienced by the process. - * - * However, if there are still outstanding - * requests, then the process may have not yet - * issued its next request just because it is - * still waiting for the completion of some of - * the still oustanding ones. So in this - * subcase we do not reduce its budget, on the - * contrary we increase it to possibly boost - * the throughput, as discussed in the - * comments to the BUDGET_TIMEOUT case. 
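As a concrete reading of the case described above, and of the doubling applied in the BUDGET_TIMEOUT case just below: on a TOO_IDLE expiration the budget is cut by four min-budget units (never below the minimum) when nothing is outstanding, and doubled (capped at the maximum) when completions are still pending. A standalone sketch, taking the default 16384-sector max budget and the 1/32 min-budget fraction mentioned earlier as hypothetical inputs:

#include <stdio.h>

#define MAX_BUDGET	16384UL			/* default max budget, sectors */
#define MIN_BUDGET	(MAX_BUDGET / 32)	/* 512 sectors */

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

/* Budget update on a TOO_IDLE expiration. */
static unsigned long too_idle_feedback(unsigned long budget, int outstanding)
{
	if (outstanding)			/* completions still pending */
		return min_ul(budget * 2, MAX_BUDGET);
	if (budget > 5 * MIN_BUDGET)		/* bet on a smaller next budget */
		return budget - 4 * MIN_BUDGET;
	return MIN_BUDGET;
}

int main(void)
{
	printf("%lu\n", too_idle_feedback(8192, 0));	/* 8192 - 2048 = 6144 */
	printf("%lu\n", too_idle_feedback(1024, 0));	/* floored at 512 */
	printf("%lu\n", too_idle_feedback(8192, 1));	/* doubled to 16384 */
	return 0;
}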
- */ - if (bfqq->dispatched > 0) /* still oustanding reqs */ - budget = min(budget * 2, bfqd->bfq_max_budget); - else { - if (budget > 5 * min_budget) - budget -= 4 * min_budget; - else - budget = min_budget; - } - break; - case BFQ_BFQQ_BUDGET_TIMEOUT: - /* - * We double the budget here because: 1) it - * gives the chance to boost the throughput if - * this is not a seeky process (which may have - * bumped into this timeout because of, e.g., - * ZBR), 2) together with charge_full_budget - * it helps give seeky processes higher - * timestamps, and hence be served less - * frequently. - */ - budget = min(budget * 2, bfqd->bfq_max_budget); - break; - case BFQ_BFQQ_BUDGET_EXHAUSTED: - /* - * The process still has backlog, and did not - * let either the budget timeout or the disk - * idling timeout expire. Hence it is not - * seeky, has a short thinktime and may be - * happy with a higher budget too. So - * definitely increase the budget of this good - * candidate to boost the disk throughput. - */ - budget = min(budget * 4, bfqd->bfq_max_budget); - break; - case BFQ_BFQQ_NO_MORE_REQUESTS: - /* - * Leave the budget unchanged. - */ - default: - return; - } - } else /* async queue */ - /* async queues get always the maximum possible budget - * (their ability to dispatch is limited by - * @bfqd->bfq_max_budget_async_rq). - */ - budget = bfqd->bfq_max_budget; - - bfqq->max_budget = budget; - - if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 && - bfqq->max_budget > bfqd->bfq_max_budget) - bfqq->max_budget = bfqd->bfq_max_budget; - - /* - * Make sure that we have enough budget for the next request. - * Since the finish time of the bfqq must be kept in sync with - * the budget, be sure to call __bfq_bfqq_expire() after the - * update. - */ - next_rq = bfqq->next_rq; - if (next_rq != NULL) - bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, - bfq_serv_to_charge(next_rq, bfqq)); - else - bfqq->entity.budget = bfqq->max_budget; - - bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu", - next_rq != NULL ? blk_rq_sectors(next_rq) : 0, - bfqq->entity.budget); -} - -static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) -{ - unsigned long max_budget; - - /* - * The max_budget calculated when autotuning is equal to the - * amount of sectors transfered in timeout_sync at the - * estimated peak rate. - */ - max_budget = (unsigned long)(peak_rate * 1000 * - timeout >> BFQ_RATE_SHIFT); - - return max_budget; -} - -/* - * In addition to updating the peak rate, checks whether the process - * is "slow", and returns 1 if so. This slow flag is used, in addition - * to the budget timeout, to reduce the amount of service provided to - * seeky processes, and hence reduce their chances to lower the - * throughput. See the code for more details. - */ -static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, - int compensate, enum bfqq_expiration reason) -{ - u64 bw, usecs, expected, timeout; - ktime_t delta; - int update = 0; - - if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) - return 0; - - if (compensate) - delta = bfqd->last_idling_start; - else - delta = ktime_get(); - delta = ktime_sub(delta, bfqd->last_budget_start); - usecs = ktime_to_us(delta); - - /* Don't trust short/unrealistic values. */ - if (usecs < 100 || usecs >= LONG_MAX) - return 0; - - /* - * Calculate the bandwidth for the last slice. We use a 64 bit - * value to store the peak rate, in sectors per usec in fixed - * point math. 
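The fixed-point arithmetic used here is easier to follow with numbers. A standalone sketch, assuming a hypothetical slice of 2048 sectors (1 MiB) served in 10 ms and the 125 ms sync timeout implied by the HZ/8 default; it mirrors the bandwidth computation below and bfq_calc_max_budget() above, with BFQ_RATE_SHIFT = 16:

#include <stdio.h>

#define BFQ_RATE_SHIFT	16	/* same fixed-point shift as above */

int main(void)
{
	/* Hypothetical slice: 2048 sectors (1 MiB) served in 10 ms. */
	unsigned long long service_sectors = 2048;
	unsigned long long usecs = 10000;

	/* Rate in sectors/usec, 16-bit fixed point: ~0.2 sectors/usec,
	 * i.e. roughly 100 MiB/s. */
	unsigned long long bw = (service_sectors << BFQ_RATE_SHIFT) / usecs;

	/* Autotuned max budget: sectors transferable at that rate within
	 * the 125 ms sync budget timeout. */
	unsigned long long timeout_ms = 125;
	unsigned long long max_budget = (bw * 1000 * timeout_ms) >> BFQ_RATE_SHIFT;

	printf("bw = %llu\n", bw);				/* 13421 */
	printf("max_budget = %llu sectors\n", max_budget);	/* 25598, ~12.5 MiB */
	return 0;
}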
We do so to have enough precision in the estimate - * and to avoid overflows. - */ - bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; - do_div(bw, (unsigned long)usecs); - - timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); - - /* - * Use only long (> 20ms) intervals to filter out spikes for - * the peak rate estimation. - */ - if (usecs > 20000) { - if (bw > bfqd->peak_rate || - (!BFQQ_SEEKY(bfqq) && - reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { - bfq_log(bfqd, "measured bw =%llu", bw); - /* - * To smooth oscillations use a low-pass filter with - * alpha=7/8, i.e., - * new_rate = (7/8) * old_rate + (1/8) * bw - */ - do_div(bw, 8); - bfqd->peak_rate *= 7; - do_div(bfqd->peak_rate, 8); - bfqd->peak_rate += bw; - update = 1; - bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); - } - - update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; - - if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) - bfqd->peak_rate_samples++; - - if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && - update && bfqd->bfq_user_max_budget == 0) { - bfqd->bfq_max_budget = - bfq_calc_max_budget(bfqd->peak_rate, timeout); - bfq_log(bfqd, "new max_budget=%lu", - bfqd->bfq_max_budget); - } - } - - /* - * If the process has been served for a too short time - * interval to let its possible sequential accesses prevail on - * the initial seek time needed to move the disk head on the - * first sector it requested, then give the process a chance - * and for the moment return false. - */ - if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) - return 0; - - /* - * A process is considered ``slow'' (i.e., seeky, so that we - * cannot treat it fairly in the service domain, as it would - * slow down too much the other processes) if, when a slice - * ends for whatever reason, it has received service at a - * rate that would not be high enough to complete the budget - * before the budget timeout expiration. - */ - expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; - - /* - * Caveat: processes doing IO in the slower disk zones will - * tend to be slow(er) even if not seeky. And the estimated - * peak rate will actually be an average over the disk - * surface. Hence, to not be too harsh with unlucky processes, - * we keep a budget/3 margin of safety before declaring a - * process slow. - */ - return expected > (4 * bfqq->entity.budget) / 3; -} - -/** - * bfq_bfqq_expire - expire a queue. - * @bfqd: device owning the queue. - * @bfqq: the queue to expire. - * @compensate: if true, compensate for the time spent idling. - * @reason: the reason causing the expiration. - * - * - * If the process associated to the queue is slow (i.e., seeky), or in - * case of budget timeout, or, finally, if it is async, we - * artificially charge it an entire budget (independently of the - * actual service it received). As a consequence, the queue will get - * higher timestamps than the correct ones upon reactivation, and - * hence it will be rescheduled as if it had received more service - * than what it actually received. In the end, this class of processes - * will receive less service in proportion to how slowly they consume - * their budgets (and hence how seriously they tend to lower the - * throughput). - * - * In contrast, when a queue expires because it has been idling for - * too much or because it exhausted its budget, we do not touch the - * amount of service it has received. 
Hence when the queue will be - * reactivated and its timestamps updated, the latter will be in sync - * with the actual service received by the queue until expiration. - * - * Charging a full budget to the first type of queues and the exact - * service to the others has the effect of using the WF2Q+ policy to - * schedule the former on a timeslice basis, without violating the - * service domain guarantees of the latter. - */ -static void bfq_bfqq_expire(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - int compensate, - enum bfqq_expiration reason) -{ - int slow; - BUG_ON(bfqq != bfqd->active_queue); - - /* Update disk peak rate for autotuning and check whether the - * process is slow (see bfq_update_peak_rate). - */ - slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); - - /* - * As above explained, 'punish' slow (i.e., seeky), timed-out - * and async queues, to favor sequential sync workloads. - * - * Processes doing IO in the slower disk zones will tend to be - * slow(er) even if not seeky. Hence, since the estimated peak - * rate is actually an average over the disk surface, these - * processes may timeout just for bad luck. To avoid punishing - * them we do not charge a full budget to a process that - * succeeded in consuming at least 2/3 of its budget. - */ - if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && - bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) - bfq_bfqq_charge_full_budget(bfqq); - - if (bfqd->low_latency && bfqq->raising_coeff == 1) - bfqq->last_rais_start_finish = jiffies; - - if (bfqd->low_latency && bfqd->bfq_raising_max_softrt_rate > 0) { - if(reason != BFQ_BFQQ_BUDGET_TIMEOUT) - bfqq->soft_rt_next_start = - jiffies + - HZ * bfqq->entity.service / - bfqd->bfq_raising_max_softrt_rate; - else - bfqq->soft_rt_next_start = -1; /* infinity */ - } - bfq_log_bfqq(bfqd, bfqq, - "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow, - bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); - - /* Increase, decrease or leave budget unchanged according to reason */ - __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); - __bfq_bfqq_expire(bfqd, bfqq); -} - -/* - * Budget timeout is not implemented through a dedicated timer, but - * just checked on request arrivals and completions, as well as on - * idle timer expirations. - */ -static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) -{ - if (bfq_bfqq_budget_new(bfqq)) - return 0; - - if (time_before(jiffies, bfqq->budget_timeout)) - return 0; - - return 1; -} - -/* - * If we expire a queue that is waiting for the arrival of a new - * request, we may prevent the fictitious timestamp backshifting that - * allows the guarantees of the queue to be preserved (see [1] for - * this tricky aspect). Hence we return true only if this condition - * does not hold, or if the queue is slow enough to deserve only to be - * kicked off for preserving a high throughput. -*/ -static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) -{ - bfq_log_bfqq(bfqq->bfqd, bfqq, - "may_budget_timeout: wr %d left %d timeout %d", - bfq_bfqq_wait_request(bfqq), - bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, - bfq_bfqq_budget_timeout(bfqq)); - - return (!bfq_bfqq_wait_request(bfqq) || - bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) - && - bfq_bfqq_budget_timeout(bfqq); -} - -/* - * Select a queue for service. If we have a current active queue, - * check whether to continue servicing it, or retrieve and set a new one. 
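One detail worth a worked example is the soft real-time bookkeeping in bfq_bfqq_expire() above: after a non-timeout expiration, the queue may be treated as soft real-time again only once enough time has passed for its consumed service to stay within the configured rate. A standalone sketch with hypothetical numbers (HZ = 100 and a 4000 sectors/sec cap, neither taken from the patch):

#include <stdio.h>

#define HZ 100			/* assumed tick rate */

/* Earliest instant (in jiffies) at which a queue that just consumed
 * `service' sectors may again qualify as soft real-time, given the
 * maximum soft real-time rate in sectors per second. */
static unsigned long soft_rt_next_start(unsigned long now,
					unsigned long service,
					unsigned long max_rate)
{
	return now + HZ * service / max_rate;
}

int main(void)
{
	/* 1000 sectors against a 4000 sectors/sec cap: eligible again
	 * 25 jiffies (250 ms at HZ=100) after the expiration. */
	printf("%lu\n", soft_rt_next_start(1000, 1000, 4000));	/* 1025 */
	return 0;
}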
- */ -static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) -{ - struct bfq_queue *bfqq, *new_bfqq = NULL; - struct request *next_rq; - enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; - - bfqq = bfqd->active_queue; - if (bfqq == NULL) - goto new_queue; - - bfq_log_bfqq(bfqd, bfqq, "select_queue: already active queue"); - - /* - * If another queue has a request waiting within our mean seek - * distance, let it run. The expire code will check for close - * cooperators and put the close queue at the front of the - * service tree. If possible, merge the expiring queue with the - * new bfqq. - */ - new_bfqq = bfq_close_cooperator(bfqd, bfqq); - if (new_bfqq != NULL && bfqq->new_bfqq == NULL) - bfq_setup_merge(bfqq, new_bfqq); - - if (bfq_may_expire_for_budg_timeout(bfqq)) - goto expire; - - next_rq = bfqq->next_rq; - /* - * If bfqq has requests queued and it has enough budget left to - * serve them, keep the queue, otherwise expire it. - */ - if (next_rq != NULL) { - if (bfq_serv_to_charge(next_rq, bfqq) > - bfq_bfqq_budget_left(bfqq)) { - reason = BFQ_BFQQ_BUDGET_EXHAUSTED; - goto expire; - } else { - /* - * The idle timer may be pending because we may not - * disable disk idling even when a new request arrives - */ - if (timer_pending(&bfqd->idle_slice_timer)) { - /* - * If we get here: 1) at least a new request - * has arrived but we have not disabled the - * timer because the request was too small, - * 2) then the block layer has unplugged the - * device, causing the dispatch to be invoked. - * - * Since the device is unplugged, now the - * requests are probably large enough to - * provide a reasonable throughput. - * So we disable idling. - */ - bfq_clear_bfqq_wait_request(bfqq); - del_timer(&bfqd->idle_slice_timer); - } - if (new_bfqq == NULL) - goto keep_queue; - else - goto expire; - } - } - - /* - * No requests pending. If there is no cooperator, and the active - * queue still has requests in flight or is idling for a new request, - * then keep it. - */ - if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) || - (bfqq->dispatched != 0 && bfq_bfqq_idle_window(bfqq) && - !bfq_queue_nonrot_noidle(bfqd, bfqq)))) { - bfqq = NULL; - goto keep_queue; - } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) { - /* - * Expiring the queue because there is a close cooperator, - * cancel timer. - */ - bfq_clear_bfqq_wait_request(bfqq); - del_timer(&bfqd->idle_slice_timer); - } - - reason = BFQ_BFQQ_NO_MORE_REQUESTS; -expire: - bfq_bfqq_expire(bfqd, bfqq, 0, reason); -new_queue: - bfqq = bfq_set_active_queue(bfqd, new_bfqq); - bfq_log(bfqd, "select_queue: new queue %d returned", - bfqq != NULL ? 
bfqq->pid : 0); -keep_queue: - return bfqq; -} - -static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) -{ - if (bfqq->raising_coeff > 1) { /* queue is being boosted */ - struct bfq_entity *entity = &bfqq->entity; - - bfq_log_bfqq(bfqd, bfqq, - "raising period dur %u/%u msec, " - "old raising coeff %u, w %d(%d)", - jiffies_to_msecs(jiffies - - bfqq->last_rais_start_finish), - jiffies_to_msecs(bfqq->raising_cur_max_time), - bfqq->raising_coeff, - bfqq->entity.weight, bfqq->entity.orig_weight); - - BUG_ON(bfqq != bfqd->active_queue && entity->weight != - entity->orig_weight * bfqq->raising_coeff); - if(entity->ioprio_changed) - bfq_log_bfqq(bfqd, bfqq, - "WARN: pending prio change"); - /* - * If too much time has elapsed from the beginning - * of this weight-raising period and process is not soft - * real-time, stop it - */ - if (jiffies - bfqq->last_rais_start_finish > - bfqq->raising_cur_max_time) { - int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && - bfqq->soft_rt_next_start < jiffies; - - bfqq->last_rais_start_finish = jiffies; - if (soft_rt) - bfqq->raising_cur_max_time = - bfqd->bfq_raising_rt_max_time; - else { - bfqq->raising_coeff = 1; - entity->ioprio_changed = 1; - __bfq_entity_update_weight_prio( - bfq_entity_service_tree(entity), - entity); - } - } - } -} - - -/* - * Dispatch one request from bfqq, moving it to the request queue - * dispatch list. - */ -static int bfq_dispatch_request(struct bfq_data *bfqd, - struct bfq_queue *bfqq) -{ - int dispatched = 0; - struct request *rq; - unsigned long service_to_charge; - - BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); - - /* Follow expired path, else get first next available. */ - rq = bfq_check_fifo(bfqq); - if (rq == NULL) - rq = bfqq->next_rq; - service_to_charge = bfq_serv_to_charge(rq, bfqq); - - if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { - /* - * This may happen if the next rq is chosen - * in fifo order instead of sector order. - * The budget is properly dimensioned - * to be always sufficient to serve the next request - * only if it is chosen in sector order. The reason is - * that it would be quite inefficient and little useful - * to always make sure that the budget is large enough - * to serve even the possible next rq in fifo order. - * In fact, requests are seldom served in fifo order. - * - * Expire the queue for budget exhaustion, and - * make sure that the next act_budget is enough - * to serve the next request, even if it comes - * from the fifo expired path. - */ - bfqq->next_rq = rq; - /* - * Since this dispatch is failed, make sure that - * a new one will be performed - */ - if (!bfqd->rq_in_driver) - bfq_schedule_dispatch(bfqd); - goto expire; - } - - /* Finally, insert request into driver dispatch list. 
*/ - bfq_bfqq_served(bfqq, service_to_charge); - bfq_dispatch_insert(bfqd->queue, rq); - - update_raising_data(bfqd, bfqq); - - bfq_log_bfqq(bfqd, bfqq, "dispatched %u sec req (%llu), " - "budg left %lu", - blk_rq_sectors(rq), - (long long unsigned)blk_rq_pos(rq), - bfq_bfqq_budget_left(bfqq)); - - dispatched++; - - if (bfqd->active_cic == NULL) { - atomic_long_inc(&RQ_CIC(rq)->ioc->refcount); - bfqd->active_cic = RQ_CIC(rq); - } - - if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && - dispatched >= bfqd->bfq_max_budget_async_rq) || - bfq_class_idle(bfqq))) - goto expire; - - return dispatched; - -expire: - bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED); - return dispatched; -} - -static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) -{ - int dispatched = 0; - - while (bfqq->next_rq != NULL) { - bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); - dispatched++; - } - - BUG_ON(!list_empty(&bfqq->fifo)); - return dispatched; -} - -/* - * Drain our current requests. Used for barriers and when switching - * io schedulers on-the-fly. - */ -static int bfq_forced_dispatch(struct bfq_data *bfqd) -{ - struct bfq_queue *bfqq, *n; - struct bfq_service_tree *st; - int dispatched = 0; - - bfqq = bfqd->active_queue; - if (bfqq != NULL) - __bfq_bfqq_expire(bfqd, bfqq); - - /* - * Loop through classes, and be careful to leave the scheduler - * in a consistent state, as feedback mechanisms and vtime - * updates cannot be disabled during the process. - */ - list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { - st = bfq_entity_service_tree(&bfqq->entity); - - dispatched += __bfq_forced_dispatch_bfqq(bfqq); - bfqq->max_budget = bfq_max_budget(bfqd); - - bfq_forget_idle(st); - } - - BUG_ON(bfqd->busy_queues != 0); - - return dispatched; -} - -static int bfq_dispatch_requests(struct request_queue *q, int force) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct bfq_queue *bfqq; - int max_dispatch; - - bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); - if (bfqd->busy_queues == 0) - return 0; - - if (unlikely(force)) - return bfq_forced_dispatch(bfqd); - - if((bfqq = bfq_select_queue(bfqd)) == NULL) - return 0; - - max_dispatch = bfqd->bfq_quantum; - if (bfq_class_idle(bfqq)) - max_dispatch = 1; - - if (!bfq_bfqq_sync(bfqq)) - max_dispatch = bfqd->bfq_max_budget_async_rq; - - if (bfqq->dispatched >= max_dispatch) { - if (bfqd->busy_queues > 1) - return 0; - if (bfqq->dispatched >= 4 * max_dispatch) - return 0; - } - - if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) - return 0; - - bfq_clear_bfqq_wait_request(bfqq); - BUG_ON(timer_pending(&bfqd->idle_slice_timer)); - - if (! bfq_dispatch_request(bfqd, bfqq)) - return 0; - - bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d" - "(max_disp %d)", bfqq->pid, max_dispatch); - - return 1; -} - -/* - * Task holds one reference to the queue, dropped when task exits. Each rq - * in-flight on this queue also holds a reference, dropped when rq is freed. - * - * Queue lock must be held here. 
- */ -static void bfq_put_queue(struct bfq_queue *bfqq) -{ - struct bfq_data *bfqd = bfqq->bfqd; - - BUG_ON(atomic_read(&bfqq->ref) <= 0); - - bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, - atomic_read(&bfqq->ref)); - if (!atomic_dec_and_test(&bfqq->ref)) - return; - - BUG_ON(rb_first(&bfqq->sort_list) != NULL); - BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); - BUG_ON(bfqq->entity.tree != NULL); - BUG_ON(bfq_bfqq_busy(bfqq)); - BUG_ON(bfqd->active_queue == bfqq); - - bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); - - kmem_cache_free(bfq_pool, bfqq); -} - -static void bfq_put_cooperator(struct bfq_queue *bfqq) -{ - struct bfq_queue *__bfqq, *next; - - /* - * If this queue was scheduled to merge with another queue, be - * sure to drop the reference taken on that queue (and others in - * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. - */ - __bfqq = bfqq->new_bfqq; - while (__bfqq) { - if (__bfqq == bfqq) { - WARN(1, "bfqq->new_bfqq loop detected.\n"); - break; - } - next = __bfqq->new_bfqq; - bfq_put_queue(__bfqq); - __bfqq = next; - } -} - -static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) -{ - if (bfqq == bfqd->active_queue) { - __bfq_bfqq_expire(bfqd, bfqq); - bfq_schedule_dispatch(bfqd); - } - - bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, - atomic_read(&bfqq->ref)); - - bfq_put_cooperator(bfqq); - - bfq_put_queue(bfqq); -} - -/* - * Update the entity prio values; note that the new values will not - * be used until the next (re)activation. - */ -static void bfq_init_prio_data(struct bfq_queue *bfqq, struct io_context *ioc) -{ - struct task_struct *tsk = current; - int ioprio_class; - - if (!bfq_bfqq_prio_changed(bfqq)) - return; - - ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); - switch (ioprio_class) { - default: - printk(KERN_ERR "bfq: bad prio %x\n", ioprio_class); - case IOPRIO_CLASS_NONE: - /* - * No prio set, inherit CPU scheduling settings. - */ - bfqq->entity.new_ioprio = task_nice_ioprio(tsk); - bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); - break; - case IOPRIO_CLASS_RT: - bfqq->entity.new_ioprio = task_ioprio(ioc); - bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; - break; - case IOPRIO_CLASS_BE: - bfqq->entity.new_ioprio = task_ioprio(ioc); - bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; - break; - case IOPRIO_CLASS_IDLE: - bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; - bfqq->entity.new_ioprio = 7; - bfq_clear_bfqq_idle_window(bfqq); - break; - } - - bfqq->entity.ioprio_changed = 1; - - /* - * Keep track of original prio settings in case we have to temporarily - * elevate the priority of this queue. 
- */ - bfqq->org_ioprio = bfqq->entity.new_ioprio; - bfq_clear_bfqq_prio_changed(bfqq); -} - -static void bfq_changed_ioprio(struct io_context *ioc, - struct cfq_io_context *cic) -{ - struct bfq_data *bfqd; - struct bfq_queue *bfqq, *new_bfqq; - struct bfq_group *bfqg; - unsigned long uninitialized_var(flags); - - bfqd = bfq_get_bfqd_locked(&cic->key, &flags); - if (unlikely(bfqd == NULL)) - return; - - bfqq = cic->cfqq[BLK_RW_ASYNC]; - if (bfqq != NULL) { - bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, - sched_data); - new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, cic->ioc, - GFP_ATOMIC); - if (new_bfqq != NULL) { - cic->cfqq[BLK_RW_ASYNC] = new_bfqq; - bfq_log_bfqq(bfqd, bfqq, - "changed_ioprio: bfqq %p %d", - bfqq, atomic_read(&bfqq->ref)); - bfq_put_queue(bfqq); - } - } - - bfqq = cic->cfqq[BLK_RW_SYNC]; - if (bfqq != NULL) - bfq_mark_bfqq_prio_changed(bfqq); - - bfq_put_bfqd_unlock(bfqd, &flags); -} - -static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, - pid_t pid, int is_sync) -{ - RB_CLEAR_NODE(&bfqq->entity.rb_node); - INIT_LIST_HEAD(&bfqq->fifo); - - atomic_set(&bfqq->ref, 0); - bfqq->bfqd = bfqd; - - bfq_mark_bfqq_prio_changed(bfqq); - - if (is_sync) { - if (!bfq_class_idle(bfqq)) - bfq_mark_bfqq_idle_window(bfqq); - bfq_mark_bfqq_sync(bfqq); - } - - /* Tentative initial value to trade off between thr and lat */ - bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; - bfqq->pid = pid; - - bfqq->raising_coeff = 1; - bfqq->last_rais_start_finish = 0; - bfqq->soft_rt_next_start = -1; -} - -static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, - struct bfq_group *bfqg, - int is_sync, - struct io_context *ioc, - gfp_t gfp_mask) -{ - struct bfq_queue *bfqq, *new_bfqq = NULL; - struct cfq_io_context *cic; - -retry: - cic = bfq_cic_lookup(bfqd, ioc); - /* cic always exists here */ - bfqq = cic_to_bfqq(cic, is_sync); - - /* - * Always try a new alloc if we fall back to the OOM bfqq - * originally, since it should just be a temporary situation. 
- */ - if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { - bfqq = NULL; - if (new_bfqq != NULL) { - bfqq = new_bfqq; - new_bfqq = NULL; - } else if (gfp_mask & __GFP_WAIT) { - spin_unlock_irq(bfqd->queue->queue_lock); - new_bfqq = kmem_cache_alloc_node(bfq_pool, - gfp_mask | __GFP_ZERO, - bfqd->queue->node); - spin_lock_irq(bfqd->queue->queue_lock); - if (new_bfqq != NULL) - goto retry; - } else { - bfqq = kmem_cache_alloc_node(bfq_pool, - gfp_mask | __GFP_ZERO, - bfqd->queue->node); - } - - if (bfqq != NULL) { - bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); - bfq_log_bfqq(bfqd, bfqq, "allocated"); - } else { - bfqq = &bfqd->oom_bfqq; - bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); - } - - bfq_init_prio_data(bfqq, ioc); - bfq_init_entity(&bfqq->entity, bfqg); - } - - if (new_bfqq != NULL) - kmem_cache_free(bfq_pool, new_bfqq); - - return bfqq; -} - -static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, - struct bfq_group *bfqg, - int ioprio_class, int ioprio) -{ - switch (ioprio_class) { - case IOPRIO_CLASS_RT: - return &bfqg->async_bfqq[0][ioprio]; - case IOPRIO_CLASS_BE: - return &bfqg->async_bfqq[1][ioprio]; - case IOPRIO_CLASS_IDLE: - return &bfqg->async_idle_bfqq; - default: - BUG(); - } -} - -static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, - struct bfq_group *bfqg, int is_sync, - struct io_context *ioc, gfp_t gfp_mask) -{ - const int ioprio = task_ioprio(ioc); - const int ioprio_class = task_ioprio_class(ioc); - struct bfq_queue **async_bfqq = NULL; - struct bfq_queue *bfqq = NULL; - - if (!is_sync) { - async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, - ioprio); - bfqq = *async_bfqq; - } - - if (bfqq == NULL) - bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, ioc, gfp_mask); - - /* - * Pin the queue now that it's allocated, scheduler exit will prune it. - */ - if (!is_sync && *async_bfqq == NULL) { - atomic_inc(&bfqq->ref); - bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", - bfqq, atomic_read(&bfqq->ref)); - *async_bfqq = bfqq; - } - - atomic_inc(&bfqq->ref); - bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, - atomic_read(&bfqq->ref)); - return bfqq; -} - -static void bfq_update_io_thinktime(struct bfq_data *bfqd, - struct cfq_io_context *cic) -{ - unsigned long elapsed = jiffies - cic->ttime.last_end_request; - unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); - - cic->ttime.ttime_samples = (7*cic->ttime.ttime_samples + 256) / 8; - cic->ttime.ttime_total = (7*cic->ttime.ttime_total + 256*ttime) / 8; - cic->ttime.ttime_mean = (cic->ttime.ttime_total + 128) / cic->ttime.ttime_samples; -} - -static void bfq_update_io_seektime(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct request *rq) -{ - sector_t sdist; - u64 total; - - if (bfqq->last_request_pos < blk_rq_pos(rq)) - sdist = blk_rq_pos(rq) - bfqq->last_request_pos; - else - sdist = bfqq->last_request_pos - blk_rq_pos(rq); - - /* - * Don't allow the seek distance to get too large from the - * odd fragment, pagein, etc. 
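Both the think-time statistics above and the seek statistics below use the same integer decaying average: the old value keeps a 7/8 share and each new observation is folded in with a fixed weight of 256, so the sample counter converges to 256 and the mean tracks recent observations. A standalone sketch of the think-time variant with hypothetical 4-jiffy observations:

#include <stdio.h>

/* Decaying averages as used for cic->ttime above. */
static void ttime_update(unsigned long *samples, unsigned long *total,
			 unsigned long *mean, unsigned long ttime)
{
	*samples = (7 * *samples + 256) / 8;
	*total   = (7 * *total + 256 * ttime) / 8;
	*mean    = (*total + 128) / *samples;
}

int main(void)
{
	unsigned long samples = 0, total = 0, mean = 0;
	int i;

	/* Ten identical 4-jiffy think times: samples climbs toward its
	 * fixed point of 256 (treated as valid once above 80) and the
	 * mean converges down toward the observed value. */
	for (i = 0; i < 10; i++) {
		ttime_update(&samples, &total, &mean, 4);
		printf("step %2d: samples=%lu mean=%lu\n", i + 1, samples, mean);
	}
	return 0;
}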
- */ - if (bfqq->seek_samples == 0) /* first request, not really a seek */ - sdist = 0; - else if (bfqq->seek_samples <= 60) /* second & third seek */ - sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); - else - sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); - - bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; - bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; - total = bfqq->seek_total + (bfqq->seek_samples/2); - do_div(total, bfqq->seek_samples); - if (bfq_bfqq_coop(bfqq)) { - /* - * If the mean seektime increases for a (non-seeky) shared - * queue, some cooperator is likely to be idling too much. - * On the contrary, if it decreases, some cooperator has - * probably waked up. - * - */ - if ((sector_t)total < bfqq->seek_mean) - bfq_mark_bfqq_some_coop_idle(bfqq) ; - else if ((sector_t)total > bfqq->seek_mean) - bfq_clear_bfqq_some_coop_idle(bfqq) ; - } - bfqq->seek_mean = (sector_t)total; - - bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, - (u64)bfqq->seek_mean); -} - -/* - * Disable idle window if the process thinks too long or seeks so much that - * it doesn't matter. - */ -static void bfq_update_idle_window(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct cfq_io_context *cic) -{ - int enable_idle; - - /* Don't idle for async or idle io prio class. */ - if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) - return; - - enable_idle = bfq_bfqq_idle_window(bfqq); - - if (atomic_read(&cic->ioc->nr_tasks) == 0 || - bfqd->bfq_slice_idle == 0 || - (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && - bfqq->raising_coeff == 1)) - enable_idle = 0; - else if (bfq_sample_valid(cic->ttime.ttime_samples)) { - if (cic->ttime.ttime_mean > bfqd->bfq_slice_idle && - bfqq->raising_coeff == 1) - enable_idle = 0; - else - enable_idle = 1; - } - bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", - enable_idle); - - if (enable_idle) - bfq_mark_bfqq_idle_window(bfqq); - else - bfq_clear_bfqq_idle_window(bfqq); -} - -/* - * Called when a new fs request (rq) is added to bfqq. Check if there's - * something we should do about it. - */ -static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - struct request *rq) -{ - struct cfq_io_context *cic = RQ_CIC(rq); - - if (rq->cmd_flags & REQ_META) - bfqq->meta_pending++; - - bfq_update_io_thinktime(bfqd, cic); - bfq_update_io_seektime(bfqd, bfqq, rq); - if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || - !BFQQ_SEEKY(bfqq)) - bfq_update_idle_window(bfqd, bfqq, cic); - - bfq_log_bfqq(bfqd, bfqq, - "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", - bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), - (long long unsigned)bfqq->seek_mean); - - bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); - - if (bfqq == bfqd->active_queue) { - /* - * If there is just this request queued and the request - * is small, just exit. - * In this way, if the disk is being idled to wait for a new - * request from the active queue, we avoid unplugging the - * device now. - * - * By doing so, we spare the disk to be committed - * to serve just a small request. On the contrary, we wait for - * the block layer to decide when to unplug the device: - * hopefully, new requests will be merged to this - * one quickly, then the device will be unplugged - * and larger requests will be dispatched. 
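The think-time and seek statistics maintained above are exponentially weighted moving averages: each update keeps 7/8 of the history and folds the new sample in with a fixed-point weight of 256, and the seek update additionally clamps the new distance against a multiple of the current mean. A minimal standalone version of the same update rule (plain C; names such as ewma_add are illustrative only, not kernel symbols):

#include <stdio.h>

/* EWMA state: samples ~ weight of the history, total ~ weighted sum. */
struct ewma {
        unsigned long samples;
        unsigned long total;
};

/* Fold one new sample in with a 7/8 decay, as bfq_update_io_thinktime() does. */
static void ewma_add(struct ewma *e, unsigned long sample)
{
        e->samples = (7 * e->samples + 256) / 8;
        e->total   = (7 * e->total + 256 * sample) / 8;
}

/* Rounded mean of the samples folded in so far. */
static unsigned long ewma_mean(const struct ewma *e)
{
        return (e->total + e->samples / 2) / e->samples;
}

int main(void)
{
        struct ewma think = { 0, 0 };
        unsigned long v[] = { 4, 4, 4, 40, 4, 4 };

        for (unsigned i = 0; i < sizeof(v) / sizeof(v[0]); i++) {
                ewma_add(&think, v[i]);
                printf("sample %lu -> mean %lu\n", v[i], ewma_mean(&think));
        }
        return 0;
}

With these constants the mean tracks the recent behaviour quickly, while a single outlier moves it only modestly, which is what the idle-window and seekiness heuristics rely on.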
- */ - if (bfqq->queued[rq_is_sync(rq)] == 1 && - blk_rq_sectors(rq) < 32) { - return; - } - if (bfq_bfqq_wait_request(bfqq)) { - /* - * If we are waiting for a request for this queue, let - * it rip immediately and flag that we must not expire - * this queue just now. - */ - bfq_clear_bfqq_wait_request(bfqq); - del_timer(&bfqd->idle_slice_timer); - /* - * Here we can safely expire the queue, in - * case of budget timeout, without wasting - * guarantees - */ - if (bfq_bfqq_budget_timeout(bfqq)) - bfq_bfqq_expire(bfqd, bfqq, 0, - BFQ_BFQQ_BUDGET_TIMEOUT); - __blk_run_queue(bfqd->queue); - } - } -} - -static void bfq_insert_request(struct request_queue *q, struct request *rq) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct bfq_queue *bfqq = RQ_BFQQ(rq); - - assert_spin_locked(bfqd->queue->queue_lock); - bfq_init_prio_data(bfqq, RQ_CIC(rq)->ioc); - - bfq_add_rq_rb(rq); - - rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]); - list_add_tail(&rq->queuelist, &bfqq->fifo); - - bfq_rq_enqueued(bfqd, bfqq, rq); -} - -static void bfq_update_hw_tag(struct bfq_data *bfqd) -{ - bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, - bfqd->rq_in_driver); - - if (bfqd->hw_tag == 1) - return; - - /* - * This sample is valid if the number of outstanding requests - * is large enough to allow a queueing behavior. Note that the - * sum is not exact, as it's not taking into account deactivated - * requests. - */ - if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) - return; - - if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) - return; - - bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; - bfqd->max_rq_in_driver = 0; - bfqd->hw_tag_samples = 0; -} - -static void bfq_completed_request(struct request_queue *q, struct request *rq) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - struct bfq_data *bfqd = bfqq->bfqd; - const int sync = rq_is_sync(rq); - - bfq_log_bfqq(bfqd, bfqq, "completed %u sects req (%d)", - blk_rq_sectors(rq), sync); - - bfq_update_hw_tag(bfqd); - - WARN_ON(!bfqd->rq_in_driver); - WARN_ON(!bfqq->dispatched); - bfqd->rq_in_driver--; - bfqq->dispatched--; - - if (bfq_bfqq_sync(bfqq)) - bfqd->sync_flight--; - - if (sync) - RQ_CIC(rq)->ttime.last_end_request = jiffies; - - /* - * If this is the active queue, check if it needs to be expired, - * or if we want to idle in case it has no pending requests. 
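bfq_update_hw_tag() above decides whether the drive performs its own command queueing purely by observation: it records the peak number of requests in flight, counts only those samples taken while enough work was available to keep a hardware queue busy, and after a fixed number of samples compares the peak against a threshold. A self-contained sketch of that heuristic (constants and names chosen for the example, not taken from the kernel):

#define HW_QUEUE_THRESHOLD 4    /* plays the role of BFQ_HW_QUEUE_THRESHOLD */
#define HW_QUEUE_SAMPLES   32   /* plays the role of BFQ_HW_QUEUE_SAMPLES   */

struct tag_detect {
        int samples;        /* valid samples taken so far             */
        int max_in_driver;  /* peak number of in-flight requests      */
        int decided;        /* -1 unknown, 0 no queueing, 1 queueing  */
};

/* Feed one observation of the current in-flight and queued request counts. */
static void tag_sample(struct tag_detect *t, int in_driver, int queued)
{
        if (t->decided == 1)
                return;
        if (in_driver > t->max_in_driver)
                t->max_in_driver = in_driver;
        /* Only count samples where enough work was available to queue up. */
        if (in_driver + queued < HW_QUEUE_THRESHOLD)
                return;
        if (++t->samples < HW_QUEUE_SAMPLES)
                return;
        t->decided = t->max_in_driver > HW_QUEUE_THRESHOLD;
        t->samples = 0;
        t->max_in_driver = 0;
}

int main(void)
{
        struct tag_detect t = { 0, 0, -1 };

        for (int i = 0; i < 64; i++)
                tag_sample(&t, 8 /* in flight */, 4 /* queued */);
        return t.decided;       /* 1: the device queues on its own */
}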
- */ - if (bfqd->active_queue == bfqq) { - if (bfq_bfqq_budget_new(bfqq)) - bfq_set_budget_timeout(bfqd); - - /* Idling is disabled also for cooperation issues: - * 1) there is a close cooperator for the queue, or - * 2) the queue is shared and some cooperator is likely - * to be idle (in this case, by not arming the idle timer, - * we try to slow down the queue, to prevent the zones - * of the disk accessed by the active cooperators to become - * too distant from the zone that will be accessed by the - * currently idle cooperators) - */ - if (bfq_may_expire_for_budg_timeout(bfqq)) - bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); - else if (sync && - (bfqd->rq_in_driver == 0 || - bfqq->raising_coeff > 1) - && RB_EMPTY_ROOT(&bfqq->sort_list) - && !bfq_close_cooperator(bfqd, bfqq) - && (!bfq_bfqq_coop(bfqq) || - !bfq_bfqq_some_coop_idle(bfqq))) - bfq_arm_slice_timer(bfqd); - } - - if (!bfqd->rq_in_driver) - bfq_schedule_dispatch(bfqd); -} - -static inline int __bfq_may_queue(struct bfq_queue *bfqq) -{ - if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { - bfq_clear_bfqq_must_alloc(bfqq); - return ELV_MQUEUE_MUST; - } - - return ELV_MQUEUE_MAY; -} - -static int bfq_may_queue(struct request_queue *q, int rw) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct task_struct *tsk = current; - struct cfq_io_context *cic; - struct bfq_queue *bfqq; - - /* - * Don't force setup of a queue from here, as a call to may_queue - * does not necessarily imply that a request actually will be queued. - * So just lookup a possibly existing queue, or return 'may queue' - * if that fails. - */ - cic = bfq_cic_lookup(bfqd, tsk->io_context); - if (cic == NULL) - return ELV_MQUEUE_MAY; - - bfqq = cic_to_bfqq(cic, rw_is_sync(rw)); - if (bfqq != NULL) { - bfq_init_prio_data(bfqq, cic->ioc); - - return __bfq_may_queue(bfqq); - } - - return ELV_MQUEUE_MAY; -} - -/* - * Queue lock held here. - */ -static void bfq_put_request(struct request *rq) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - - if (bfqq != NULL) { - const int rw = rq_data_dir(rq); - - BUG_ON(!bfqq->allocated[rw]); - bfqq->allocated[rw]--; - - put_io_context(RQ_CIC(rq)->ioc); - - rq->elevator_private[0] = NULL; - rq->elevator_private[1] = NULL; - - bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", - bfqq, atomic_read(&bfqq->ref)); - bfq_put_queue(bfqq); - } -} - -static struct bfq_queue * -bfq_merge_bfqqs(struct bfq_data *bfqd, struct cfq_io_context *cic, - struct bfq_queue *bfqq) -{ - bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", - (long unsigned)bfqq->new_bfqq->pid); - cic_set_bfqq(cic, bfqq->new_bfqq, 1); - bfq_mark_bfqq_coop(bfqq->new_bfqq); - bfq_put_queue(bfqq); - return cic_to_bfqq(cic, 1); -} - -/* - * Returns NULL if a new bfqq should be allocated, or the old bfqq if this - * was the last process referring to said bfqq. - */ -static struct bfq_queue * -bfq_split_bfqq(struct cfq_io_context *cic, struct bfq_queue *bfqq) -{ - bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); - if (bfqq_process_refs(bfqq) == 1) { - bfqq->pid = current->pid; - bfq_clear_bfqq_some_coop_idle(bfqq); - bfq_clear_bfqq_coop(bfqq); - bfq_clear_bfqq_split_coop(bfqq); - return bfqq; - } - - cic_set_bfqq(cic, NULL, 1); - - bfq_put_cooperator(bfqq); - - bfq_put_queue(bfqq); - return NULL; -} - -/* - * Allocate bfq data structures associated with this request. 
- */ -static int bfq_set_request(struct request_queue *q, struct request *rq, - gfp_t gfp_mask) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct cfq_io_context *cic; - const int rw = rq_data_dir(rq); - const int is_sync = rq_is_sync(rq); - struct bfq_queue *bfqq; - struct bfq_group *bfqg; - unsigned long flags; - - might_sleep_if(gfp_mask & __GFP_WAIT); - - cic = bfq_get_io_context(bfqd, gfp_mask); - - spin_lock_irqsave(q->queue_lock, flags); - - if (cic == NULL) - goto queue_fail; - - bfqg = bfq_cic_update_cgroup(cic); - -new_queue: - bfqq = cic_to_bfqq(cic, is_sync); - if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { - bfqq = bfq_get_queue(bfqd, bfqg, is_sync, cic->ioc, gfp_mask); - cic_set_bfqq(cic, bfqq, is_sync); - } else { - /* - * If the queue was seeky for too long, break it apart. - */ - if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { - bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); - bfqq = bfq_split_bfqq(cic, bfqq); - if (!bfqq) - goto new_queue; - } - - /* - * Check to see if this queue is scheduled to merge with - * another closely cooperating queue. The merging of queues - * happens here as it must be done in process context. - * The reference on new_bfqq was taken in merge_bfqqs. - */ - if (bfqq->new_bfqq != NULL) - bfqq = bfq_merge_bfqqs(bfqd, cic, bfqq); - } - - bfqq->allocated[rw]++; - atomic_inc(&bfqq->ref); - bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, - atomic_read(&bfqq->ref)); - - spin_unlock_irqrestore(q->queue_lock, flags); - - rq->elevator_private[0] = cic; - rq->elevator_private[1] = bfqq; - - return 0; - -queue_fail: - if (cic != NULL) - put_io_context(cic->ioc); - - bfq_schedule_dispatch(bfqd); - spin_unlock_irqrestore(q->queue_lock, flags); - - return 1; -} - -static void bfq_kick_queue(struct work_struct *work) -{ - struct bfq_data *bfqd = - container_of(work, struct bfq_data, unplug_work); - struct request_queue *q = bfqd->queue; - - spin_lock_irq(q->queue_lock); - __blk_run_queue(q); - spin_unlock_irq(q->queue_lock); -} - -/* - * Handler of the expiration of the timer running if the active_queue - * is idling inside its time slice. - */ -static void bfq_idle_slice_timer(unsigned long data) -{ - struct bfq_data *bfqd = (struct bfq_data *)data; - struct bfq_queue *bfqq; - unsigned long flags; - enum bfqq_expiration reason; - - spin_lock_irqsave(bfqd->queue->queue_lock, flags); - - bfqq = bfqd->active_queue; - /* - * Theoretical race here: active_queue can be NULL or different - * from the queue that was idling if the timer handler spins on - * the queue_lock and a new request arrives for the current - * queue and there is a full dispatch cycle that changes the - * active_queue. This can hardly happen, but in the worst case - * we just expire a queue too early. 
- */ - if (bfqq != NULL) { - bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); - if (bfq_bfqq_budget_timeout(bfqq)) - /* - * Also here the queue can be safely expired - * for budget timeout without wasting - * guarantees - */ - reason = BFQ_BFQQ_BUDGET_TIMEOUT; - else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) - /* - * The queue may not be empty upon timer expiration, - * because we may not disable the timer when the first - * request of the active queue arrives during - * disk idling - */ - reason = BFQ_BFQQ_TOO_IDLE; - else - goto schedule_dispatch; - - bfq_bfqq_expire(bfqd, bfqq, 1, reason); - } - -schedule_dispatch: - bfq_schedule_dispatch(bfqd); - - spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); -} - -static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) -{ - del_timer_sync(&bfqd->idle_slice_timer); - cancel_work_sync(&bfqd->unplug_work); -} - -static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, - struct bfq_queue **bfqq_ptr) -{ - struct bfq_group *root_group = bfqd->root_group; - struct bfq_queue *bfqq = *bfqq_ptr; - - bfq_log(bfqd, "put_async_bfqq: %p", bfqq); - if (bfqq != NULL) { - bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); - bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", - bfqq, atomic_read(&bfqq->ref)); - bfq_put_queue(bfqq); - *bfqq_ptr = NULL; - } -} - -/* - * Release all the bfqg references to its async queues. If we are - * deallocating the group these queues may still contain requests, so - * we reparent them to the root cgroup (i.e., the only one that will - * exist for sure untill all the requests on a device are gone). - */ -static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) -{ - int i, j; - - for (i = 0; i < 2; i++) - for (j = 0; j < IOPRIO_BE_NR; j++) - __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); - - __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); -} - -static void bfq_exit_queue(struct elevator_queue *e) -{ - struct bfq_data *bfqd = e->elevator_data; - struct request_queue *q = bfqd->queue; - struct bfq_queue *bfqq, *n; - struct cfq_io_context *cic; - - bfq_shutdown_timer_wq(bfqd); - - spin_lock_irq(q->queue_lock); - - while (!list_empty(&bfqd->cic_list)) { - cic = list_entry(bfqd->cic_list.next, struct cfq_io_context, - queue_list); - __bfq_exit_single_io_context(bfqd, cic); - } - - BUG_ON(bfqd->active_queue != NULL); - list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) - bfq_deactivate_bfqq(bfqd, bfqq, 0); - - bfq_disconnect_groups(bfqd); - spin_unlock_irq(q->queue_lock); - - bfq_shutdown_timer_wq(bfqd); - - spin_lock(&cic_index_lock); - ida_remove(&cic_index_ida, bfqd->cic_index); - spin_unlock(&cic_index_lock); - - /* Wait for cic->key accessors to exit their grace periods. 
*/ - synchronize_rcu(); - - BUG_ON(timer_pending(&bfqd->idle_slice_timer)); - - bfq_free_root_group(bfqd); - kfree(bfqd); -} - -static int bfq_alloc_cic_index(void) -{ - int index, error; - - do { - if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) - return -ENOMEM; - - spin_lock(&cic_index_lock); - error = ida_get_new(&cic_index_ida, &index); - spin_unlock(&cic_index_lock); - if (error && error != -EAGAIN) - return error; - } while (error); - - return index; -} - -static void *bfq_init_queue(struct request_queue *q) -{ - struct bfq_group *bfqg; - struct bfq_data *bfqd; - int i; - - i = bfq_alloc_cic_index(); - if (i < 0) - return NULL; - - bfqd = kmalloc_node(sizeof(*bfqd), GFP_KERNEL | __GFP_ZERO, q->node); - if (bfqd == NULL) - return NULL; - - bfqd->cic_index = i; - - /* - * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. - * Grab a permanent reference to it, so that the normal code flow - * will not attempt to free it. - */ - bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); - atomic_inc(&bfqd->oom_bfqq.ref); - - INIT_LIST_HEAD(&bfqd->cic_list); - - bfqd->queue = q; - - bfqg = bfq_alloc_root_group(bfqd, q->node); - if (bfqg == NULL) { - kfree(bfqd); - return NULL; - } - - bfqd->root_group = bfqg; - - init_timer(&bfqd->idle_slice_timer); - bfqd->idle_slice_timer.function = bfq_idle_slice_timer; - bfqd->idle_slice_timer.data = (unsigned long)bfqd; - - bfqd->rq_pos_tree = RB_ROOT; - - INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); - - INIT_LIST_HEAD(&bfqd->active_list); - INIT_LIST_HEAD(&bfqd->idle_list); - - bfqd->hw_tag = -1; - - bfqd->bfq_max_budget = bfq_default_max_budget; - - bfqd->bfq_quantum = bfq_quantum; - bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; - bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; - bfqd->bfq_back_max = bfq_back_max; - bfqd->bfq_back_penalty = bfq_back_penalty; - bfqd->bfq_slice_idle = bfq_slice_idle; - bfqd->bfq_class_idle_last_service = 0; - bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; - bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; - bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; - - bfqd->low_latency = true; - - bfqd->bfq_raising_coeff = 20; - bfqd->bfq_raising_rt_max_time = msecs_to_jiffies(300); - bfqd->bfq_raising_max_time = 0; - bfqd->bfq_raising_min_idle_time = msecs_to_jiffies(2000); - bfqd->bfq_raising_min_inter_arr_async = msecs_to_jiffies(500); - bfqd->bfq_raising_max_softrt_rate = 7000; - - /* Initially estimate the device's peak rate as the reference rate */ - if (blk_queue_nonrot(bfqd->queue)) { - bfqd->RT_prod = R_nonrot * T_nonrot; - bfqd->peak_rate = R_nonrot; - } else { - bfqd->RT_prod = R_rot * T_rot; - bfqd->peak_rate = R_rot; - } - - return bfqd; -} - -static void bfq_slab_kill(void) -{ - if (bfq_pool != NULL) - kmem_cache_destroy(bfq_pool); - if (bfq_ioc_pool != NULL) - kmem_cache_destroy(bfq_ioc_pool); -} - -static int __init bfq_slab_setup(void) -{ - bfq_pool = KMEM_CACHE(bfq_queue, 0); - if (bfq_pool == NULL) - goto fail; - - bfq_ioc_pool = kmem_cache_create("bfq_io_context", - sizeof(struct cfq_io_context), - __alignof__(struct cfq_io_context), - 0, NULL); - if (bfq_ioc_pool == NULL) - goto fail; - - return 0; -fail: - bfq_slab_kill(); - return -ENOMEM; -} - -static ssize_t bfq_var_show(unsigned int var, char *page) -{ - return sprintf(page, "%d\n", var); -} - -static ssize_t bfq_var_store(unsigned long *var, const char *page, size_t count) -{ - unsigned long new_val; - int ret = strict_strtoul(page, 10, &new_val); - - if (ret == 0) - *var = new_val; - - return count; -} - -static ssize_t 
bfq_raising_max_time_show(struct elevator_queue *e, char *page) -{ - struct bfq_data *bfqd = e->elevator_data; - return sprintf(page, "%d\n", bfqd->bfq_raising_max_time > 0 ? - bfqd->bfq_raising_max_time : - bfq_wrais_duration(bfqd)); -} - -static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) -{ - struct bfq_queue *bfqq; - struct bfq_data *bfqd = e->elevator_data; - ssize_t num_char = 0; - - num_char += sprintf(page + num_char, "Active:\n"); - list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { - num_char += sprintf(page + num_char, - "pid%d: weight %hu, dur %d/%u\n", - bfqq->pid, - bfqq->entity.weight, - jiffies_to_msecs(jiffies - - bfqq->last_rais_start_finish), - jiffies_to_msecs(bfqq->raising_cur_max_time)); - } - num_char += sprintf(page + num_char, "Idle:\n"); - list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { - num_char += sprintf(page + num_char, - "pid%d: weight %hu, dur %d/%u\n", - bfqq->pid, - bfqq->entity.weight, - jiffies_to_msecs(jiffies - - bfqq->last_rais_start_finish), - jiffies_to_msecs(bfqq->raising_cur_max_time)); - } - return num_char; -} - -#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ -static ssize_t __FUNC(struct elevator_queue *e, char *page) \ -{ \ - struct bfq_data *bfqd = e->elevator_data; \ - unsigned int __data = __VAR; \ - if (__CONV) \ - __data = jiffies_to_msecs(__data); \ - return bfq_var_show(__data, (page)); \ -} -SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0); -SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); -SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); -SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); -SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); -SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); -SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); -SHOW_FUNCTION(bfq_max_budget_async_rq_show, bfqd->bfq_max_budget_async_rq, 0); -SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); -SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); -SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); -SHOW_FUNCTION(bfq_raising_coeff_show, bfqd->bfq_raising_coeff, 0); -SHOW_FUNCTION(bfq_raising_rt_max_time_show, bfqd->bfq_raising_rt_max_time, 1); -SHOW_FUNCTION(bfq_raising_min_idle_time_show, bfqd->bfq_raising_min_idle_time, - 1); -SHOW_FUNCTION(bfq_raising_min_inter_arr_async_show, - bfqd->bfq_raising_min_inter_arr_async, - 1); -SHOW_FUNCTION(bfq_raising_max_softrt_rate_show, - bfqd->bfq_raising_max_softrt_rate, 0); -#undef SHOW_FUNCTION - -#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ -static ssize_t \ -__FUNC(struct elevator_queue *e, const char *page, size_t count) \ -{ \ - struct bfq_data *bfqd = e->elevator_data; \ - unsigned long __data; \ - int ret = bfq_var_store(&__data, (page), count); \ - if (__data < (MIN)) \ - __data = (MIN); \ - else if (__data > (MAX)) \ - __data = (MAX); \ - if (__CONV) \ - *(__PTR) = msecs_to_jiffies(__data); \ - else \ - *(__PTR) = __data; \ - return ret; \ -} -STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); -STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, - INT_MAX, 1); -STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, - INT_MAX, 1); -STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); -STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, - INT_MAX, 0); -STORE_FUNCTION(bfq_slice_idle_store, 
&bfqd->bfq_slice_idle, 0, INT_MAX, 1); -STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, - 1, INT_MAX, 0); -STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, - INT_MAX, 1); -STORE_FUNCTION(bfq_raising_coeff_store, &bfqd->bfq_raising_coeff, 1, - INT_MAX, 0); -STORE_FUNCTION(bfq_raising_max_time_store, &bfqd->bfq_raising_max_time, 0, - INT_MAX, 1); -STORE_FUNCTION(bfq_raising_rt_max_time_store, &bfqd->bfq_raising_rt_max_time, 0, - INT_MAX, 1); -STORE_FUNCTION(bfq_raising_min_idle_time_store, - &bfqd->bfq_raising_min_idle_time, 0, INT_MAX, 1); -STORE_FUNCTION(bfq_raising_min_inter_arr_async_store, - &bfqd->bfq_raising_min_inter_arr_async, 0, INT_MAX, 1); -STORE_FUNCTION(bfq_raising_max_softrt_rate_store, - &bfqd->bfq_raising_max_softrt_rate, 0, INT_MAX, 0); -#undef STORE_FUNCTION - -/* do nothing for the moment */ -static ssize_t bfq_weights_store(struct elevator_queue *e, - const char *page, size_t count) -{ - return count; -} - -static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) -{ - u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); - - if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) - return bfq_calc_max_budget(bfqd->peak_rate, timeout); - else - return bfq_default_max_budget; -} - -static ssize_t bfq_max_budget_store(struct elevator_queue *e, - const char *page, size_t count) -{ - struct bfq_data *bfqd = e->elevator_data; - unsigned long __data; - int ret = bfq_var_store(&__data, (page), count); - - if (__data == 0) - bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); - else { - if (__data > INT_MAX) - __data = INT_MAX; - bfqd->bfq_max_budget = __data; - } - - bfqd->bfq_user_max_budget = __data; - - return ret; -} - -static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, - const char *page, size_t count) -{ - struct bfq_data *bfqd = e->elevator_data; - unsigned long __data; - int ret = bfq_var_store(&__data, (page), count); - - if (__data < 1) - __data = 1; - else if (__data > INT_MAX) - __data = INT_MAX; - - bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); - if (bfqd->bfq_user_max_budget == 0) - bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); - - return ret; -} - -static ssize_t bfq_low_latency_store(struct elevator_queue *e, - const char *page, size_t count) -{ - struct bfq_data *bfqd = e->elevator_data; - unsigned long __data; - int ret = bfq_var_store(&__data, (page), count); - - if (__data > 1) - __data = 1; - bfqd->low_latency = __data; - - return ret; -} - -#define BFQ_ATTR(name) \ - __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) - -static struct elv_fs_entry bfq_attrs[] = { - BFQ_ATTR(quantum), - BFQ_ATTR(fifo_expire_sync), - BFQ_ATTR(fifo_expire_async), - BFQ_ATTR(back_seek_max), - BFQ_ATTR(back_seek_penalty), - BFQ_ATTR(slice_idle), - BFQ_ATTR(max_budget), - BFQ_ATTR(max_budget_async_rq), - BFQ_ATTR(timeout_sync), - BFQ_ATTR(timeout_async), - BFQ_ATTR(low_latency), - BFQ_ATTR(raising_coeff), - BFQ_ATTR(raising_max_time), - BFQ_ATTR(raising_rt_max_time), - BFQ_ATTR(raising_min_idle_time), - BFQ_ATTR(raising_min_inter_arr_async), - BFQ_ATTR(raising_max_softrt_rate), - BFQ_ATTR(weights), - __ATTR_NULL -}; - -static struct elevator_type iosched_bfq = { - .ops = { - .elevator_merge_fn = bfq_merge, - .elevator_merged_fn = bfq_merged_request, - .elevator_merge_req_fn = bfq_merged_requests, - .elevator_allow_merge_fn = bfq_allow_merge, - .elevator_dispatch_fn = bfq_dispatch_requests, - .elevator_add_req_fn = bfq_insert_request, - 
.elevator_activate_req_fn = bfq_activate_request, - .elevator_deactivate_req_fn = bfq_deactivate_request, - .elevator_completed_req_fn = bfq_completed_request, - .elevator_former_req_fn = elv_rb_former_request, - .elevator_latter_req_fn = elv_rb_latter_request, - .elevator_set_req_fn = bfq_set_request, - .elevator_put_req_fn = bfq_put_request, - .elevator_may_queue_fn = bfq_may_queue, - .elevator_init_fn = bfq_init_queue, - .elevator_exit_fn = bfq_exit_queue, - .trim = bfq_free_io_context, - }, - .elevator_attrs = bfq_attrs, - .elevator_name = "bfq", - .elevator_owner = THIS_MODULE, -}; - -static int __init bfq_init(void) -{ - /* - * Can be 0 on HZ < 1000 setups. - */ - if (bfq_slice_idle == 0) - bfq_slice_idle = 1; - - if (bfq_timeout_async == 0) - bfq_timeout_async = 1; - - if (bfq_slab_setup()) - return -ENOMEM; - - elv_register(&iosched_bfq); - - return 0; -} - -static void __exit bfq_exit(void) -{ - DECLARE_COMPLETION_ONSTACK(all_gone); - elv_unregister(&iosched_bfq); - bfq_ioc_gone = &all_gone; - /* bfq_ioc_gone's update must be visible before reading bfq_ioc_count */ - smp_wmb(); - if (elv_ioc_count_read(bfq_ioc_count) != 0) - wait_for_completion(&all_gone); - ida_destroy(&cic_index_ida); - bfq_slab_kill(); -} - -module_init(bfq_init); -module_exit(bfq_exit); - -MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Budget Fair Queueing IO scheduler"); diff --git a/block/bfq-sched.c b/block/bfq-sched.c deleted file mode 100644 index fd50b7fd130..00000000000 --- a/block/bfq-sched.c +++ /dev/null @@ -1,1066 +0,0 @@ -/* - * BFQ: Hierarchical B-WF2Q+ scheduler. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe - * - * Copyright (C) 2008 Fabio Checconi - * Paolo Valente - */ - -#ifdef CONFIG_CGROUP_BFQIO -#define for_each_entity(entity) \ - for (; entity != NULL; entity = entity->parent) - -#define for_each_entity_safe(entity, parent) \ - for (; entity && ({ parent = entity->parent; 1; }); entity = parent) - -static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, - int extract, - struct bfq_data *bfqd); - -static inline void bfq_update_budget(struct bfq_entity *next_active) -{ - struct bfq_entity *bfqg_entity; - struct bfq_group *bfqg; - struct bfq_sched_data *group_sd; - - BUG_ON(next_active == NULL); - - group_sd = next_active->sched_data; - - bfqg = container_of(group_sd, struct bfq_group, sched_data); - /* - * bfq_group's my_entity field is not NULL only if the group - * is not the root group. We must not touch the root entity - * as it must never become an active entity. - */ - bfqg_entity = bfqg->my_entity; - if (bfqg_entity != NULL) - bfqg_entity->budget = next_active->budget; -} - -static int bfq_update_next_active(struct bfq_sched_data *sd) -{ - struct bfq_entity *next_active; - - if (sd->active_entity != NULL) - /* will update/requeue at the end of service */ - return 0; - - /* - * NOTE: this can be improved in many ways, such as returning - * 1 (and thus propagating upwards the update) only when the - * budget changes, or caching the bfqq that will be scheduled - * next from this subtree. By now we worry more about - * correctness than about performance... 
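The for_each_entity() and for_each_entity_safe() macros at the top of bfq-sched.c above implement hierarchical scheduling simply by walking parent pointers; the _safe variant snapshots the parent before the loop body runs, so the body may deactivate or free the current entity. The same idiom on a hypothetical node type, in standard C (the kernel version uses a GCC statement expression where this sketch uses the comma operator):

#include <stddef.h>

struct node {
        struct node *parent;
        int weight;
};

#define for_each_up(n)                                  \
        for (; (n) != NULL; (n) = (n)->parent)

#define for_each_up_safe(n, p)                          \
        for (; (n) && ((p) = (n)->parent, 1); (n) = (p))

/* Sum the weights of a node and all of its ancestors. */
static int weight_to_root(struct node *n)
{
        int sum = 0;

        for_each_up(n)
                sum += n->weight;
        return sum;
}

int main(void)
{
        struct node root = { NULL, 10 }, leaf = { &root, 3 };

        return weight_to_root(&leaf) == 13 ? 0 : 1;
}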
- */ - next_active = bfq_lookup_next_entity(sd, 0, NULL); - sd->next_active = next_active; - - if (next_active != NULL) - bfq_update_budget(next_active); - - return 1; -} - -static inline void bfq_check_next_active(struct bfq_sched_data *sd, - struct bfq_entity *entity) -{ - BUG_ON(sd->next_active != entity); -} -#else -#define for_each_entity(entity) \ - for (; entity != NULL; entity = NULL) - -#define for_each_entity_safe(entity, parent) \ - for (parent = NULL; entity != NULL; entity = parent) - -static inline int bfq_update_next_active(struct bfq_sched_data *sd) -{ - return 0; -} - -static inline void bfq_check_next_active(struct bfq_sched_data *sd, - struct bfq_entity *entity) -{ -} - -static inline void bfq_update_budget(struct bfq_entity *next_active) -{ -} -#endif - -/* - * Shift for timestamp calculations. This actually limits the maximum - * service allowed in one timestamp delta (small shift values increase it), - * the maximum total weight that can be used for the queues in the system - * (big shift values increase it), and the period of virtual time wraparounds. - */ -#define WFQ_SERVICE_SHIFT 22 - -/** - * bfq_gt - compare two timestamps. - * @a: first ts. - * @b: second ts. - * - * Return @a > @b, dealing with wrapping correctly. - */ -static inline int bfq_gt(u64 a, u64 b) -{ - return (s64)(a - b) > 0; -} - -static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = NULL; - - BUG_ON(entity == NULL); - - if (entity->my_sched_data == NULL) - bfqq = container_of(entity, struct bfq_queue, entity); - - return bfqq; -} - - -/** - * bfq_delta - map service into the virtual time domain. - * @service: amount of service. - * @weight: scale factor (weight of an entity or weight sum). - */ -static inline u64 bfq_delta(unsigned long service, - unsigned long weight) -{ - u64 d = (u64)service << WFQ_SERVICE_SHIFT; - - do_div(d, weight); - return d; -} - -/** - * bfq_calc_finish - assign the finish time to an entity. - * @entity: the entity to act upon. - * @service: the service to be charged to the entity. - */ -static inline void bfq_calc_finish(struct bfq_entity *entity, - unsigned long service) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - BUG_ON(entity->weight == 0); - - entity->finish = entity->start + - bfq_delta(service, entity->weight); - - if (bfqq != NULL) { - bfq_log_bfqq(bfqq->bfqd, bfqq, - "calc_finish: serv %lu, w %d", - service, entity->weight); - bfq_log_bfqq(bfqq->bfqd, bfqq, - "calc_finish: start %llu, finish %llu, delta %llu", - entity->start, entity->finish, - bfq_delta(service, entity->weight)); - } -} - -/** - * bfq_entity_of - get an entity from a node. - * @node: the node field of the entity. - * - * Convert a node pointer to the relative entity. This is used only - * to simplify the logic of some functions and not as the generic - * conversion mechanism because, e.g., in the tree walking functions, - * the check for a %NULL value would be redundant. - */ -static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) -{ - struct bfq_entity *entity = NULL; - - if (node != NULL) - entity = rb_entry(node, struct bfq_entity, rb_node); - - return entity; -} - -/** - * bfq_extract - remove an entity from a tree. - * @root: the tree root. - * @entity: the entity to remove. 
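bfq_gt() and bfq_delta() above are the core of the timestamp arithmetic: comparisons go through a signed subtraction so that wraparound of the 64-bit virtual clock is harmless, and service is mapped into virtual time by a fixed-point shift followed by a division by the weight. The same two helpers, reproduced as standalone C for experimentation (plain userspace division instead of do_div()):

#include <stdint.h>
#include <stdio.h>

#define WFQ_SERVICE_SHIFT 22

/* Return a > b, treating the difference as signed so wraparound works. */
static int ts_gt(uint64_t a, uint64_t b)
{
        return (int64_t)(a - b) > 0;
}

/* Map @service into the virtual time domain for an entity of @weight. */
static uint64_t vt_delta(unsigned long service, unsigned long weight)
{
        return ((uint64_t)service << WFQ_SERVICE_SHIFT) / weight;
}

int main(void)
{
        /* finish = start + delta(budget, weight), as in bfq_calc_finish(). */
        uint64_t start = 0, finish = start + vt_delta(8192, 100);

        printf("finish timestamp: %llu\n", (unsigned long long)finish);
        printf("wraparound-safe compare: %d\n", ts_gt(finish, start));
        return 0;
}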
- */ -static inline void bfq_extract(struct rb_root *root, - struct bfq_entity *entity) -{ - BUG_ON(entity->tree != root); - - entity->tree = NULL; - rb_erase(&entity->rb_node, root); -} - -/** - * bfq_idle_extract - extract an entity from the idle tree. - * @st: the service tree of the owning @entity. - * @entity: the entity being removed. - */ -static void bfq_idle_extract(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - struct rb_node *next; - - BUG_ON(entity->tree != &st->idle); - - if (entity == st->first_idle) { - next = rb_next(&entity->rb_node); - st->first_idle = bfq_entity_of(next); - } - - if (entity == st->last_idle) { - next = rb_prev(&entity->rb_node); - st->last_idle = bfq_entity_of(next); - } - - bfq_extract(&st->idle, entity); - - if (bfqq != NULL) - list_del(&bfqq->bfqq_list); -} - -/** - * bfq_insert - generic tree insertion. - * @root: tree root. - * @entity: entity to insert. - * - * This is used for the idle and the active tree, since they are both - * ordered by finish time. - */ -static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) -{ - struct bfq_entity *entry; - struct rb_node **node = &root->rb_node; - struct rb_node *parent = NULL; - - BUG_ON(entity->tree != NULL); - - while (*node != NULL) { - parent = *node; - entry = rb_entry(parent, struct bfq_entity, rb_node); - - if (bfq_gt(entry->finish, entity->finish)) - node = &parent->rb_left; - else - node = &parent->rb_right; - } - - rb_link_node(&entity->rb_node, parent, node); - rb_insert_color(&entity->rb_node, root); - - entity->tree = root; -} - -/** - * bfq_update_min - update the min_start field of a entity. - * @entity: the entity to update. - * @node: one of its children. - * - * This function is called when @entity may store an invalid value for - * min_start due to updates to the active tree. The function assumes - * that the subtree rooted at @node (which may be its left or its right - * child) has a valid min_start value. - */ -static inline void bfq_update_min(struct bfq_entity *entity, - struct rb_node *node) -{ - struct bfq_entity *child; - - if (node != NULL) { - child = rb_entry(node, struct bfq_entity, rb_node); - if (bfq_gt(entity->min_start, child->min_start)) - entity->min_start = child->min_start; - } -} - -/** - * bfq_update_active_node - recalculate min_start. - * @node: the node to update. - * - * @node may have changed position or one of its children may have moved, - * this function updates its min_start value. The left and right subtrees - * are assumed to hold a correct min_start value. - */ -static inline void bfq_update_active_node(struct rb_node *node) -{ - struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); - - entity->min_start = entity->start; - bfq_update_min(entity, node->rb_right); - bfq_update_min(entity, node->rb_left); -} - -/** - * bfq_update_active_tree - update min_start for the whole active tree. - * @node: the starting node. - * - * @node must be the deepest modified node after an update. This function - * updates its min_start using the values held by its children, assuming - * that they did not change, and then updates all the nodes that may have - * changed in the path to the root. The only nodes that may have changed - * are the ones in the path or their siblings. 
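bfq_update_min() and bfq_update_active_node() above maintain the augmented rb-tree invariant that every active node carries the minimum start time of its subtree, which is what later allows eligible-entity lookups to prune whole subtrees. A simplified recomputation for a single node, without the rb-tree machinery and ignoring timestamp wraparound (hypothetical struct names):

#include <stdint.h>

struct ent {
        uint64_t start;      /* own start timestamp                     */
        uint64_t min_start;  /* minimum start over this node's subtree  */
        struct ent *left, *right;
};

/* Fold a child's subtree minimum into the parent, if the child exists. */
static void fold_min(struct ent *e, const struct ent *child)
{
        if (child && child->min_start < e->min_start)
                e->min_start = child->min_start;
}

/* Recompute min_start for one node from its own start and its children. */
static void update_node(struct ent *e)
{
        e->min_start = e->start;
        fold_min(e, e->left);
        fold_min(e, e->right);
}

int main(void)
{
        struct ent l = { 5, 5, 0, 0 }, r = { 9, 9, 0, 0 };
        struct ent root = { 7, 7, &l, &r };

        update_node(&root);
        return root.min_start == 5 ? 0 : 1;
}

Propagating this from the deepest modified node up to the root, as bfq_update_active_tree() does, restores the invariant after an insertion or extraction.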
- */ -static void bfq_update_active_tree(struct rb_node *node) -{ - struct rb_node *parent; - -up: - bfq_update_active_node(node); - - parent = rb_parent(node); - if (parent == NULL) - return; - - if (node == parent->rb_left && parent->rb_right != NULL) - bfq_update_active_node(parent->rb_right); - else if (parent->rb_left != NULL) - bfq_update_active_node(parent->rb_left); - - node = parent; - goto up; -} - -/** - * bfq_active_insert - insert an entity in the active tree of its group/device. - * @st: the service tree of the entity. - * @entity: the entity being inserted. - * - * The active tree is ordered by finish time, but an extra key is kept - * per each node, containing the minimum value for the start times of - * its children (and the node itself), so it's possible to search for - * the eligible node with the lowest finish time in logarithmic time. - */ -static void bfq_active_insert(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - struct rb_node *node = &entity->rb_node; - - bfq_insert(&st->active, entity); - - if (node->rb_left != NULL) - node = node->rb_left; - else if (node->rb_right != NULL) - node = node->rb_right; - - bfq_update_active_tree(node); - - if (bfqq != NULL) - list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); -} - -/** - * bfq_ioprio_to_weight - calc a weight from an ioprio. - * @ioprio: the ioprio value to convert. - */ -static unsigned short bfq_ioprio_to_weight(int ioprio) -{ - WARN_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); - return IOPRIO_BE_NR - ioprio; -} - -/** - * bfq_weight_to_ioprio - calc an ioprio from a weight. - * @weight: the weight value to convert. - * - * To preserve as mush as possible the old only-ioprio user interface, - * 0 is used as an escape ioprio value for weights (numerically) equal or - * larger than IOPRIO_BE_NR - */ -static unsigned short bfq_weight_to_ioprio(int weight) -{ - WARN_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); - return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight; -} - -static inline void bfq_get_entity(struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - if (bfqq != NULL) { - atomic_inc(&bfqq->ref); - bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", - bfqq, atomic_read(&bfqq->ref)); - } -} - -/** - * bfq_find_deepest - find the deepest node that an extraction can modify. - * @node: the node being removed. - * - * Do the first step of an extraction in an rb tree, looking for the - * node that will replace @node, and returning the deepest node that - * the following modifications to the tree can touch. If @node is the - * last node in the tree return %NULL. - */ -static struct rb_node *bfq_find_deepest(struct rb_node *node) -{ - struct rb_node *deepest; - - if (node->rb_right == NULL && node->rb_left == NULL) - deepest = rb_parent(node); - else if (node->rb_right == NULL) - deepest = node->rb_left; - else if (node->rb_left == NULL) - deepest = node->rb_right; - else { - deepest = rb_next(node); - if (deepest->rb_right != NULL) - deepest = deepest->rb_right; - else if (rb_parent(deepest) != node) - deepest = rb_parent(deepest); - } - - return deepest; -} - -/** - * bfq_active_extract - remove an entity from the active tree. - * @st: the service_tree containing the tree. - * @entity: the entity being removed. 
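bfq_ioprio_to_weight() and bfq_weight_to_ioprio() above define the default mapping between CFQ-style priorities and B-WF2Q+ weights: with IOPRIO_BE_NR equal to 8, ioprio 0 (highest) becomes weight 8, ioprio 7 becomes weight 1, and any weight of 8 or more maps back to the escape ioprio 0. A quick standalone round-trip check:

#include <stdio.h>

#define IOPRIO_BE_NR 8  /* number of best-effort ioprio levels */

static unsigned short ioprio_to_weight(int ioprio)
{
        return IOPRIO_BE_NR - ioprio;
}

static unsigned short weight_to_ioprio(int weight)
{
        return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
}

int main(void)
{
        for (int prio = 0; prio < IOPRIO_BE_NR; prio++) {
                int w = ioprio_to_weight(prio);

                printf("ioprio %d -> weight %d -> ioprio %d\n",
                       prio, w, weight_to_ioprio(w));
        }
        return 0;
}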
- */ -static void bfq_active_extract(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - struct rb_node *node; - - node = bfq_find_deepest(&entity->rb_node); - bfq_extract(&st->active, entity); - - if (node != NULL) - bfq_update_active_tree(node); - - if (bfqq != NULL) - list_del(&bfqq->bfqq_list); -} - -/** - * bfq_idle_insert - insert an entity into the idle tree. - * @st: the service tree containing the tree. - * @entity: the entity to insert. - */ -static void bfq_idle_insert(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - struct bfq_entity *first_idle = st->first_idle; - struct bfq_entity *last_idle = st->last_idle; - - if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) - st->first_idle = entity; - if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) - st->last_idle = entity; - - bfq_insert(&st->idle, entity); - - if (bfqq != NULL) - list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); -} - -/** - * bfq_forget_entity - remove an entity from the wfq trees. - * @st: the service tree. - * @entity: the entity being removed. - * - * Update the device status and forget everything about @entity, putting - * the device reference to it, if it is a queue. Entities belonging to - * groups are not refcounted. - */ -static void bfq_forget_entity(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - BUG_ON(!entity->on_st); - - entity->on_st = 0; - st->wsum -= entity->weight; - if (bfqq != NULL) { - bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", - bfqq, atomic_read(&bfqq->ref)); - bfq_put_queue(bfqq); - } -} - -/** - * bfq_put_idle_entity - release the idle tree ref of an entity. - * @st: service tree for the entity. - * @entity: the entity being released. - */ -static void bfq_put_idle_entity(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - bfq_idle_extract(st, entity); - bfq_forget_entity(st, entity); -} - -/** - * bfq_forget_idle - update the idle tree if necessary. - * @st: the service tree to act upon. - * - * To preserve the global O(log N) complexity we only remove one entry here; - * as the idle tree will not grow indefinitely this can be done safely. - */ -static void bfq_forget_idle(struct bfq_service_tree *st) -{ - struct bfq_entity *first_idle = st->first_idle; - struct bfq_entity *last_idle = st->last_idle; - - if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && - !bfq_gt(last_idle->finish, st->vtime)) { - /* - * Forget the whole idle tree, increasing the vtime past - * the last finish time of idle entities. 
- */ - st->vtime = last_idle->finish; - } - - if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) - bfq_put_idle_entity(st, first_idle); -} - -static struct bfq_service_tree * -__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - struct bfq_entity *entity) -{ - struct bfq_service_tree *new_st = old_st; - - if (entity->ioprio_changed) { - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - BUG_ON(old_st->wsum < entity->weight); - old_st->wsum -= entity->weight; - - if (entity->new_weight != entity->orig_weight) { - entity->orig_weight = entity->new_weight; - entity->ioprio = - bfq_weight_to_ioprio(entity->orig_weight); - } else if (entity->new_ioprio != entity->ioprio) { - entity->ioprio = entity->new_ioprio; - entity->orig_weight = - bfq_ioprio_to_weight(entity->ioprio); - } else - entity->new_weight = entity->orig_weight = - bfq_ioprio_to_weight(entity->ioprio); - - entity->ioprio_class = entity->new_ioprio_class; - entity->ioprio_changed = 0; - - /* - * NOTE: here we may be changing the weight too early, - * this will cause unfairness. The correct approach - * would have required additional complexity to defer - * weight changes to the proper time instants (i.e., - * when entity->finish <= old_st->vtime). - */ - new_st = bfq_entity_service_tree(entity); - entity->weight = entity->orig_weight * - (bfqq != NULL ? bfqq->raising_coeff : 1); - new_st->wsum += entity->weight; - - if (new_st != old_st) - entity->start = new_st->vtime; - } - - return new_st; -} - -/** - * bfq_bfqq_served - update the scheduler status after selection for service. - * @bfqq: the queue being served. - * @served: bytes to transfer. - * - * NOTE: this can be optimized, as the timestamps of upper level entities - * are synchronized every time a new bfqq is selected for service. By now, - * we keep it to better check consistency. - */ -static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) -{ - struct bfq_entity *entity = &bfqq->entity; - struct bfq_service_tree *st; - - for_each_entity(entity) { - st = bfq_entity_service_tree(entity); - - entity->service += served; - BUG_ON(entity->service > entity->budget); - BUG_ON(st->wsum == 0); - - st->vtime += bfq_delta(served, st->wsum); - bfq_forget_idle(st); - } - bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); -} - -/** - * bfq_bfqq_charge_full_budget - set the service to the entity budget. - * @bfqq: the queue that needs a service update. - * - * When it's not possible to be fair in the service domain, because - * a queue is not consuming its budget fast enough (the meaning of - * fast depends on the timeout parameter), we charge it a full - * budget. In this way we should obtain a sort of time-domain - * fairness among all the seeky/slow queues. - */ -static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) -{ - struct bfq_entity *entity = &bfqq->entity; - - bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); - - bfq_bfqq_served(bfqq, entity->budget - entity->service); -} - -/** - * __bfq_activate_entity - activate an entity. - * @entity: the entity being activated. - * - * Called whenever an entity is activated, i.e., it is not active and one - * of its children receives a new request, or has to be reactivated due to - * budget exhaustion. It uses the current budget of the entity (and the - * service received if @entity is active) of the queue to calculate its - * timestamps. 
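__bfq_entity_update_weight_prio() above is also where low-latency weight raising takes effect: the weight actually used for scheduling is the ioprio-derived weight multiplied by the queue's current raising coefficient (bfq_raising_coeff, initialised to 20 in bfq_init_queue() earlier in this patch). A toy calculation of the resulting share between a raised and a non-raised queue of equal base weight:

#include <stdio.h>

/* Effective weight = base weight from the ioprio times the raising factor. */
static unsigned effective_weight(unsigned orig_weight, unsigned raising_coeff)
{
        return orig_weight * raising_coeff;
}

int main(void)
{
        unsigned interactive = effective_weight(4, 20); /* weight-raised queue */
        unsigned batch       = effective_weight(4, 1);  /* normal queue        */
        unsigned wsum = interactive + batch;

        printf("raised queue gets %u/%u of the device time\n",
               interactive, wsum);
        printf("normal queue gets %u/%u of the device time\n",
               batch, wsum);
        return 0;
}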
- */ -static void __bfq_activate_entity(struct bfq_entity *entity) -{ - struct bfq_sched_data *sd = entity->sched_data; - struct bfq_service_tree *st = bfq_entity_service_tree(entity); - - if (entity == sd->active_entity) { - BUG_ON(entity->tree != NULL); - /* - * If we are requeueing the current entity we have - * to take care of not charging to it service it has - * not received. - */ - bfq_calc_finish(entity, entity->service); - entity->start = entity->finish; - sd->active_entity = NULL; - } else if (entity->tree == &st->active) { - /* - * Requeueing an entity due to a change of some - * next_active entity below it. We reuse the old - * start time. - */ - bfq_active_extract(st, entity); - } else if (entity->tree == &st->idle) { - /* - * Must be on the idle tree, bfq_idle_extract() will - * check for that. - */ - bfq_idle_extract(st, entity); - entity->start = bfq_gt(st->vtime, entity->finish) ? - st->vtime : entity->finish; - } else { - /* - * The finish time of the entity may be invalid, and - * it is in the past for sure, otherwise the queue - * would have been on the idle tree. - */ - entity->start = st->vtime; - st->wsum += entity->weight; - bfq_get_entity(entity); - - BUG_ON(entity->on_st); - entity->on_st = 1; - } - - st = __bfq_entity_update_weight_prio(st, entity); - bfq_calc_finish(entity, entity->budget); - bfq_active_insert(st, entity); -} - -/** - * bfq_activate_entity - activate an entity and its ancestors if necessary. - * @entity: the entity to activate. - * - * Activate @entity and all the entities on the path from it to the root. - */ -static void bfq_activate_entity(struct bfq_entity *entity) -{ - struct bfq_sched_data *sd; - - for_each_entity(entity) { - __bfq_activate_entity(entity); - - sd = entity->sched_data; - if (!bfq_update_next_active(sd)) - /* - * No need to propagate the activation to the - * upper entities, as they will be updated when - * the active entity is rescheduled. - */ - break; - } -} - -/** - * __bfq_deactivate_entity - deactivate an entity from its service tree. - * @entity: the entity to deactivate. - * @requeue: if false, the entity will not be put into the idle tree. - * - * Deactivate an entity, independently from its previous state. If the - * entity was not on a service tree just return, otherwise if it is on - * any scheduler tree, extract it from that tree, and if necessary - * and if the caller did not specify @requeue, put it on the idle tree. - * - * Return %1 if the caller should update the entity hierarchy, i.e., - * if the entity was under service or if it was the next_active for - * its sched_data; return %0 otherwise. 
- */ -static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -{ - struct bfq_sched_data *sd = entity->sched_data; - struct bfq_service_tree *st = bfq_entity_service_tree(entity); - int was_active = entity == sd->active_entity; - int ret = 0; - - if (!entity->on_st) - return 0; - - BUG_ON(was_active && entity->tree != NULL); - - if (was_active) { - bfq_calc_finish(entity, entity->service); - sd->active_entity = NULL; - } else if (entity->tree == &st->active) - bfq_active_extract(st, entity); - else if (entity->tree == &st->idle) - bfq_idle_extract(st, entity); - else if (entity->tree != NULL) - BUG(); - - if (was_active || sd->next_active == entity) - ret = bfq_update_next_active(sd); - - if (!requeue || !bfq_gt(entity->finish, st->vtime)) - bfq_forget_entity(st, entity); - else - bfq_idle_insert(st, entity); - - BUG_ON(sd->active_entity == entity); - BUG_ON(sd->next_active == entity); - - return ret; -} - -/** - * bfq_deactivate_entity - deactivate an entity. - * @entity: the entity to deactivate. - * @requeue: true if the entity can be put on the idle tree - */ -static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -{ - struct bfq_sched_data *sd; - struct bfq_entity *parent; - - for_each_entity_safe(entity, parent) { - sd = entity->sched_data; - - if (!__bfq_deactivate_entity(entity, requeue)) - /* - * The parent entity is still backlogged, and - * we don't need to update it as it is still - * under service. - */ - break; - - if (sd->next_active != NULL) - /* - * The parent entity is still backlogged and - * the budgets on the path towards the root - * need to be updated. - */ - goto update; - - /* - * If we reach there the parent is no more backlogged and - * we want to propagate the dequeue upwards. - */ - requeue = 1; - } - - return; - -update: - entity = parent; - for_each_entity(entity) { - __bfq_activate_entity(entity); - - sd = entity->sched_data; - if (!bfq_update_next_active(sd)) - break; - } -} - -/** - * bfq_update_vtime - update vtime if necessary. - * @st: the service tree to act upon. - * - * If necessary update the service tree vtime to have at least one - * eligible entity, skipping to its start time. Assumes that the - * active tree of the device is not empty. - * - * NOTE: this hierarchical implementation updates vtimes quite often, - * we may end up with reactivated tasks getting timestamps after a - * vtime skip done because we needed a ->first_active entity on some - * intermediate node. - */ -static void bfq_update_vtime(struct bfq_service_tree *st) -{ - struct bfq_entity *entry; - struct rb_node *node = st->active.rb_node; - - entry = rb_entry(node, struct bfq_entity, rb_node); - if (bfq_gt(entry->min_start, st->vtime)) { - st->vtime = entry->min_start; - bfq_forget_idle(st); - } -} - -/** - * bfq_first_active - find the eligible entity with the smallest finish time - * @st: the service tree to select from. - * - * This function searches the first schedulable entity, starting from the - * root of the tree and going on the left every time on this side there is - * a subtree with at least one eligible (start >= vtime) entity. The path - * on the right is followed only if a) the left subtree contains no eligible - * entities and b) no eligible entity has been found yet. 
- */ -static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) -{ - struct bfq_entity *entry, *first = NULL; - struct rb_node *node = st->active.rb_node; - - while (node != NULL) { - entry = rb_entry(node, struct bfq_entity, rb_node); -left: - if (!bfq_gt(entry->start, st->vtime)) - first = entry; - - BUG_ON(bfq_gt(entry->min_start, st->vtime)); - - if (node->rb_left != NULL) { - entry = rb_entry(node->rb_left, - struct bfq_entity, rb_node); - if (!bfq_gt(entry->min_start, st->vtime)) { - node = node->rb_left; - goto left; - } - } - if (first != NULL) - break; - node = node->rb_right; - } - - BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); - return first; -} - -/** - * __bfq_lookup_next_entity - return the first eligible entity in @st. - * @st: the service tree. - * - * Update the virtual time in @st and return the first eligible entity - * it contains. - */ -static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, - bool force) -{ - struct bfq_entity *entity, *new_next_active = NULL; - - if (RB_EMPTY_ROOT(&st->active)) - return NULL; - - bfq_update_vtime(st); - entity = bfq_first_active_entity(st); - BUG_ON(bfq_gt(entity->start, st->vtime)); - - /* - * If the chosen entity does not match with the sched_data's - * next_active and we are forcedly serving the IDLE priority - * class tree, bubble up budget update. - */ - if (unlikely(force && entity != entity->sched_data->next_active)) { - new_next_active = entity; - for_each_entity(new_next_active) - bfq_update_budget(new_next_active); - } - - return entity; -} - -/** - * bfq_lookup_next_entity - return the first eligible entity in @sd. - * @sd: the sched_data. - * @extract: if true the returned entity will be also extracted from @sd. - * - * NOTE: since we cache the next_active entity at each level of the - * hierarchy, the complexity of the lookup can be decreased with - * absolutely no effort just returning the cached next_active value; - * we prefer to do full lookups to test the consistency of * the data - * structures. - */ -static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, - int extract, - struct bfq_data *bfqd) -{ - struct bfq_service_tree *st = sd->service_tree; - struct bfq_entity *entity; - int i=0; - - BUG_ON(sd->active_entity != NULL); - - if (bfqd != NULL && - jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { - entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, true); - if (entity != NULL) { - i = BFQ_IOPRIO_CLASSES - 1; - bfqd->bfq_class_idle_last_service = jiffies; - sd->next_active = entity; - } - } - for (; i < BFQ_IOPRIO_CLASSES; i++) { - entity = __bfq_lookup_next_entity(st + i, false); - if (entity != NULL) { - if (extract) { - bfq_check_next_active(sd, entity); - bfq_active_extract(st + i, entity); - sd->active_entity = entity; - sd->next_active = NULL; - } - break; - } - } - - return entity; -} - -/* - * Get next queue for service. - */ -static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) -{ - struct bfq_entity *entity = NULL; - struct bfq_sched_data *sd; - struct bfq_queue *bfqq; - - BUG_ON(bfqd->active_queue != NULL); - - if (bfqd->busy_queues == 0) - return NULL; - - sd = &bfqd->root_group->sched_data; - for (; sd != NULL; sd = entity->my_sched_data) { - entity = bfq_lookup_next_entity(sd, 1, bfqd); - BUG_ON(entity == NULL); - entity->service = 0; - } - - bfqq = bfq_entity_to_bfqq(entity); - BUG_ON(bfqq == NULL); - - return bfqq; -} - -/* - * Forced extraction of the given queue. 
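bfq_lookup_next_entity() above encodes the inter-class policy: the three service trees are scanned in strict priority order (RT, then BE, then IDLE), except that the IDLE tree is force-served when it has been starved for longer than BFQ_CL_IDLE_TIMEOUT. A compact sketch of that selection order, with each tree reduced to a "has pending work" flag (names and the tick-based clock are illustrative, not kernel code):

#include <stdbool.h>

enum { CLASS_RT, CLASS_BE, CLASS_IDLE, NR_CLASSES };

struct class_state {
        bool backlogged[NR_CLASSES];      /* does this class have work?       */
        unsigned long now;                /* current time, in ticks           */
        unsigned long idle_last_service;  /* last time CLASS_IDLE was served  */
        unsigned long idle_timeout;       /* anti-starvation threshold        */
};

/* Return the class to serve next, or -1 if nothing is backlogged. */
static int pick_class(struct class_state *s)
{
        /* Force the idle class in if it has waited longer than the timeout. */
        if (s->backlogged[CLASS_IDLE] &&
            s->now - s->idle_last_service > s->idle_timeout) {
                s->idle_last_service = s->now;
                return CLASS_IDLE;
        }

        for (int c = CLASS_RT; c < NR_CLASSES; c++)
                if (s->backlogged[c])
                        return c;
        return -1;
}

int main(void)
{
        struct class_state s = {
                .backlogged = { false, true, true },
                .now = 1000, .idle_last_service = 0, .idle_timeout = 200,
        };

        return pick_class(&s);  /* idle class wins: starved for 1000 ticks */
}

In the scheduler proper each per-class decision is a full eligible-entity lookup via __bfq_lookup_next_entity(), not a simple flag test.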
- */ -static void bfq_get_next_queue_forced(struct bfq_data *bfqd, - struct bfq_queue *bfqq) -{ - struct bfq_entity *entity; - struct bfq_sched_data *sd; - - BUG_ON(bfqd->active_queue != NULL); - - entity = &bfqq->entity; - /* - * Bubble up extraction/update from the leaf to the root. - */ - for_each_entity(entity) { - sd = entity->sched_data; - bfq_update_budget(entity); - bfq_update_vtime(bfq_entity_service_tree(entity)); - bfq_active_extract(bfq_entity_service_tree(entity), entity); - sd->active_entity = entity; - sd->next_active = NULL; - entity->service = 0; - } - - return; -} - -static void __bfq_bfqd_reset_active(struct bfq_data *bfqd) -{ - if (bfqd->active_cic != NULL) { - put_io_context(bfqd->active_cic->ioc); - bfqd->active_cic = NULL; - } - - bfqd->active_queue = NULL; - del_timer(&bfqd->idle_slice_timer); -} - -static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, - int requeue) -{ - struct bfq_entity *entity = &bfqq->entity; - - if (bfqq == bfqd->active_queue) - __bfq_bfqd_reset_active(bfqd); - - bfq_deactivate_entity(entity, requeue); -} - -static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) -{ - struct bfq_entity *entity = &bfqq->entity; - - bfq_activate_entity(entity); -} - -/* - * Called when the bfqq no longer has requests pending, remove it from - * the service tree. - */ -static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, - int requeue) -{ - BUG_ON(!bfq_bfqq_busy(bfqq)); - BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); - - bfq_log_bfqq(bfqd, bfqq, "del from busy"); - - bfq_clear_bfqq_busy(bfqq); - - BUG_ON(bfqd->busy_queues == 0); - bfqd->busy_queues--; - - bfq_deactivate_bfqq(bfqd, bfqq, requeue); -} - -/* - * Called when an inactive queue receives a new request. - */ -static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) -{ - BUG_ON(bfq_bfqq_busy(bfqq)); - BUG_ON(bfqq == bfqd->active_queue); - - bfq_log_bfqq(bfqd, bfqq, "add to busy"); - - bfq_activate_bfqq(bfqd, bfqq); - - bfq_mark_bfqq_busy(bfqq); - bfqd->busy_queues++; -} diff --git a/block/bfq.h b/block/bfq.h deleted file mode 100644 index be2c572978c..00000000000 --- a/block/bfq.h +++ /dev/null @@ -1,593 +0,0 @@ -/* - * BFQ-v5 for 3.1.0: data structures and common functions prototypes. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe - * - * Copyright (C) 2008 Fabio Checconi - * Paolo Valente - */ - -#ifndef _BFQ_H -#define _BFQ_H - -#include -#include -#include -#include - -#define BFQ_IOPRIO_CLASSES 3 -#define BFQ_CL_IDLE_TIMEOUT HZ/5 - -#define BFQ_MIN_WEIGHT 1 -#define BFQ_MAX_WEIGHT 1000 - -#define BFQ_DEFAULT_GRP_WEIGHT 10 -#define BFQ_DEFAULT_GRP_IOPRIO 0 -#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE - -struct bfq_entity; - -/** - * struct bfq_service_tree - per ioprio_class service tree. - * @active: tree for active entities (i.e., those backlogged). - * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). - * @first_idle: idle entity with minimum F_i. - * @last_idle: idle entity with maximum F_i. - * @vtime: scheduler virtual time. - * @wsum: scheduler weight sum; active and idle entities contribute to it. - * - * Each service tree represents a B-WF2Q+ scheduler on its own. Each - * ioprio_class has its own independent scheduler, and so its own - * bfq_service_tree. All the fields are protected by the queue lock - * of the containing bfqd. 
- */ -struct bfq_service_tree { - struct rb_root active; - struct rb_root idle; - - struct bfq_entity *first_idle; - struct bfq_entity *last_idle; - - u64 vtime; - unsigned long wsum; -}; - -/** - * struct bfq_sched_data - multi-class scheduler. - * @active_entity: entity under service. - * @next_active: head-of-the-line entity in the scheduler. - * @service_tree: array of service trees, one per ioprio_class. - * - * bfq_sched_data is the basic scheduler queue. It supports three - * ioprio_classes, and can be used either as a toplevel queue or as - * an intermediate queue on a hierarchical setup. - * @next_active points to the active entity of the sched_data service - * trees that will be scheduled next. - * - * The supported ioprio_classes are the same as in CFQ, in descending - * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. - * Requests from higher priority queues are served before all the - * requests from lower priority queues; among requests of the same - * queue requests are served according to B-WF2Q+. - * All the fields are protected by the queue lock of the containing bfqd. - */ -struct bfq_sched_data { - struct bfq_entity *active_entity; - struct bfq_entity *next_active; - struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; -}; - -/** - * struct bfq_entity - schedulable entity. - * @rb_node: service_tree member. - * @on_st: flag, true if the entity is on a tree (either the active or - * the idle one of its service_tree). - * @finish: B-WF2Q+ finish timestamp (aka F_i). - * @start: B-WF2Q+ start timestamp (aka S_i). - * @tree: tree the entity is enqueued into; %NULL if not on a tree. - * @min_start: minimum start time of the (active) subtree rooted at - * this entity; used for O(log N) lookups into active trees. - * @service: service received during the last round of service. - * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. - * @weight: weight of the queue - * @parent: parent entity, for hierarchical scheduling. - * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the - * associated scheduler queue, %NULL on leaf nodes. - * @sched_data: the scheduler queue this entity belongs to. - * @ioprio: the ioprio in use. - * @new_weight: when a weight change is requested, the new weight value. - * @orig_weight: original weight, used to implement weight boosting - * @new_ioprio: when an ioprio change is requested, the new ioprio value. - * @ioprio_class: the ioprio_class in use. - * @new_ioprio_class: when an ioprio_class change is requested, the new - * ioprio_class value. - * @ioprio_changed: flag, true when the user requested a weight, ioprio or - * ioprio_class change. - * - * A bfq_entity is used to represent either a bfq_queue (leaf node in the - * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each - * entity belongs to the sched_data of the parent group in the cgroup - * hierarchy. Non-leaf entities have also their own sched_data, stored - * in @my_sched_data. - * - * Each entity stores independently its priority values; this would - * allow different weights on different devices, but this - * functionality is not exported to userspace by now. Priorities and - * weights are updated lazily, first storing the new values into the - * new_* fields, then setting the @ioprio_changed flag. As soon as - * there is a transition in the entity state that allows the priority - * update to take place the effective and the requested priority - * values are synchronized. 
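The lazy update protocol described in the comment above (park the requested value in a new_* field, raise the ioprio_changed flag, and fold the change in only at the next safe state transition) is a small deferred-configuration pattern that can be sketched on its own. The fragment below is an illustrative userspace rendering, not the BFQ code; struct entity and its helpers are invented for the example and keep only what is needed to show the idea.

#include <stdio.h>

/*
 * Stand-in for the new_* / ioprio_changed protocol: writers record the
 * requested weight and raise a flag; the scheduler applies it later, at
 * a point where the entity's timestamps can be recomputed safely.
 */
struct entity {
        unsigned short weight, new_weight;
        int changed;                    /* plays the role of ioprio_changed */
};

static void request_weight(struct entity *e, unsigned short w)
{
        e->new_weight = w;              /* store the requested value first */
        e->changed = 1;                 /* ...then raise the flag */
}

static void apply_pending(struct entity *e)
{
        if (e->changed) {               /* called at a state transition */
                e->weight = e->new_weight;
                e->changed = 0;
        }
}

int main(void)
{
        struct entity e = { .weight = 100, .new_weight = 100, .changed = 0 };

        request_weight(&e, 300);        /* takes effect lazily */
        printf("before transition: %u\n", e.weight);
        apply_pending(&e);              /* e.g. when the entity is re-queued */
        printf("after transition:  %u\n", e.weight);
        return 0;
}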
- * - * Unless cgroups are used, the weight value is calculated from the - * ioprio to export the same interface as CFQ. When dealing with - * ``well-behaved'' queues (i.e., queues that do not spend too much - * time to consume their budget and have true sequential behavior, and - * when there are no external factors breaking anticipation) the - * relative weights at each level of the cgroups hierarchy should be - * guaranteed. All the fields are protected by the queue lock of the - * containing bfqd. - */ -struct bfq_entity { - struct rb_node rb_node; - - int on_st; - - u64 finish; - u64 start; - - struct rb_root *tree; - - u64 min_start; - - unsigned long service, budget; - unsigned short weight, new_weight; - unsigned short orig_weight; - - struct bfq_entity *parent; - - struct bfq_sched_data *my_sched_data; - struct bfq_sched_data *sched_data; - - unsigned short ioprio, new_ioprio; - unsigned short ioprio_class, new_ioprio_class; - - int ioprio_changed; -}; - -struct bfq_group; - -/** - * struct bfq_queue - leaf schedulable entity. - * @ref: reference counter. - * @bfqd: parent bfq_data. - * @new_bfqq: shared bfq_queue if queue is cooperating with - * one or more other queues. - * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). - * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). - * @sort_list: sorted list of pending requests. - * @next_rq: if fifo isn't expired, next request to serve. - * @queued: nr of requests queued in @sort_list. - * @allocated: currently allocated requests. - * @meta_pending: pending metadata requests. - * @fifo: fifo list of requests in sort_list. - * @entity: entity representing this queue in the scheduler. - * @max_budget: maximum budget allowed from the feedback mechanism. - * @budget_timeout: budget expiration (in jiffies). - * @dispatched: number of requests on the dispatch list or inside driver. - * @org_ioprio: saved ioprio during boosted periods. - * @flags: status flags. - * @bfqq_list: node for active/idle bfqq list inside our bfqd. - * @seek_samples: number of seeks sampled - * @seek_total: sum of the distances of the seeks sampled - * @seek_mean: mean seek distance - * @last_request_pos: position of the last request enqueued - * @pid: pid of the process owning the queue, used for logging purposes. - * @last_rais_start_time: last (idle -> weight-raised) transition attempt - * @raising_cur_max_time: current max raising time for this queue - * - * A bfq_queue is a leaf request queue; it can be associated to an io_context - * or more (if it is an async one). @cgroup holds a reference to the - * cgroup, to be sure that it does not disappear while a bfqq still - * references it (mostly to avoid races between request issuing and task - * migration followed by cgroup distruction). - * All the fields are protected by the queue lock of the containing bfqd. 
- */ -struct bfq_queue { - atomic_t ref; - struct bfq_data *bfqd; - - /* fields for cooperating queues handling */ - struct bfq_queue *new_bfqq; - struct rb_node pos_node; - struct rb_root *pos_root; - - struct rb_root sort_list; - struct request *next_rq; - int queued[2]; - int allocated[2]; - int meta_pending; - struct list_head fifo; - - struct bfq_entity entity; - - unsigned long max_budget; - unsigned long budget_timeout; - - int dispatched; - - unsigned short org_ioprio; - - unsigned int flags; - - struct list_head bfqq_list; - - unsigned int seek_samples; - u64 seek_total; - sector_t seek_mean; - sector_t last_request_pos; - - pid_t pid; - - /* weight-raising fields */ - unsigned int raising_cur_max_time; - u64 last_rais_start_finish, soft_rt_next_start; - unsigned int raising_coeff; -}; - -/** - * struct bfq_data - per device data structure. - * @queue: request queue for the managed device. - * @root_group: root bfq_group for the device. - * @rq_pos_tree: rbtree sorted by next_request position, - * used when determining if two or more queues - * have interleaving requests (see bfq_close_cooperator). - * @busy_queues: number of bfq_queues containing requests (including the - * queue under service, even if it is idling). - * @queued: number of queued requests. - * @rq_in_driver: number of requests dispatched and waiting for completion. - * @sync_flight: number of sync requests in the driver. - * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples - * completed requests . - * @hw_tag_samples: nr of samples used to calculate hw_tag. - * @hw_tag: flag set to one if the driver is showing a queueing behavior. - * @budgets_assigned: number of budgets assigned. - * @idle_slice_timer: timer set when idling for the next sequential request - * from the queue under service. - * @unplug_work: delayed work to restart dispatching on the request queue. - * @active_queue: bfq_queue under service. - * @active_cic: cfq_io_context (cic) associated with the @active_queue. - * @last_position: on-disk position of the last served request. - * @last_budget_start: beginning of the last budget. - * @last_idling_start: beginning of the last idle slice. - * @peak_rate: peak transfer rate observed for a budget. - * @peak_rate_samples: number of samples used to calculate @peak_rate. - * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling. - * @cic_index: use small consequent indexes as radix tree keys to reduce depth - * @cic_list: list of all the cics active on the bfq_data device. - * @group_list: list of all the bfq_groups active on the device. - * @active_list: list of all the bfq_queues active on the device. - * @idle_list: list of all the bfq_queues idle on the device. - * @bfq_quantum: max number of requests dispatched per dispatch round. - * @bfq_fifo_expire: timeout for async/sync requests; when it expires - * requests are served in fifo order. - * @bfq_back_penalty: weight of backward seeks wrt forward ones. - * @bfq_back_max: maximum allowed backward seek. - * @bfq_slice_idle: maximum idling time. - * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning). - * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to - * async queues. 
- * @bfq_timeout: timeout for bfq_queues to consume their budget; used to - * to prevent seeky queues to impose long latencies to well - * behaved ones (this also implies that seeky queues cannot - * receive guarantees in the service domain; after a timeout - * they are charged for the whole allocated budget, to try - * to preserve a behavior reasonably fair among them, but - * without service-domain guarantees). - * @bfq_raising_coeff: Maximum factor by which the weight of a boosted - * queue is multiplied - * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies) - * @bfq_raising_rt_max_time: maximum duration for soft real-time processes - * @bfq_raising_min_idle_time: minimum idle period after which weight-raising - * may be reactivated for a queue (in jiffies) - * @bfq_raising_min_inter_arr_async: minimum period between request arrivals - * after which weight-raising may be - * reactivated for an already busy queue - * (in jiffies) - * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue, - * sectors per seconds - * @RT_prod: cached value of the product R*T used for computing the maximum - * duration of the weight raising automatically - * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions - * - * All the fields are protected by the @queue lock. - */ -struct bfq_data { - struct request_queue *queue; - - struct bfq_group *root_group; - - struct rb_root rq_pos_tree; - - int busy_queues; - int queued; - int rq_in_driver; - int sync_flight; - - int max_rq_in_driver; - int hw_tag_samples; - int hw_tag; - - int budgets_assigned; - - struct timer_list idle_slice_timer; - struct work_struct unplug_work; - - struct bfq_queue *active_queue; - struct cfq_io_context *active_cic; - - sector_t last_position; - - ktime_t last_budget_start; - ktime_t last_idling_start; - int peak_rate_samples; - u64 peak_rate; - unsigned long bfq_max_budget; - - unsigned int cic_index; - struct list_head cic_list; - struct hlist_head group_list; - struct list_head active_list; - struct list_head idle_list; - - unsigned int bfq_quantum; - unsigned int bfq_fifo_expire[2]; - unsigned int bfq_back_penalty; - unsigned int bfq_back_max; - unsigned int bfq_slice_idle; - u64 bfq_class_idle_last_service; - - unsigned int bfq_user_max_budget; - unsigned int bfq_max_budget_async_rq; - unsigned int bfq_timeout[2]; - - bool low_latency; - - /* parameters of the low_latency heuristics */ - unsigned int bfq_raising_coeff; - unsigned int bfq_raising_max_time; - unsigned int bfq_raising_rt_max_time; - unsigned int bfq_raising_min_idle_time; - unsigned int bfq_raising_min_inter_arr_async; - unsigned int bfq_raising_max_softrt_rate; - u64 RT_prod; - - struct bfq_queue oom_bfqq; -}; - -enum bfqq_state_flags { - BFQ_BFQQ_FLAG_busy = 0, /* has requests or is under service */ - BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ - BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ - BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ - BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ - BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ - BFQ_BFQQ_FLAG_sync, /* synchronous queue */ - BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ - BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ - BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ - BFQ_BFQQ_FLAG_some_coop_idle, /* some cooperator is inactive */ -}; - -#define BFQ_BFQQ_FNS(name) \ -static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ -{ \ - (bfqq)->flags |= (1 << 
BFQ_BFQQ_FLAG_##name); \ -} \ -static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ -{ \ - (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ -} \ -static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ -{ \ - return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ -} - -BFQ_BFQQ_FNS(busy); -BFQ_BFQQ_FNS(wait_request); -BFQ_BFQQ_FNS(must_alloc); -BFQ_BFQQ_FNS(fifo_expire); -BFQ_BFQQ_FNS(idle_window); -BFQ_BFQQ_FNS(prio_changed); -BFQ_BFQQ_FNS(sync); -BFQ_BFQQ_FNS(budget_new); -BFQ_BFQQ_FNS(coop); -BFQ_BFQQ_FNS(split_coop); -BFQ_BFQQ_FNS(some_coop_idle); -#undef BFQ_BFQQ_FNS - -/* Logging facilities. */ -#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ - blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) - -#define bfq_log(bfqd, fmt, args...) \ - blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) - -/* Expiration reasons. */ -enum bfqq_expiration { - BFQ_BFQQ_TOO_IDLE = 0, /* queue has been idling for too long */ - BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ - BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ - BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ -}; - -#ifdef CONFIG_CGROUP_BFQIO -/** - * struct bfq_group - per (device, cgroup) data structure. - * @entity: schedulable entity to insert into the parent group sched_data. - * @sched_data: own sched_data, to contain child entities (they may be - * both bfq_queues and bfq_groups). - * @group_node: node to be inserted into the bfqio_cgroup->group_data - * list of the containing cgroup's bfqio_cgroup. - * @bfqd_node: node to be inserted into the @bfqd->group_list list - * of the groups active on the same device; used for cleanup. - * @bfqd: the bfq_data for the device this group acts upon. - * @async_bfqq: array of async queues for all the tasks belonging to - * the group, one queue per ioprio value per ioprio_class, - * except for the idle class that has only one queue. - * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). - * @my_entity: pointer to @entity, %NULL for the toplevel group; used - * to avoid too many special cases during group creation/migration. - * - * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup - * there is a set of bfq_groups, each one collecting the lower-level - * entities belonging to the group that are acting on the same device. - * - * Locking works as follows: - * o @group_node is protected by the bfqio_cgroup lock, and is accessed - * via RCU from its readers. - * o @bfqd is protected by the queue lock, RCU is used to access it - * from the readers. - * o All the other fields are protected by the @bfqd queue lock. - */ -struct bfq_group { - struct bfq_entity entity; - struct bfq_sched_data sched_data; - - struct hlist_node group_node; - struct hlist_node bfqd_node; - - void *bfqd; - - struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; - struct bfq_queue *async_idle_bfqq; - - struct bfq_entity *my_entity; -}; - -/** - * struct bfqio_cgroup - bfq cgroup data structure. - * @css: subsystem state for bfq in the containing cgroup. - * @weight: cgroup weight. - * @ioprio: cgroup ioprio. - * @ioprio_class: cgroup ioprio_class. - * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. - * @group_data: list containing the bfq_group belonging to this cgroup. - * - * @group_data is accessed using RCU, with @lock protecting the updates, - * @ioprio and @ioprio_class are protected by @lock. 
- */ -struct bfqio_cgroup { - struct cgroup_subsys_state css; - - unsigned short weight, ioprio, ioprio_class; - - spinlock_t lock; - struct hlist_head group_data; -}; -#else -struct bfq_group { - struct bfq_sched_data sched_data; - - struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; - struct bfq_queue *async_idle_bfqq; -}; -#endif - -static inline struct bfq_service_tree * -bfq_entity_service_tree(struct bfq_entity *entity) -{ - struct bfq_sched_data *sched_data = entity->sched_data; - unsigned int idx = entity->ioprio_class - 1; - - BUG_ON(idx >= BFQ_IOPRIO_CLASSES); - BUG_ON(sched_data == NULL); - - return sched_data->service_tree + idx; -} - -static inline struct bfq_queue *cic_to_bfqq(struct cfq_io_context *cic, - int is_sync) -{ - return cic->cfqq[!!is_sync]; -} - -static inline void cic_set_bfqq(struct cfq_io_context *cic, - struct bfq_queue *bfqq, int is_sync) -{ - cic->cfqq[!!is_sync] = bfqq; -} - -static inline void call_for_each_cic(struct io_context *ioc, - void (*func)(struct io_context *, - struct cfq_io_context *)) -{ - struct cfq_io_context *cic; - struct hlist_node *n; - - rcu_read_lock(); - hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) - func(ioc, cic); - rcu_read_unlock(); -} - -#define CIC_DEAD_KEY 1ul -#define CIC_DEAD_INDEX_SHIFT 1 - -static inline void *bfqd_dead_key(struct bfq_data *bfqd) -{ - return (void *)(bfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); -} - -/** - * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. - * @ptr: a pointer to a bfqd. - * @flags: storage for the flags to be saved. - * - * This function allows cic->key and bfqg->bfqd to be protected by the - * queue lock of the bfqd they reference; the pointer is dereferenced - * under RCU, so the storage for bfqd is assured to be safe as long - * as the RCU read side critical section does not end. After the - * bfqd->queue->queue_lock is taken the pointer is rechecked, to be - * sure that no other writer accessed it. If we raced with a writer, - * the function returns NULL, with the queue unlocked, otherwise it - * returns the dereferenced pointer, with the queue locked. 
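The lookup idiom documented above (dereference the pointer inside an RCU read-side section, take the queue lock, then re-read the pointer to make sure no writer retired it in the meantime) is worth seeing in isolation. What follows is a simplified userspace analogue, assuming a mutex in place of the queue lock and a C11 atomic pointer in place of RCU; it deliberately leaves out object reclamation, which is the part the kernel delegates to RCU grace periods.

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct dev {
        pthread_mutex_t lock;
        int data;
};

/* Shared pointer; a writer clears it before retiring the object. */
static _Atomic(struct dev *) shared_dev;

/* Return the device with its lock held, or NULL if we raced with a writer. */
struct dev *get_dev_locked(void)
{
        struct dev *d = atomic_load(&shared_dev);       /* "rcu_dereference" */

        if (d != NULL) {
                pthread_mutex_lock(&d->lock);
                if (atomic_load(&shared_dev) == d)      /* re-check under the lock */
                        return d;
                pthread_mutex_unlock(&d->lock);         /* writer won, back off */
        }
        return NULL;
}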
- */ -static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, - unsigned long *flags) -{ - struct bfq_data *bfqd; - - rcu_read_lock(); - bfqd = rcu_dereference(*(struct bfq_data **)ptr); - - if (bfqd != NULL && !((unsigned long) bfqd & CIC_DEAD_KEY)) { - spin_lock_irqsave(bfqd->queue->queue_lock, *flags); - if (*ptr == bfqd) - goto out; - spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); - } - - bfqd = NULL; -out: - rcu_read_unlock(); - return bfqd; -} - -static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, - unsigned long *flags) -{ - spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); -} - -static void bfq_changed_ioprio(struct io_context *ioc, - struct cfq_io_context *cic); -static void bfq_put_queue(struct bfq_queue *bfqq); -static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); -static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, - struct bfq_group *bfqg, int is_sync, - struct io_context *ioc, gfp_t gfp_mask); -static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); -static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); -#endif diff --git a/block/blk-ioc.c b/block/blk-ioc.c index d0d16d4a79a..6f9bbd97865 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include /* for max_pfn/max_low_pfn */ #include @@ -17,12 +16,13 @@ */ static struct kmem_cache *iocontext_cachep; -static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list) +static void cfq_dtor(struct io_context *ioc) { - if (!hlist_empty(list)) { + if (!hlist_empty(&ioc->cic_list)) { struct cfq_io_context *cic; - cic = hlist_entry(list->first, struct cfq_io_context, cic_list); + cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, + cic_list); cic->dtor(ioc); } } @@ -40,9 +40,7 @@ int put_io_context(struct io_context *ioc) if (atomic_long_dec_and_test(&ioc->refcount)) { rcu_read_lock(); - - hlist_sched_dtor(ioc, &ioc->cic_list); - hlist_sched_dtor(ioc, &ioc->bfq_cic_list); + cfq_dtor(ioc); rcu_read_unlock(); kmem_cache_free(iocontext_cachep, ioc); @@ -52,14 +50,15 @@ int put_io_context(struct io_context *ioc) } EXPORT_SYMBOL(put_io_context); -static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list) +static void cfq_exit(struct io_context *ioc) { rcu_read_lock(); - if (!hlist_empty(list)) { + if (!hlist_empty(&ioc->cic_list)) { struct cfq_io_context *cic; - cic = hlist_entry(list->first, struct cfq_io_context, cic_list); + cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, + cic_list); cic->exit(ioc); } rcu_read_unlock(); @@ -75,10 +74,9 @@ void exit_io_context(struct task_struct *task) task->io_context = NULL; task_unlock(task); - if (atomic_dec_and_test(&ioc->nr_tasks)) { - hlist_sched_exit(ioc, &ioc->cic_list); - hlist_sched_exit(ioc, &ioc->bfq_cic_list); - } + if (atomic_dec_and_test(&ioc->nr_tasks)) + cfq_exit(ioc); + put_io_context(ioc); } @@ -91,14 +89,12 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) atomic_long_set(&ioc->refcount, 1); atomic_set(&ioc->nr_tasks, 1); spin_lock_init(&ioc->lock); - bitmap_zero(ioc->ioprio_changed, IOC_IOPRIO_CHANGED_BITS); + ioc->ioprio_changed = 0; ioc->ioprio = 0; ioc->last_waited = 0; /* doesn't matter... 
*/ ioc->nr_batch_requests = 0; /* because this is 0 */ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ioc->cic_list); - INIT_RADIX_TREE(&ioc->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH); - INIT_HLIST_HEAD(&ioc->bfq_cic_list); ioc->ioc_data = NULL; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) ioc->cgroup_changed = 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 0f60ba0ad87..97c3d462732 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2934,6 +2934,7 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) static void cfq_ioc_set_ioprio(struct io_context *ioc) { call_for_each_cic(ioc, changed_ioprio); + ioc->ioprio_changed = 0; } static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, @@ -3225,13 +3226,8 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) goto err_free; out: - /* - * test_and_clear_bit() implies a memory barrier, paired with - * the wmb() in fs/ioprio.c, so the value seen for ioprio is the - * new one. - */ - if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED, - ioc->ioprio_changed))) + smp_read_barrier_depends(); + if (unlikely(ioc->ioprio_changed)) cfq_ioc_set_ioprio(ioc); #ifdef CONFIG_CFQ_GROUP_IOSCHED diff --git a/fs/ioprio.c b/fs/ioprio.c index 95a6c2b04e0..7da2a06508e 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -30,7 +30,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) { - int err, i; + int err; struct io_context *ioc; const struct cred *cred = current_cred(), *tcred; @@ -60,17 +60,12 @@ int set_task_ioprio(struct task_struct *task, int ioprio) err = -ENOMEM; break; } - /* let other ioc users see the new values */ - smp_wmb(); task->io_context = ioc; } while (1); if (!err) { ioc->ioprio = ioprio; - /* make sure schedulers see the new ioprio value */ - wmb(); - for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++) - set_bit(i, ioc->ioprio_changed); + ioc->ioprio_changed = 1; } task_unlock(task); diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ae2ef66217b..b7fd4c8c70c 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -65,12 +65,6 @@ SUBSYS(perf) /* */ -#ifdef CONFIG_CGROUP_BFQIO -SUBSYS(bfqio) -#endif - -/* */ - #ifdef CONFIG_CGROUP_TIMER_SLACK SUBSYS(timer_slack) #endif diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 69fdd5894ef..5037a0ad231 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -1,10 +1,10 @@ #ifndef IOCONTEXT_H #define IOCONTEXT_H -#include #include #include +struct cfq_queue; struct cfq_ttime { unsigned long last_end_request; @@ -16,7 +16,7 @@ struct cfq_ttime { struct cfq_io_context { void *key; - void *cfqq[2]; + struct cfq_queue *cfqq[2]; struct io_context *ioc; @@ -31,16 +31,6 @@ struct cfq_io_context { struct rcu_head rcu_head; }; -/* - * Indexes into the ioprio_changed bitmap. A bit set indicates that - * the corresponding I/O scheduler needs to see a ioprio update. - */ -enum { - IOC_CFQ_IOPRIO_CHANGED, - IOC_BFQ_IOPRIO_CHANGED, - IOC_IOPRIO_CHANGED_BITS -}; - /* * I/O subsystem state of the associated processes. It is refcounted * and kmalloc'ed. These could be shared between processes. 
@@ -53,7 +43,7 @@ struct io_context { spinlock_t lock; unsigned short ioprio; - DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS); + unsigned short ioprio_changed; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) unsigned short cgroup_changed; @@ -67,8 +57,6 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; - struct radix_tree_root bfq_radix_root; - struct hlist_head bfq_cic_list; void __rcu *ioc_data; }; From ef7f88751d74b074ad4efda0977d830a1e785dee Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Nov 2013 17:25:38 -0500 Subject: [PATCH 603/678] defconfig: enable cgroup timer slack --- arch/arm/configs/metallice_grouper_defconfig | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 6216f452a8d..45e0a6a03ff 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a64" +CONFIG_LOCALVERSION="-MKernel-" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -82,6 +82,7 @@ CONFIG_LOG_BUF_SHIFT=17 CONFIG_CGROUPS=y CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_TIMER_SLACK=y # CONFIG_CGROUP_DEVICE is not set # CONFIG_CPUSETS is not set CONFIG_CGROUP_CPUACCT=y @@ -94,7 +95,7 @@ CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y # CONFIG_BLK_CGROUP is not set # CONFIG_NAMESPACES is not set -# CONFIG_SCHED_AUTOGROUP is not set +CONFIG_SCHED_AUTOGROUP=y # CONFIG_SYSFS_DEPRECATED is not set # CONFIG_RELAY is not set CONFIG_BLK_DEV_INITRD=y @@ -180,12 +181,9 @@ CONFIG_IOSCHED_ROW=y CONFIG_IOSCHED_CFQ=y # CONFIG_IOSCHED_SIO is not set # CONFIG_IOSCHED_VR is not set -CONFIG_IOSCHED_BFQ=y -CONFIG_CGROUP_BFQIO=y CONFIG_DEFAULT_DEADLINE=y # CONFIG_DEFAULT_ROW is not set # CONFIG_DEFAULT_CFQ is not set -# CONFIG_DEFAULT_BFQ is not set # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="deadline" # CONFIG_INLINE_SPIN_TRYLOCK is not set From 6bb77599c2f022a0a610261f93e9596e3bd9ae3d Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Nov 2013 15:53:29 -0500 Subject: [PATCH 604/678] [PATCH 1/4] block: prepare I/O context code for BFQ-v6r2 for 3.1 --- block/Kconfig.iosched | 26 ++++++++++++++++++++++++++ block/blk-ioc.c | 30 +++++++++++++++++------------- block/cfq-iosched.c | 10 +++++++--- fs/ioprio.c | 9 +++++++-- include/linux/iocontext.h | 21 ++++++++++++++++++--- 5 files changed, 75 insertions(+), 21 deletions(-) diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 8201a45cd26..06ec27e59a0 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -71,6 +71,28 @@ config IOSCHED_VR Requests are chosen according to SSTF with a penalty of rev_penalty for switching head direction. +config IOSCHED_BFQ + tristate "BFQ I/O scheduler" + depends on EXPERIMENTAL + default n + ---help--- + The BFQ I/O scheduler tries to distribute bandwidth among + all processes according to their weights. + It aims at distributing the bandwidth as desired, independently of + the disk parameters and with any workload. It also tries to + guarantee low latency to interactive and soft real-time + applications. If compiled built-in (saying Y here), BFQ can + be configured to support hierarchical scheduling. 
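Once these Kconfig entries exist, a defconfig can opt into BFQ with a handful of lines such as the ones below. This is only an illustration of the symbols introduced here; the metallice_grouper_defconfig earlier in this series keeps "deadline" as the default elevator.

CONFIG_IOSCHED_BFQ=y
CONFIG_CGROUP_BFQIO=y
# CONFIG_DEFAULT_DEADLINE is not set
CONFIG_DEFAULT_BFQ=y
CONFIG_DEFAULT_IOSCHED="bfq"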
+ +config CGROUP_BFQIO + bool "BFQ hierarchical scheduling support" + depends on CGROUPS && IOSCHED_BFQ=y + default n + ---help--- + Enable hierarchical scheduling in BFQ, using the cgroups + filesystem interface. The name of the subsystem will be + bfqio. + choice prompt "Default I/O scheduler" default DEFAULT_CFQ @@ -94,6 +116,9 @@ choice config DEFAULT_CFQ bool "CFQ" if IOSCHED_CFQ=y + config DEFAULT_BFQ + bool "BFQ" if IOSCHED_BFQ=y + config DEFAULT_NOOP bool "No-op" @@ -110,6 +135,7 @@ config DEFAULT_IOSCHED default "deadline" if DEFAULT_DEADLINE default "row" if DEFAULT_ROW default "cfq" if DEFAULT_CFQ + default "bfq" if DEFAULT_BFQ default "noop" if DEFAULT_NOOP default "sio" if DEFAULT_SIO default "vr" if DEFAULT_VR diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 6f9bbd97865..d0d16d4a79a 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include /* for max_pfn/max_low_pfn */ #include @@ -16,13 +17,12 @@ */ static struct kmem_cache *iocontext_cachep; -static void cfq_dtor(struct io_context *ioc) +static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list) { - if (!hlist_empty(&ioc->cic_list)) { + if (!hlist_empty(list)) { struct cfq_io_context *cic; - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); + cic = hlist_entry(list->first, struct cfq_io_context, cic_list); cic->dtor(ioc); } } @@ -40,7 +40,9 @@ int put_io_context(struct io_context *ioc) if (atomic_long_dec_and_test(&ioc->refcount)) { rcu_read_lock(); - cfq_dtor(ioc); + + hlist_sched_dtor(ioc, &ioc->cic_list); + hlist_sched_dtor(ioc, &ioc->bfq_cic_list); rcu_read_unlock(); kmem_cache_free(iocontext_cachep, ioc); @@ -50,15 +52,14 @@ int put_io_context(struct io_context *ioc) } EXPORT_SYMBOL(put_io_context); -static void cfq_exit(struct io_context *ioc) +static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list) { rcu_read_lock(); - if (!hlist_empty(&ioc->cic_list)) { + if (!hlist_empty(list)) { struct cfq_io_context *cic; - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); + cic = hlist_entry(list->first, struct cfq_io_context, cic_list); cic->exit(ioc); } rcu_read_unlock(); @@ -74,9 +75,10 @@ void exit_io_context(struct task_struct *task) task->io_context = NULL; task_unlock(task); - if (atomic_dec_and_test(&ioc->nr_tasks)) - cfq_exit(ioc); - + if (atomic_dec_and_test(&ioc->nr_tasks)) { + hlist_sched_exit(ioc, &ioc->cic_list); + hlist_sched_exit(ioc, &ioc->bfq_cic_list); + } put_io_context(ioc); } @@ -89,12 +91,14 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) atomic_long_set(&ioc->refcount, 1); atomic_set(&ioc->nr_tasks, 1); spin_lock_init(&ioc->lock); - ioc->ioprio_changed = 0; + bitmap_zero(ioc->ioprio_changed, IOC_IOPRIO_CHANGED_BITS); ioc->ioprio = 0; ioc->last_waited = 0; /* doesn't matter... 
*/ ioc->nr_batch_requests = 0; /* because this is 0 */ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ioc->cic_list); + INIT_RADIX_TREE(&ioc->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH); + INIT_HLIST_HEAD(&ioc->bfq_cic_list); ioc->ioc_data = NULL; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) ioc->cgroup_changed = 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 97c3d462732..0f60ba0ad87 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2934,7 +2934,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) static void cfq_ioc_set_ioprio(struct io_context *ioc) { call_for_each_cic(ioc, changed_ioprio); - ioc->ioprio_changed = 0; } static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, @@ -3226,8 +3225,13 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) goto err_free; out: - smp_read_barrier_depends(); - if (unlikely(ioc->ioprio_changed)) + /* + * test_and_clear_bit() implies a memory barrier, paired with + * the wmb() in fs/ioprio.c, so the value seen for ioprio is the + * new one. + */ + if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED, + ioc->ioprio_changed))) cfq_ioc_set_ioprio(ioc); #ifdef CONFIG_CFQ_GROUP_IOSCHED diff --git a/fs/ioprio.c b/fs/ioprio.c index 7da2a06508e..95a6c2b04e0 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -30,7 +30,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) { - int err; + int err, i; struct io_context *ioc; const struct cred *cred = current_cred(), *tcred; @@ -60,12 +60,17 @@ int set_task_ioprio(struct task_struct *task, int ioprio) err = -ENOMEM; break; } + /* let other ioc users see the new values */ + smp_wmb(); task->io_context = ioc; } while (1); if (!err) { ioc->ioprio = ioprio; - ioc->ioprio_changed = 1; + /* make sure schedulers see the new ioprio value */ + wmb(); + for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++) + set_bit(i, ioc->ioprio_changed); } task_unlock(task); diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 5037a0ad231..fbdaa5aef61 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -1,10 +1,10 @@ #ifndef IOCONTEXT_H #define IOCONTEXT_H +#include #include #include -struct cfq_queue; struct cfq_ttime { unsigned long last_end_request; @@ -16,12 +16,15 @@ struct cfq_ttime { struct cfq_io_context { void *key; - struct cfq_queue *cfqq[2]; + void *cfqq[2]; struct io_context *ioc; struct cfq_ttime ttime; + unsigned int raising_time_left; + unsigned int saved_idle_window; + struct list_head queue_list; struct hlist_node cic_list; @@ -31,6 +34,16 @@ struct cfq_io_context { struct rcu_head rcu_head; }; +/* + * Indexes into the ioprio_changed bitmap. A bit set indicates that + * the corresponding I/O scheduler needs to see a ioprio update. + */ +enum { + IOC_CFQ_IOPRIO_CHANGED, + IOC_BFQ_IOPRIO_CHANGED, + IOC_IOPRIO_CHANGED_BITS +}; + /* * I/O subsystem state of the associated processes. It is refcounted * and kmalloc'ed. These could be shared between processes. 
@@ -43,7 +56,7 @@ struct io_context { spinlock_t lock; unsigned short ioprio; - unsigned short ioprio_changed; + DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS); #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) unsigned short cgroup_changed; @@ -57,6 +70,8 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; + struct radix_tree_root bfq_radix_root; + struct hlist_head bfq_cic_list; void __rcu *ioc_data; }; From 57e9eeb1d03b904755b86f6c240539a6a99839c7 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Nov 2013 15:56:59 -0500 Subject: [PATCH 605/678] [PATCH 2/4] block: cgroups, kconfig, build bits for BFQ-v6r2-3.1 --- include/linux/cgroup_subsys.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index b7fd4c8c70c..f2ca8cf3a88 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -69,4 +69,10 @@ SUBSYS(perf) SUBSYS(timer_slack) #endif -/* */ \ No newline at end of file +/* */ + +#ifdef CONFIG_CGROUP_BFQIO +SUBSYS(bfqio) +#endif + +/* */ From ed50439f4724f002066e22e59dff1feaa08008aa Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Nov 2013 15:56:59 -0500 Subject: [PATCH 606/678] [PATCH 2/4] block: cgroups, kconfig, build bits for BFQ-v6r2-3.1 --- block/Makefile | 1 + block/bfq-cgroup.c | 876 +++++++++++ block/bfq-ioc.c | 410 ++++++ block/bfq-iosched.c | 3367 +++++++++++++++++++++++++++++++++++++++++++ block/bfq-sched.c | 1040 +++++++++++++ block/bfq.h | 606 ++++++++ 6 files changed, 6300 insertions(+) create mode 100644 block/bfq-cgroup.c create mode 100644 block/bfq-ioc.c create mode 100644 block/bfq-iosched.c create mode 100644 block/bfq-sched.c create mode 100644 block/bfq.h diff --git a/block/Makefile b/block/Makefile index eb332a2d98c..760d8f3ff2e 100644 --- a/block/Makefile +++ b/block/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_ROW) += row-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o +obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c new file mode 100644 index 00000000000..be3e902c5c9 --- /dev/null +++ b/block/bfq-cgroup.c @@ -0,0 +1,876 @@ +/* + * BFQ: CGROUPS support. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + * + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. + */ + +#ifdef CONFIG_CGROUP_BFQIO +static struct bfqio_cgroup bfqio_root_cgroup = { + .weight = BFQ_DEFAULT_GRP_WEIGHT, + .ioprio = BFQ_DEFAULT_GRP_IOPRIO, + .ioprio_class = BFQ_DEFAULT_GRP_CLASS, +}; + +static inline void bfq_init_entity(struct bfq_entity *entity, + struct bfq_group *bfqg) +{ + entity->weight = entity->new_weight; + entity->orig_weight = entity->new_weight; + entity->ioprio = entity->new_ioprio; + entity->ioprio_class = entity->new_ioprio_class; + entity->parent = bfqg->my_entity; + entity->sched_data = &bfqg->sched_data; +} + +static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup) +{ + return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id), + struct bfqio_cgroup, css); +} + +/* + * Search the bfq_group for bfqd into the hash table (by now only a list) + * of bgrp. Must be called under rcu_read_lock(). 
+ */ +static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, + struct bfq_data *bfqd) +{ + struct bfq_group *bfqg; + struct hlist_node *n; + void *key; + + hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) { + key = rcu_dereference(bfqg->bfqd); + if (key == bfqd) + return bfqg; + } + + return NULL; +} + +static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, + struct bfq_group *bfqg) +{ + struct bfq_entity *entity = &bfqg->entity; + + /* + * If the weight of the entity has never been set via the sysfs + * interface, then bgrp->weight == 0. In this case we initialize + * the weight from the current ioprio value. Otherwise, the group + * weight, if set, has priority over the ioprio value. + */ + if (bgrp->weight == 0) { + entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio); + entity->new_ioprio = bgrp->ioprio; + } else { + entity->new_weight = bgrp->weight; + entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight); + } + entity->orig_weight = entity->weight = entity->new_weight; + entity->ioprio = entity->new_ioprio; + entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; + entity->my_sched_data = &bfqg->sched_data; +} + +static inline void bfq_group_set_parent(struct bfq_group *bfqg, + struct bfq_group *parent) +{ + struct bfq_entity *entity; + + BUG_ON(parent == NULL); + BUG_ON(bfqg == NULL); + + entity = &bfqg->entity; + entity->parent = parent->my_entity; + entity->sched_data = &parent->sched_data; +} + +/** + * bfq_group_chain_alloc - allocate a chain of groups. + * @bfqd: queue descriptor. + * @cgroup: the leaf cgroup this chain starts from. + * + * Allocate a chain of groups starting from the one belonging to + * @cgroup up to the root cgroup. Stop if a cgroup on the chain + * to the root has already an allocated group on @bfqd. + */ +static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp; + struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; + + for (; cgroup != NULL; cgroup = cgroup->parent) { + bgrp = cgroup_to_bfqio(cgroup); + + bfqg = bfqio_lookup_group(bgrp, bfqd); + if (bfqg != NULL) { + /* + * All the cgroups in the path from there to the + * root must have a bfq_group for bfqd, so we don't + * need any more allocations. + */ + break; + } + + bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); + if (bfqg == NULL) + goto cleanup; + + bfq_group_init_entity(bgrp, bfqg); + bfqg->my_entity = &bfqg->entity; + + if (leaf == NULL) { + leaf = bfqg; + prev = leaf; + } else { + bfq_group_set_parent(prev, bfqg); + /* + * Build a list of allocated nodes using the bfqd + * filed, that is still unused and will be initialized + * only after the node will be connected. + */ + prev->bfqd = bfqg; + prev = bfqg; + } + } + + return leaf; + +cleanup: + while (leaf != NULL) { + prev = leaf; + leaf = leaf->bfqd; + kfree(prev); + } + + return NULL; +} + +/** + * bfq_group_chain_link - link an allocatd group chain to a cgroup hierarchy. + * @bfqd: the queue descriptor. + * @cgroup: the leaf cgroup to start from. + * @leaf: the leaf group (to be associated to @cgroup). + * + * Try to link a chain of groups to a cgroup hierarchy, connecting the + * nodes bottom-up, so we can be sure that when we find a cgroup in the + * hierarchy that already as a group associated to @bfqd all the nodes + * in the path to the root cgroup have one too. 
+ * + * On locking: the queue lock protects the hierarchy (there is a hierarchy + * per device) while the bfqio_cgroup lock protects the list of groups + * belonging to the same cgroup. + */ +static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup, + struct bfq_group *leaf) +{ + struct bfqio_cgroup *bgrp; + struct bfq_group *bfqg, *next, *prev = NULL; + unsigned long flags; + + assert_spin_locked(bfqd->queue->queue_lock); + + for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) { + bgrp = cgroup_to_bfqio(cgroup); + next = leaf->bfqd; + + bfqg = bfqio_lookup_group(bgrp, bfqd); + BUG_ON(bfqg != NULL); + + spin_lock_irqsave(&bgrp->lock, flags); + + rcu_assign_pointer(leaf->bfqd, bfqd); + hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); + hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); + + spin_unlock_irqrestore(&bgrp->lock, flags); + + prev = leaf; + leaf = next; + } + + BUG_ON(cgroup == NULL && leaf != NULL); + if (cgroup != NULL && prev != NULL) { + bgrp = cgroup_to_bfqio(cgroup); + bfqg = bfqio_lookup_group(bgrp, bfqd); + bfq_group_set_parent(prev, bfqg); + } +} + +/** + * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. + * @bfqd: queue descriptor. + * @cgroup: cgroup being searched for. + * + * Return a group associated to @bfqd in @cgroup, allocating one if + * necessary. When a group is returned all the cgroups in the path + * to the root have a group associated to @bfqd. + * + * If the allocation fails, return the root group: this breaks guarantees + * but is a safe fallbak. If this loss becames a problem it can be + * mitigated using the equivalent weight (given by the product of the + * weights of the groups in the path from @group to the root) in the + * root scheduler. + * + * We allocate all the missing nodes in the path from the leaf cgroup + * to the root and we connect the nodes only after all the allocations + * have been successful. + */ +static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); + struct bfq_group *bfqg; + + bfqg = bfqio_lookup_group(bgrp, bfqd); + if (bfqg != NULL) + return bfqg; + + bfqg = bfq_group_chain_alloc(bfqd, cgroup); + if (bfqg != NULL) + bfq_group_chain_link(bfqd, cgroup, bfqg); + else + bfqg = bfqd->root_group; + + return bfqg; +} + +/** + * bfq_bfqq_move - migrate @bfqq to @bfqg. + * @bfqd: queue descriptor. + * @bfqq: the queue to move. + * @entity: @bfqq's entity. + * @bfqg: the group to move to. + * + * Move @bfqq to @bfqg, deactivating it from its old group and reactivating + * it on the new one. Avoid putting the entity on the old group idle tree. + * + * Must be called under the queue lock; the cgroup owning @bfqg must + * not disappear (by now this just means that we are called under + * rcu_read_lock()). + */ +static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, + struct bfq_entity *entity, struct bfq_group *bfqg) +{ + int busy, resume; + + busy = bfq_bfqq_busy(bfqq); + resume = !RB_EMPTY_ROOT(&bfqq->sort_list); + + BUG_ON(resume && !entity->on_st); + BUG_ON(busy && !resume && entity->on_st && bfqq != bfqd->active_queue); + + if (busy) { + BUG_ON(atomic_read(&bfqq->ref) < 2); + + if (!resume) + bfq_del_bfqq_busy(bfqd, bfqq, 0); + else + bfq_deactivate_bfqq(bfqd, bfqq, 0); + } else if (entity->on_st) + bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); + + /* + * Here we use a reference to bfqg. 
We don't need a refcounter + * as the cgroup reference will not be dropped, so that its + * destroy() callback will not be invoked. + */ + entity->parent = bfqg->my_entity; + entity->sched_data = &bfqg->sched_data; + + if (busy && resume) + bfq_activate_bfqq(bfqd, bfqq); + + if (bfqd->active_queue == NULL && !bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); +} + +/** + * __bfq_cic_change_cgroup - move @cic to @cgroup. + * @bfqd: the queue descriptor. + * @cic: the cic to move. + * @cgroup: the cgroup to move to. + * + * Move cic to cgroup, assuming that bfqd->queue is locked; the caller + * has to make sure that the reference to cgroup is valid across the call. + * + * NOTE: an alternative approach might have been to store the current + * cgroup in bfqq and getting a reference to it, reducing the lookup + * time here, at the price of slightly more complex code. + */ +static struct bfq_group *__bfq_cic_change_cgroup(struct bfq_data *bfqd, + struct cfq_io_context *cic, + struct cgroup *cgroup) +{ + struct bfq_queue *async_bfqq; + struct bfq_queue *sync_bfqq; + struct bfq_entity *entity; + struct bfq_group *bfqg; + + spin_lock(&bfqd->eqm_lock); + + async_bfqq = cic_to_bfqq(cic, 0); + sync_bfqq = cic_to_bfqq(cic, 1); + + bfqg = bfq_find_alloc_group(bfqd, cgroup); + if (async_bfqq != NULL) { + entity = &async_bfqq->entity; + + if (entity->sched_data != &bfqg->sched_data) { + cic_set_bfqq(cic, NULL, 0); + bfq_log_bfqq(bfqd, async_bfqq, + "cic_change_group: %p %d", + async_bfqq, atomic_read(&async_bfqq->ref)); + bfq_put_queue(async_bfqq); + } + } + + if (sync_bfqq != NULL) { + entity = &sync_bfqq->entity; + if (entity->sched_data != &bfqg->sched_data) + bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); + } + + spin_unlock(&bfqd->eqm_lock); + + return bfqg; +} + +/** + * bfq_cic_change_cgroup - move @cic to @cgroup. + * @cic: the cic being migrated. + * @cgroup: the destination cgroup. + * + * When the task owning @cic is moved to @cgroup, @cic is immediately + * moved into its new parent group. + */ +static void bfq_cic_change_cgroup(struct cfq_io_context *cic, + struct cgroup *cgroup) +{ + struct bfq_data *bfqd; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (bfqd != NULL && + !strncmp(bfqd->queue->elevator->elevator_type->elevator_name, + "bfq", ELV_NAME_MAX)) { + __bfq_cic_change_cgroup(bfqd, cic, cgroup); + bfq_put_bfqd_unlock(bfqd, &flags); + } +} + +/** + * bfq_cic_update_cgroup - update the cgroup of @cic. + * @cic: the @cic to update. + * + * Make sure that @cic is enqueued in the cgroup of the current task. + * We need this in addition to moving cics during the cgroup attach + * phase because the task owning @cic could be at its first disk + * access or we may end up in the root cgroup as the result of a + * memory allocation failure and here we try to move to the right + * group. + * + * Must be called under the queue lock. It is safe to use the returned + * value even after the rcu_read_unlock() as the migration/destruction + * paths act under the queue lock too. IOW it is impossible to race with + * group migration/destruction and end up with an invalid group as: + * a) here cgroup has not yet been destroyed, nor its destroy callback + * has started execution, as current holds a reference to it, + * b) if it is destroyed after rcu_read_unlock() [after current is + * migrated to a different cgroup] its attach() callback will have + * taken care of remove all the references to the old cgroup data. 
+ */ +static struct bfq_group *bfq_cic_update_cgroup(struct cfq_io_context *cic) +{ + struct bfq_data *bfqd = cic->key; + struct bfq_group *bfqg; + struct cgroup *cgroup; + + BUG_ON(bfqd == NULL); + + rcu_read_lock(); + cgroup = task_cgroup(current, bfqio_subsys_id); + bfqg = __bfq_cic_change_cgroup(bfqd, cic, cgroup); + rcu_read_unlock(); + + return bfqg; +} + +/** + * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. + * @st: the service tree being flushed. + */ +static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) +{ + struct bfq_entity *entity = st->first_idle; + + for (; entity != NULL; entity = st->first_idle) + __bfq_deactivate_entity(entity, 0); +} + +/** + * bfq_reparent_leaf_entity - move leaf entity to the root_group. + * @bfqd: the device data structure with the root group. + * @entity: the entity to move. + */ +static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(bfqq == NULL); + bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); + return; +} + +/** + * bfq_reparent_active_entities - move to the root group all active entities. + * @bfqd: the device data structure with the root group. + * @bfqg: the group to move from. + * @st: the service tree with the entities. + * + * Needs queue_lock to be taken and reference to be valid over the call. + */ +static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, + struct bfq_group *bfqg, + struct bfq_service_tree *st) +{ + struct rb_root *active = &st->active; + struct bfq_entity *entity = NULL; + + if (!RB_EMPTY_ROOT(&st->active)) + entity = bfq_entity_of(rb_first(active)); + + for (; entity != NULL ; entity = bfq_entity_of(rb_first(active))) + bfq_reparent_leaf_entity(bfqd, entity); + + if (bfqg->sched_data.active_entity != NULL) + bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.active_entity); + + return; +} + +/** + * bfq_destroy_group - destroy @bfqg. + * @bgrp: the bfqio_cgroup containing @bfqg. + * @bfqg: the group being destroyed. + * + * Destroy @bfqg, making sure that it is not referenced from its parent. + */ +static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) +{ + struct bfq_data *bfqd; + struct bfq_service_tree *st; + struct bfq_entity *entity = bfqg->my_entity; + unsigned long uninitialized_var(flags); + int i; + + hlist_del(&bfqg->group_node); + + /* + * Empty all service_trees belonging to this group before deactivating + * the group itself. + */ + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { + st = bfqg->sched_data.service_tree + i; + + /* + * The idle tree may still contain bfq_queues belonging + * to exited task because they never migrated to a different + * cgroup from the one being destroyed now. Noone else + * can access them so it's safe to act without any lock. + */ + bfq_flush_idle_tree(st); + + /* + * It may happen that some queues are still active + * (busy) upon group destruction (if the corresponding + * processes have been forced to terminate). We move + * all the leaf entities corresponding to these queues + * to the root_group. + * Also, it may happen that the group has an entity + * under service, which is disconnected from the active + * tree: it must be moved, too. + * There is no need to put the sync queues, as the + * scheduler has taken no reference. 
+ */ + bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); + if (bfqd != NULL) { + bfq_reparent_active_entities(bfqd, bfqg, st); + bfq_put_bfqd_unlock(bfqd, &flags); + } + BUG_ON(!RB_EMPTY_ROOT(&st->active)); + BUG_ON(!RB_EMPTY_ROOT(&st->idle)); + } + BUG_ON(bfqg->sched_data.next_active != NULL); + BUG_ON(bfqg->sched_data.active_entity != NULL); + + /* + * We may race with device destruction, take extra care when + * dereferencing bfqg->bfqd. + */ + bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); + if (bfqd != NULL) { + hlist_del(&bfqg->bfqd_node); + __bfq_deactivate_entity(entity, 0); + bfq_put_async_queues(bfqd, bfqg); + bfq_put_bfqd_unlock(bfqd, &flags); + } + BUG_ON(entity->tree != NULL); + + /* + * No need to defer the kfree() to the end of the RCU grace + * period: we are called from the destroy() callback of our + * cgroup, so we can be sure that noone is a) still using + * this cgroup or b) doing lookups in it. + */ + kfree(bfqg); +} + +static void bfq_end_raising_async(struct bfq_data *bfqd) +{ + struct hlist_node *pos, *n; + struct bfq_group *bfqg; + + hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) + bfq_end_raising_async_queues(bfqd, bfqg); +} + +/** + * bfq_disconnect_groups - diconnect @bfqd from all its groups. + * @bfqd: the device descriptor being exited. + * + * When the device exits we just make sure that no lookup can return + * the now unused group structures. They will be deallocated on cgroup + * destruction. + */ +static void bfq_disconnect_groups(struct bfq_data *bfqd) +{ + struct hlist_node *pos, *n; + struct bfq_group *bfqg; + + bfq_log(bfqd, "disconnect_groups beginning") ; + hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) { + hlist_del(&bfqg->bfqd_node); + + __bfq_deactivate_entity(bfqg->my_entity, 0); + + /* + * Don't remove from the group hash, just set an + * invalid key. No lookups can race with the + * assignment as bfqd is being destroyed; this + * implies also that new elements cannot be added + * to the list. + */ + rcu_assign_pointer(bfqg->bfqd, NULL); + + bfq_log(bfqd, "disconnect_groups: put async for group %p", + bfqg) ; + bfq_put_async_queues(bfqd, bfqg); + } +} + +static inline void bfq_free_root_group(struct bfq_data *bfqd) +{ + struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; + struct bfq_group *bfqg = bfqd->root_group; + + bfq_put_async_queues(bfqd, bfqg); + + spin_lock_irq(&bgrp->lock); + hlist_del_rcu(&bfqg->group_node); + spin_unlock_irq(&bgrp->lock); + + /* + * No need to synchronize_rcu() here: since the device is gone + * there cannot be any read-side access to its root_group. 
+ */ + kfree(bfqg); +} + +static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) +{ + struct bfq_group *bfqg; + struct bfqio_cgroup *bgrp; + int i; + + bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); + if (bfqg == NULL) + return NULL; + + bfqg->entity.parent = NULL; + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) + bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; + + bgrp = &bfqio_root_cgroup; + spin_lock_irq(&bgrp->lock); + rcu_assign_pointer(bfqg->bfqd, bfqd); + hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); + spin_unlock_irq(&bgrp->lock); + + return bfqg; +} + +#define SHOW_FUNCTION(__VAR) \ +static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \ + struct cftype *cftype) \ +{ \ + struct bfqio_cgroup *bgrp; \ + u64 ret; \ + \ + if (!cgroup_lock_live_group(cgroup)) \ + return -ENODEV; \ + \ + bgrp = cgroup_to_bfqio(cgroup); \ + spin_lock_irq(&bgrp->lock); \ + ret = bgrp->__VAR; \ + spin_unlock_irq(&bgrp->lock); \ + \ + cgroup_unlock(); \ + \ + return ret; \ +} + +SHOW_FUNCTION(weight); +SHOW_FUNCTION(ioprio); +SHOW_FUNCTION(ioprio_class); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ +static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \ + struct cftype *cftype, \ + u64 val) \ +{ \ + struct bfqio_cgroup *bgrp; \ + struct bfq_group *bfqg; \ + struct hlist_node *n; \ + \ + if (val < (__MIN) || val > (__MAX)) \ + return -EINVAL; \ + \ + if (!cgroup_lock_live_group(cgroup)) \ + return -ENODEV; \ + \ + bgrp = cgroup_to_bfqio(cgroup); \ + \ + spin_lock_irq(&bgrp->lock); \ + bgrp->__VAR = (unsigned short)val; \ + hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \ + /* \ + * Setting the ioprio_changed flag of the entity \ + * to 1 with new_##__VAR == ##__VAR would re-set \ + * the value of the weight to its ioprio mapping. \ + * Set the flag only if necessary. 
\ + */ \ + if ((unsigned short)val != bfqg->entity.new_##__VAR) { \ + bfqg->entity.new_##__VAR = (unsigned short)val; \ + smp_wmb(); \ + bfqg->entity.ioprio_changed = 1; \ + } \ + } \ + spin_unlock_irq(&bgrp->lock); \ + \ + cgroup_unlock(); \ + \ + return 0; \ +} + +STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); +STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); +STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); +#undef STORE_FUNCTION + +static struct cftype bfqio_files[] = { + { + .name = "weight", + .read_u64 = bfqio_cgroup_weight_read, + .write_u64 = bfqio_cgroup_weight_write, + }, + { + .name = "ioprio", + .read_u64 = bfqio_cgroup_ioprio_read, + .write_u64 = bfqio_cgroup_ioprio_write, + }, + { + .name = "ioprio_class", + .read_u64 = bfqio_cgroup_ioprio_class_read, + .write_u64 = bfqio_cgroup_ioprio_class_write, + }, +}; + +static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + return cgroup_add_files(cgroup, subsys, bfqio_files, + ARRAY_SIZE(bfqio_files)); +} + +static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys *subsys, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp; + + if (cgroup->parent != NULL) { + bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); + if (bgrp == NULL) + return ERR_PTR(-ENOMEM); + } else + bgrp = &bfqio_root_cgroup; + + spin_lock_init(&bgrp->lock); + INIT_HLIST_HEAD(&bgrp->group_data); + bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; + bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; + + return &bgrp->css; +} + +/* + * We cannot support shared io contexts, as we have no means to support + * two tasks with the same ioc in two different groups without major rework + * of the main cic/bfqq data structures. By now we allow a task to change + * its cgroup only if it's the only owner of its ioc; the drawback of this + * behavior is that a group containing a task that forked using CLONE_IO + * will not be destroyed until the tasks sharing the ioc die. + */ +static int bfqio_can_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, + struct task_struct *tsk) +{ + struct io_context *ioc; + int ret = 0; + + /* task_lock() is needed to avoid races with exit_io_context() */ + task_lock(tsk); + ioc = tsk->io_context; + if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) + /* + * ioc == NULL means that the task is either too young or + * exiting: if it has still no ioc the ioc can't be shared, + * if the task is exiting the attach will fail anyway, no + * matter what we return here. 
+ */ + ret = -EINVAL; + task_unlock(tsk); + + return ret; +} + +static void bfqio_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, + struct cgroup *prev, struct task_struct *tsk) +{ + struct io_context *ioc; + struct cfq_io_context *cic; + struct hlist_node *n; + + task_lock(tsk); + ioc = tsk->io_context; + if (ioc != NULL) { + BUG_ON(atomic_long_read(&ioc->refcount) == 0); + atomic_long_inc(&ioc->refcount); + } + task_unlock(tsk); + + if (ioc == NULL) + return; + + rcu_read_lock(); + hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) + bfq_cic_change_cgroup(cic, cgroup); + rcu_read_unlock(); + + put_io_context(ioc); +} + +static void bfqio_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); + struct hlist_node *n, *tmp; + struct bfq_group *bfqg; + + /* + * Since we are destroying the cgroup, there are no more tasks + * referencing it, and all the RCU grace periods that may have + * referenced it are ended (as the destruction of the parent + * cgroup is RCU-safe); bgrp->group_data will not be accessed by + * anything else and we don't need any synchronization. + */ + hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node) + bfq_destroy_group(bgrp, bfqg); + + BUG_ON(!hlist_empty(&bgrp->group_data)); + + kfree(bgrp); +} + +struct cgroup_subsys bfqio_subsys = { + .name = "bfqio", + .create = bfqio_create, + .can_attach = bfqio_can_attach, + .attach = bfqio_attach, + .destroy = bfqio_destroy, + .populate = bfqio_populate, + .subsys_id = bfqio_subsys_id, +}; +#else +static inline void bfq_init_entity(struct bfq_entity *entity, + struct bfq_group *bfqg) +{ + entity->weight = entity->new_weight; + entity->orig_weight = entity->new_weight; + entity->ioprio = entity->new_ioprio; + entity->ioprio_class = entity->new_ioprio_class; + entity->sched_data = &bfqg->sched_data; +} + +static inline struct bfq_group * +bfq_cic_update_cgroup(struct cfq_io_context *cic) +{ + struct bfq_data *bfqd = cic->key; + return bfqd->root_group; +} + +static inline void bfq_bfqq_move(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct bfq_entity *entity, + struct bfq_group *bfqg) +{ +} + +static void bfq_end_raising_async(struct bfq_data *bfqd) +{ + bfq_end_raising_async_queues(bfqd, bfqd->root_group); +} + +static inline void bfq_disconnect_groups(struct bfq_data *bfqd) +{ + bfq_put_async_queues(bfqd, bfqd->root_group); +} + +static inline void bfq_free_root_group(struct bfq_data *bfqd) +{ + kfree(bfqd->root_group); +} + +static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) +{ + struct bfq_group *bfqg; + int i; + + bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); + if (bfqg == NULL) + return NULL; + + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) + bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; + + return bfqg; +} +#endif diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c new file mode 100644 index 00000000000..f7366962da0 --- /dev/null +++ b/block/bfq-ioc.c @@ -0,0 +1,410 @@ +/* + * BFQ: I/O context handling. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + */ + +/** + * bfq_cic_free_rcu - deferred cic freeing. + * @head: RCU head of the cic to free. + * + * Free the cic containing @head and, if it was the last one and + * the module is exiting wake up anyone waiting for its deallocation + * (see bfq_exit()). 
+ */ +static void bfq_cic_free_rcu(struct rcu_head *head) +{ + struct cfq_io_context *cic; + + cic = container_of(head, struct cfq_io_context, rcu_head); + + kmem_cache_free(bfq_ioc_pool, cic); + elv_ioc_count_dec(bfq_ioc_count); + + if (bfq_ioc_gone != NULL) { + spin_lock(&bfq_ioc_gone_lock); + if (bfq_ioc_gone != NULL && + !elv_ioc_count_read(bfq_ioc_count)) { + complete(bfq_ioc_gone); + bfq_ioc_gone = NULL; + } + spin_unlock(&bfq_ioc_gone_lock); + } +} + +static void bfq_cic_free(struct cfq_io_context *cic) +{ + call_rcu(&cic->rcu_head, bfq_cic_free_rcu); +} + +/** + * cic_free_func - disconnect a cic ready to be freed. + * @ioc: the io_context @cic belongs to. + * @cic: the cic to be freed. + * + * Remove @cic from the @ioc radix tree hash and from its cic list, + * deferring the deallocation of @cic to the end of the current RCU + * grace period. This assumes that __bfq_exit_single_io_context() + * has already been called for @cic. + */ +static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) +{ + unsigned long flags; + unsigned long dead_key = (unsigned long) cic->key; + + BUG_ON(!(dead_key & CIC_DEAD_KEY)); + + spin_lock_irqsave(&ioc->lock, flags); + radix_tree_delete(&ioc->bfq_radix_root, + dead_key >> CIC_DEAD_INDEX_SHIFT); + hlist_del_init_rcu(&cic->cic_list); + spin_unlock_irqrestore(&ioc->lock, flags); + + bfq_cic_free(cic); +} + +static void bfq_free_io_context(struct io_context *ioc) +{ + /* + * ioc->refcount is zero here, or we are called from elv_unregister(), + * so no more cic's are allowed to be linked into this ioc. So it + * should be ok to iterate over the known list, we will see all cic's + * since no new ones are added. + */ + call_for_each_cic(ioc, cic_free_func); +} + +/** + * __bfq_exit_single_io_context - deassociate @cic from any running task. + * @bfqd: bfq_data on which @cic is valid. + * @cic: the cic being exited. + * + * Whenever no more tasks are using @cic or @bfqd is deallocated we + * need to invalidate its entry in the radix tree hash table and to + * release the queues it refers to. + * + * Called under the queue lock. + */ +static void __bfq_exit_single_io_context(struct bfq_data *bfqd, + struct cfq_io_context *cic) +{ + struct io_context *ioc = cic->ioc; + + list_del_init(&cic->queue_list); + + /* + * Make sure dead mark is seen for dead queues + */ + smp_wmb(); + rcu_assign_pointer(cic->key, bfqd_dead_key(bfqd)); + + /* + * No write-side locking as no task is using @ioc (they're exited + * or bfqd is being deallocated. + */ + rcu_read_lock(); + if (rcu_dereference(ioc->ioc_data) == cic) { + rcu_read_unlock(); + spin_lock(&ioc->lock); + rcu_assign_pointer(ioc->ioc_data, NULL); + spin_unlock(&ioc->lock); + } else + rcu_read_unlock(); + + if (cic->cfqq[BLK_RW_ASYNC] != NULL) { + bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_ASYNC]); + cic->cfqq[BLK_RW_ASYNC] = NULL; + } + + spin_lock(&bfqd->eqm_lock); + if (cic->cfqq[BLK_RW_SYNC] != NULL) { + /* + * If the bic is using a shared queue, put the reference + * taken on the io_context when the bic started using a + * shared bfq_queue. + */ + if (bfq_bfqq_coop(cic->cfqq[BLK_RW_SYNC])) + put_io_context(ioc); + bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_SYNC]); + cic->cfqq[BLK_RW_SYNC] = NULL; + } + spin_unlock(&bfqd->eqm_lock); +} + +/** + * bfq_exit_single_io_context - deassociate @cic from @ioc (unlocked version). + * @ioc: the io_context @cic belongs to. + * @cic: the cic being exited. + * + * Take the queue lock and call __bfq_exit_single_io_context() to do the + * rest of the work. 
We take care of possible races with bfq_exit_queue() + * using bfq_get_bfqd_locked() (and abusing a little bit the RCU mechanism). + */ +static void bfq_exit_single_io_context(struct io_context *ioc, + struct cfq_io_context *cic) +{ + struct bfq_data *bfqd; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (bfqd != NULL) { + __bfq_exit_single_io_context(bfqd, cic); + bfq_put_bfqd_unlock(bfqd, &flags); + } +} + +/** + * bfq_exit_io_context - deassociate @ioc from all cics it owns. + * @ioc: the @ioc being exited. + * + * No more processes are using @ioc we need to clean up and put the + * internal structures we have that belongs to that process. Loop + * through all its cics, locking their queues and exiting them. + */ +static void bfq_exit_io_context(struct io_context *ioc) +{ + call_for_each_cic(ioc, bfq_exit_single_io_context); +} + +static struct cfq_io_context *bfq_alloc_io_context(struct bfq_data *bfqd, + gfp_t gfp_mask) +{ + struct cfq_io_context *cic; + + cic = kmem_cache_alloc_node(bfq_ioc_pool, gfp_mask | __GFP_ZERO, + bfqd->queue->node); + if (cic != NULL) { + cic->ttime.last_end_request = jiffies; + /* + * A newly created cic indicates that the process has just + * started doing I/O, and is probably mapping into memory its + * executable and libraries: it definitely needs weight raising. + * There is however the possibility that the process performs, + * for a while, I/O close to some other process. EQM intercepts + * this behavior and may merge the queue corresponding to the + * process with some other queue, BEFORE the weight of the queue + * is raised. Merged queues are not weight-raised (they are assumed + * to belong to processes that benefit only from high throughput). + * If the merge is basically the consequence of an accident, then + * the queue will be split soon and will get back its old weight. + * It is then important to write down somewhere that this queue + * does need weight raising, even if it did not make it to get its + * weight raised before being merged. To this purpose, we overload + * the field raising_time_left and assign 1 to it, to mark the queue + * as needing weight raising. + */ + cic->raising_time_left = 1; + INIT_LIST_HEAD(&cic->queue_list); + INIT_HLIST_NODE(&cic->cic_list); + cic->dtor = bfq_free_io_context; + cic->exit = bfq_exit_io_context; + elv_ioc_count_inc(bfq_ioc_count); + } + + return cic; +} + +/** + * bfq_drop_dead_cic - free an exited cic. + * @bfqd: bfq data for the device in use. + * @ioc: io_context owning @cic. + * @cic: the @cic to free. + * + * We drop cfq io contexts lazily, so we may find a dead one. + */ +static void bfq_drop_dead_cic(struct bfq_data *bfqd, struct io_context *ioc, + struct cfq_io_context *cic) +{ + unsigned long flags; + + WARN_ON(!list_empty(&cic->queue_list)); + BUG_ON(cic->key != bfqd_dead_key(bfqd)); + + spin_lock_irqsave(&ioc->lock, flags); + + BUG_ON(ioc->ioc_data == cic); + + /* + * With shared I/O contexts two lookups may race and drop the + * same cic more than one time: RCU guarantees that the storage + * will not be freed too early, here we make sure that we do + * not try to remove the cic from the hashing structures multiple + * times. + */ + if (!hlist_unhashed(&cic->cic_list)) { + radix_tree_delete(&ioc->bfq_radix_root, bfqd->cic_index); + hlist_del_init_rcu(&cic->cic_list); + bfq_cic_free(cic); + } + + spin_unlock_irqrestore(&ioc->lock, flags); +} + +/** + * bfq_cic_lookup - search into @ioc a cic associated to @bfqd. + * @bfqd: the lookup key. 
+ * @ioc: the io_context of the process doing I/O. + * + * If @ioc already has a cic associated to @bfqd return it, return %NULL + * otherwise. + */ +static struct cfq_io_context *bfq_cic_lookup(struct bfq_data *bfqd, + struct io_context *ioc) +{ + struct cfq_io_context *cic; + unsigned long flags; + void *k; + + if (unlikely(ioc == NULL)) + return NULL; + + rcu_read_lock(); + + /* We maintain a last-hit cache, to avoid browsing over the tree. */ + cic = rcu_dereference(ioc->ioc_data); + if (cic != NULL) { + k = rcu_dereference(cic->key); + if (k == bfqd) + goto out; + } + + do { + cic = radix_tree_lookup(&ioc->bfq_radix_root, + bfqd->cic_index); + if (cic == NULL) + goto out; + + k = rcu_dereference(cic->key); + if (unlikely(k != bfqd)) { + rcu_read_unlock(); + bfq_drop_dead_cic(bfqd, ioc, cic); + rcu_read_lock(); + continue; + } + + spin_lock_irqsave(&ioc->lock, flags); + rcu_assign_pointer(ioc->ioc_data, cic); + spin_unlock_irqrestore(&ioc->lock, flags); + break; + } while (1); + +out: + rcu_read_unlock(); + + return cic; +} + +/** + * bfq_cic_link - add @cic to @ioc. + * @bfqd: bfq_data @cic refers to. + * @ioc: io_context @cic belongs to. + * @cic: the cic to link. + * @gfp_mask: the mask to use for radix tree preallocations. + * + * Add @cic to @ioc, using @bfqd as the search key. This enables us to + * lookup the process specific cfq io context when entered from the block + * layer. Also adds @cic to a per-bfqd list, used when this queue is + * removed. + */ +static int bfq_cic_link(struct bfq_data *bfqd, struct io_context *ioc, + struct cfq_io_context *cic, gfp_t gfp_mask) +{ + unsigned long flags; + int ret; + + ret = radix_tree_preload(gfp_mask); + if (ret == 0) { + cic->ioc = ioc; + + /* No write-side locking, cic is not published yet. */ + rcu_assign_pointer(cic->key, bfqd); + + spin_lock_irqsave(&ioc->lock, flags); + ret = radix_tree_insert(&ioc->bfq_radix_root, + bfqd->cic_index, cic); + if (ret == 0) + hlist_add_head_rcu(&cic->cic_list, &ioc->bfq_cic_list); + spin_unlock_irqrestore(&ioc->lock, flags); + + radix_tree_preload_end(); + + if (ret == 0) { + spin_lock_irqsave(bfqd->queue->queue_lock, flags); + list_add(&cic->queue_list, &bfqd->cic_list); + spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); + } + } + + if (ret != 0) + printk(KERN_ERR "bfq: cic link failed!\n"); + + return ret; +} + +/** + * bfq_ioc_set_ioprio - signal a priority change to the cics belonging to @ioc. + * @ioc: the io_context changing its priority. + */ +static inline void bfq_ioc_set_ioprio(struct io_context *ioc) +{ + call_for_each_cic(ioc, bfq_changed_ioprio); +} + +/** + * bfq_get_io_context - return the @cic associated to @bfqd in @ioc. + * @bfqd: the search key. + * @gfp_mask: the mask to use for cic allocation. + * + * Setup general io context and cfq io context. There can be several cfq + * io contexts per general io context, if this process is doing io to more + * than one device managed by cfq. + */ +static struct cfq_io_context *bfq_get_io_context(struct bfq_data *bfqd, + gfp_t gfp_mask) +{ + struct io_context *ioc = NULL; + struct cfq_io_context *cic; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + ioc = get_io_context(gfp_mask, bfqd->queue->node); + if (ioc == NULL) + return NULL; + + /* Lookup for an existing cic. */ + cic = bfq_cic_lookup(bfqd, ioc); + if (cic != NULL) + goto out; + + /* Alloc one if needed. */ + cic = bfq_alloc_io_context(bfqd, gfp_mask); + if (cic == NULL) + goto err; + + /* Link it into the ioc's radix tree and cic list. 
*/ + if (bfq_cic_link(bfqd, ioc, cic, gfp_mask) != 0) + goto err_free; + +out: + /* + * test_and_clear_bit() implies a memory barrier, paired with + * the wmb() in fs/ioprio.c, so the value seen for ioprio is the + * new one. + */ + if (unlikely(test_and_clear_bit(IOC_BFQ_IOPRIO_CHANGED, + ioc->ioprio_changed))) + bfq_ioc_set_ioprio(ioc); + + return cic; +err_free: + bfq_cic_free(cic); +err: + put_io_context(ioc); + return NULL; +} diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c new file mode 100644 index 00000000000..ee6ce52d9ec --- /dev/null +++ b/block/bfq-iosched.c @@ -0,0 +1,3367 @@ +/* + * BFQ, or Budget Fair Queueing, disk scheduler. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + * + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. + * + * BFQ is a proportional share disk scheduling algorithm based on the + * slice-by-slice service scheme of CFQ. But BFQ assigns budgets, + * measured in number of sectors, to tasks instead of time slices. + * The disk is not granted to the active task for a given time slice, + * but until it has exahusted its assigned budget. This change from + * the time to the service domain allows BFQ to distribute the disk + * bandwidth among tasks as desired, without any distortion due to + * ZBR, workload fluctuations or other factors. BFQ uses an ad hoc + * internal scheduler, called B-WF2Q+, to schedule tasks according to + * their budgets. Thanks to this accurate scheduler, BFQ can afford + * to assign high budgets to disk-bound non-seeky tasks (to boost the + * throughput), and yet guarantee low latencies to interactive and + * soft real-time applications. + * + * BFQ has been introduced in [1], where the interested reader can + * find an accurate description of the algorithm, the bandwidth + * distribution and latency guarantees it provides, plus formal proofs + * of all the properties. With respect to the algorithm presented in + * the paper, this implementation adds several little heuristics, and + * a hierarchical extension, based on H-WF2Q+. + * + * B-WF2Q+ is based on WF2Q+, that is described in [2], together with + * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) + * complexity derives from the one introduced with EEVDF in [3]. + * + * [1] P. Valente and F. Checconi, ``High Throughput Disk Scheduling + * with Deterministic Guarantees on Bandwidth Distribution,'', + * IEEE Transactions on Computer, May 2010. + * + * http://algo.ing.unimo.it/people/paolo/disk_sched/bfq-techreport.pdf + * + * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing + * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, + * Oct 1997. + * + * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz + * + * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline + * First: A Flexible and Accurate Mechanism for Proportional Share + * Resource Allocation,'' technical report. + * + * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "bfq.h" + +/* Max number of dispatches in one round of service. */ +static const int bfq_quantum = 4; + +/* Expiration time of sync (0) and async (1) requests, in jiffies. 
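+ * The commented-out initializers here and below keep the stock
+ * defaults for reference; the uncommented values are the tuned
+ * replacements actually compiled in.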
*/ +//static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; +static const int bfq_fifo_expire[2] = { 33, 8 }; + +/* Maximum backwards seek, in KiB. */ +//static const int bfq_back_max = 16 * 1024; +static const int bfq_back_max = 12582912; + +/* Penalty of a backwards seek, in number of sectors. */ +//static const int bfq_back_penalty = 2; +static const int bfq_back_penalty = 1; + +/* Idling period duration, in jiffies. */ +//static int bfq_slice_idle = HZ / 125; +static int bfq_slice_idle = 0; + +/* Default maximum budget values, in sectors and number of requests. */ +//static const int bfq_default_max_budget = 16 * 1024; +static const int bfq_default_max_budget = 12582912; +static const int bfq_max_budget_async_rq = 4; + +/* + * Async to sync throughput distribution is controlled as follows: + * when an async request is served, the entity is charged the number + * of sectors of the request, multipled by the factor below + */ +static const int bfq_async_charge_factor = 10; + +/* Default timeout values, in jiffies, approximating CFQ defaults. */ +static const int bfq_timeout_sync = HZ / 8; +static int bfq_timeout_async = HZ / 25; + +struct kmem_cache *bfq_pool; +struct kmem_cache *bfq_ioc_pool; + +static DEFINE_PER_CPU(unsigned long, bfq_ioc_count); +static struct completion *bfq_ioc_gone; +static DEFINE_SPINLOCK(bfq_ioc_gone_lock); + +static DEFINE_SPINLOCK(cic_index_lock); +static DEFINE_IDA(cic_index_ida); + +/* Below this threshold (in ms), we consider thinktime immediate. */ +#define BFQ_MIN_TT 2 + +/* hw_tag detection: parallel requests threshold and min samples needed. */ +#define BFQ_HW_QUEUE_THRESHOLD 4 +#define BFQ_HW_QUEUE_SAMPLES 32 + +#define BFQQ_SEEK_THR (sector_t)(8 * 1024) +#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) + +/* Min samples used for peak rate estimation (for autotuning). */ +#define BFQ_PEAK_RATE_SAMPLES 32 + +/* Shift used for peak rate fixed precision calculations. */ +#define BFQ_RATE_SHIFT 16 + +/* + * The duration of the weight raising for interactive applications is + * computed automatically (as default behaviour), using the following + * formula: duration = (R / r) * T, where r is the peak rate of the + * disk, and R and T are two reference parameters. In particular, R is + * the peak rate of a reference disk, and T is about the maximum time + * for starting popular large applications on that disk, under BFQ and + * while reading two files in parallel. Finally, BFQ uses two + * different pairs (R, T) depending on whether the disk is rotational + * or non-rotational. + */ +#define T_rot (msecs_to_jiffies(5500)) +#define T_nonrot (msecs_to_jiffies(2000)) +/* Next two quantities are in sectors/usec, left-shifted by BFQ_RATE_SHIFT */ +#define R_rot 17415 +#define R_nonrot 34791 + +#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ + { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) + +#define RQ_CIC(rq) \ + ((struct cfq_io_context *) (rq)->elevator_private[0]) +#define RQ_BFQQ(rq) ((rq)->elevator_private[1]) + +static inline void bfq_schedule_dispatch(struct bfq_data *bfqd); + +#include "bfq-ioc.c" +#include "bfq-sched.c" +#include "bfq-cgroup.c" + +#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ + IOPRIO_CLASS_IDLE) +#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ + IOPRIO_CLASS_RT) + +#define bfq_sample_valid(samples) ((samples) > 80) + +/* + * We regard a request as SYNC, if either it's a read or has the SYNC bit + * set (in which case it could also be a direct WRITE). 
+ */ +static inline int bfq_bio_sync(struct bio *bio) +{ + if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) + return 1; + + return 0; +} + +/* + * Scheduler run of queue, if there are requests pending and no one in the + * driver that will restart queueing. + */ +static inline void bfq_schedule_dispatch(struct bfq_data *bfqd) +{ + if (bfqd->queued != 0) { + bfq_log(bfqd, "schedule dispatch"); + kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work); + } +} + +/* + * Lifted from AS - choose which of rq1 and rq2 that is best served now. + * We choose the request that is closesr to the head right now. Distance + * behind the head is penalized and only allowed to a certain extent. + */ +static struct request *bfq_choose_req(struct bfq_data *bfqd, + struct request *rq1, + struct request *rq2, + sector_t last) +{ + sector_t s1, s2, d1 = 0, d2 = 0; + unsigned long back_max; +#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ +#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ + unsigned wrap = 0; /* bit mask: requests behind the disk head? */ + + if (rq1 == NULL || rq1 == rq2) + return rq2; + if (rq2 == NULL) + return rq1; + + if (rq_is_sync(rq1) && !rq_is_sync(rq2)) + return rq1; + else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) + return rq2; + if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) + return rq1; + else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) + return rq2; + + s1 = blk_rq_pos(rq1); + s2 = blk_rq_pos(rq2); + + /* + * By definition, 1KiB is 2 sectors. + */ + back_max = bfqd->bfq_back_max * 2; + + /* + * Strict one way elevator _except_ in the case where we allow + * short backward seeks which are biased as twice the cost of a + * similar forward seek. + */ + if (s1 >= last) + d1 = s1 - last; + else if (s1 + back_max >= last) + d1 = (last - s1) * bfqd->bfq_back_penalty; + else + wrap |= BFQ_RQ1_WRAP; + + if (s2 >= last) + d2 = s2 - last; + else if (s2 + back_max >= last) + d2 = (last - s2) * bfqd->bfq_back_penalty; + else + wrap |= BFQ_RQ2_WRAP; + + /* Found required data */ + + /* + * By doing switch() on the bit mask "wrap" we avoid having to + * check two variables for all permutations: --> faster! + */ + switch (wrap) { + case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ + if (d1 < d2) + return rq1; + else if (d2 < d1) + return rq2; + else { + if (s1 >= s2) + return rq1; + else + return rq2; + } + + case BFQ_RQ2_WRAP: + return rq1; + case BFQ_RQ1_WRAP: + return rq2; + case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ + default: + /* + * Since both rqs are wrapped, + * start with the one that's further behind head + * (--> only *one* back seek required), + * since back seek takes more time than forward. + */ + if (s1 <= s2) + return rq1; + else + return rq2; + } +} + +static struct bfq_queue * +bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, + sector_t sector, struct rb_node **ret_parent, + struct rb_node ***rb_link) +{ + struct rb_node **p, *parent; + struct bfq_queue *bfqq = NULL; + + parent = NULL; + p = &root->rb_node; + while (*p) { + struct rb_node **n; + + parent = *p; + bfqq = rb_entry(parent, struct bfq_queue, pos_node); + + /* + * Sort strictly based on sector. Smallest to the left, + * largest to the right. 
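+ * The tree is keyed on the position of each queue's next_rq.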
+ */ + if (sector > blk_rq_pos(bfqq->next_rq)) + n = &(*p)->rb_right; + else if (sector < blk_rq_pos(bfqq->next_rq)) + n = &(*p)->rb_left; + else + break; + p = n; + bfqq = NULL; + } + + *ret_parent = parent; + if (rb_link) + *rb_link = p; + + bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", + (long long unsigned)sector, + bfqq != NULL ? bfqq->pid : 0); + + return bfqq; +} + +static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct rb_node **p, *parent; + struct bfq_queue *__bfqq; + + if (bfqq->pos_root != NULL) { + rb_erase(&bfqq->pos_node, bfqq->pos_root); + bfqq->pos_root = NULL; + } + + if (bfq_class_idle(bfqq)) + return; + if (!bfqq->next_rq) + return; + + bfqq->pos_root = &bfqd->rq_pos_tree; + __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, + blk_rq_pos(bfqq->next_rq), &parent, &p); + if (__bfqq == NULL) { + rb_link_node(&bfqq->pos_node, parent, p); + rb_insert_color(&bfqq->pos_node, bfqq->pos_root); + } else + bfqq->pos_root = NULL; +} + +static struct request *bfq_find_next_rq(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct request *last) +{ + struct rb_node *rbnext = rb_next(&last->rb_node); + struct rb_node *rbprev = rb_prev(&last->rb_node); + struct request *next = NULL, *prev = NULL; + + BUG_ON(RB_EMPTY_NODE(&last->rb_node)); + + if (rbprev != NULL) + prev = rb_entry_rq(rbprev); + + if (rbnext != NULL) + next = rb_entry_rq(rbnext); + else { + rbnext = rb_first(&bfqq->sort_list); + if (rbnext && rbnext != &last->rb_node) + next = rb_entry_rq(rbnext); + } + + return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); +} + +/* Must be called with eqm_lock held */ +static void bfq_del_rq_rb(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_data *bfqd = bfqq->bfqd; + const int sync = rq_is_sync(rq); + + BUG_ON(bfqq->queued[sync] == 0); + bfqq->queued[sync]--; + bfqd->queued--; + + elv_rb_del(&bfqq->sort_list, rq); + + if (RB_EMPTY_ROOT(&bfqq->sort_list)) { + if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->active_queue) + bfq_del_bfqq_busy(bfqd, bfqq, 1); + /* + * Remove queue from request-position tree as it is empty. + */ + if (bfqq->pos_root != NULL) { + rb_erase(&bfqq->pos_node, bfqq->pos_root); + bfqq->pos_root = NULL; + } + } +} + +/* see the definition of bfq_async_charge_factor for details */ +static inline unsigned long bfq_serv_to_charge(struct request *rq, + struct bfq_queue *bfqq) +{ + return blk_rq_sectors(rq) * + (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->raising_coeff == 1) * + bfq_async_charge_factor)); +} + +/** + * bfq_updated_next_req - update the queue after a new next_rq selection. + * @bfqd: the device data the queue belongs to. + * @bfqq: the queue to update. + * + * If the first request of a queue changes we make sure that the queue + * has enough budget to serve at least its first request (if the + * request has grown). We do this because if the queue has not enough + * budget for its first request, it has to go through two dispatch + * rounds to actually get it dispatched. + */ +static void bfq_updated_next_req(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + struct request *next_rq = bfqq->next_rq; + unsigned long new_budget; + + if (next_rq == NULL) + return; + + if (bfqq == bfqd->active_queue) + /* + * In order not to break guarantees, budgets cannot be + * changed after an entity has been selected. 
+ */ + return; + + BUG_ON(entity->tree != &st->active); + BUG_ON(entity == entity->sched_data->active_entity); + + new_budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + entity->budget = new_budget; + bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", new_budget); + bfq_activate_bfqq(bfqd, bfqq); +} + +static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd) +{ + u64 dur; + + if (bfqd->bfq_raising_max_time > 0) + return bfqd->bfq_raising_max_time; + + dur = bfqd->RT_prod; + do_div(dur, bfqd->peak_rate); + + return dur; +} + +static inline void +bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct cfq_io_context *cic) +{ + if (cic->saved_idle_window) + bfq_mark_bfqq_idle_window(bfqq); + else + bfq_clear_bfqq_idle_window(bfqq); + if (cic->raising_time_left && bfqq->bfqd->low_latency) { + /* + * Start a weight raising period with the duration given by + * the raising_time_left snapshot. + */ + bfqq->raising_coeff = bfqq->bfqd->bfq_raising_coeff; + bfqq->raising_cur_max_time = cic->raising_time_left; + bfqq->last_rais_start_finish = jiffies; + } + /* + * Clear raising_time_left to prevent bfq_bfqq_save_state() from + * getting confused about the queue's need of a weight-raising + * period. + */ + cic->raising_time_left = 0; +} + +/* + * Must be called with the queue_lock held. + */ +static int bfqq_process_refs(struct bfq_queue *bfqq) +{ + int process_refs, io_refs; + + io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; + process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; + BUG_ON(process_refs < 0); + return process_refs; +} + +static void bfq_add_rq_rb(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_entity *entity = &bfqq->entity; + struct bfq_data *bfqd = bfqq->bfqd; + struct request *next_rq, *prev; + unsigned long old_raising_coeff = bfqq->raising_coeff; + int idle_for_long_time = bfqq->budget_timeout + + bfqd->bfq_raising_min_idle_time < jiffies; + + bfq_log_bfqq(bfqd, bfqq, "add_rq_rb %d", rq_is_sync(rq)); + bfqq->queued[rq_is_sync(rq)]++; + bfqd->queued++; + + elv_rb_add(&bfqq->sort_list, rq); + + spin_lock(&bfqd->eqm_lock); + + /* + * Check if this request is a better next-serve candidate. + */ + prev = bfqq->next_rq; + next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); + BUG_ON(next_rq == NULL); + bfqq->next_rq = next_rq; + + /* + * Adjust priority tree position, if next_rq changes. + */ + if (prev != bfqq->next_rq) + bfq_rq_pos_tree_add(bfqd, bfqq); + + spin_unlock(&bfqd->eqm_lock); + + if (!bfq_bfqq_busy(bfqq)) { + int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && + bfqq->soft_rt_next_start < jiffies; + entity->budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + + if (! bfqd->low_latency) + goto add_bfqq_busy; + + if (bfq_bfqq_just_split(bfqq)) + goto set_ioprio_changed; + + /* + * If the queue: + * - is not being boosted, + * - has been idle for enough time, + * - is not a sync queue or is linked to a cfq_io_context (it is + * shared "for its nature" or it is not shared and its + * requests have not been redirected to a shared queue) + * start a weight-raising period. 
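+ * A queue idle for a long time gets the full interactive raising
+ * duration, while a soft real-time queue gets the shorter
+ * bfq_raising_rt_max_time.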
+ */ + if(old_raising_coeff == 1 && (idle_for_long_time || soft_rt) && + (!bfq_bfqq_sync(bfqq) || bfqq->cic != NULL)) { + bfqq->raising_coeff = bfqd->bfq_raising_coeff; + if (idle_for_long_time) + bfqq->raising_cur_max_time = + bfq_wrais_duration(bfqd); + else + bfqq->raising_cur_max_time = + bfqd->bfq_raising_rt_max_time; + bfq_log_bfqq(bfqd, bfqq, + "wrais starting at %llu msec," + "rais_max_time %u", + bfqq->last_rais_start_finish, + jiffies_to_msecs(bfqq-> + raising_cur_max_time)); + } else if (old_raising_coeff > 1) { + if (idle_for_long_time) + bfqq->raising_cur_max_time = + bfq_wrais_duration(bfqd); + else if (bfqq->raising_cur_max_time == + bfqd->bfq_raising_rt_max_time && + !soft_rt) { + bfqq->raising_coeff = 1; + bfq_log_bfqq(bfqd, bfqq, + "wrais ending at %llu msec," + "rais_max_time %u", + bfqq->last_rais_start_finish, + jiffies_to_msecs(bfqq-> + raising_cur_max_time)); + } + } +set_ioprio_changed: + if (old_raising_coeff != bfqq->raising_coeff) + entity->ioprio_changed = 1; +add_bfqq_busy: + bfq_add_bfqq_busy(bfqd, bfqq); + } else { + if(bfqd->low_latency && old_raising_coeff == 1 && + !rq_is_sync(rq) && + bfqq->last_rais_start_finish + + bfqd->bfq_raising_min_inter_arr_async < jiffies) { + bfqq->raising_coeff = bfqd->bfq_raising_coeff; + bfqq->raising_cur_max_time = bfq_wrais_duration(bfqd); + + entity->ioprio_changed = 1; + bfq_log_bfqq(bfqd, bfqq, + "non-idle wrais starting at %llu msec," + "rais_max_time %u", + bfqq->last_rais_start_finish, + jiffies_to_msecs(bfqq-> + raising_cur_max_time)); + } + bfq_updated_next_req(bfqd, bfqq); + } + + if(bfqd->low_latency && + (old_raising_coeff == 1 || bfqq->raising_coeff == 1 || + idle_for_long_time)) + bfqq->last_rais_start_finish = jiffies; +} + +static void bfq_reposition_rq_rb(struct bfq_queue *bfqq, struct request *rq) +{ + elv_rb_del(&bfqq->sort_list, rq); + bfqq->queued[rq_is_sync(rq)]--; + bfqq->bfqd->queued--; + bfq_add_rq_rb(rq); +} + +static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, + struct bio *bio) +{ + struct task_struct *tsk = current; + struct cfq_io_context *cic; + struct bfq_queue *bfqq; + + cic = bfq_cic_lookup(bfqd, tsk->io_context); + if (cic == NULL) + return NULL; + + spin_lock(&bfqd->eqm_lock); + bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); + spin_unlock(&bfqd->eqm_lock); + if (bfqq != NULL) { + sector_t sector = bio->bi_sector + bio_sectors(bio); + + return elv_rb_find(&bfqq->sort_list, sector); + } + + return NULL; +} + +static void bfq_activate_request(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + + bfqd->rq_in_driver++; + bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); + bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", + (long long unsigned)bfqd->last_position); +} + +static void bfq_deactivate_request(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + + WARN_ON(bfqd->rq_in_driver == 0); + bfqd->rq_in_driver--; +} + +static void bfq_remove_request(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_data *bfqd = bfqq->bfqd; + + spin_lock(&bfqq->bfqd->eqm_lock); + if (bfqq->next_rq == rq) { + bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); + bfq_updated_next_req(bfqd, bfqq); + } + + list_del_init(&rq->queuelist); + bfq_del_rq_rb(rq); + spin_unlock(&bfqq->bfqd->eqm_lock); + + if (rq->cmd_flags & REQ_META) { + WARN_ON(bfqq->meta_pending == 0); + bfqq->meta_pending--; + } +} + +static int bfq_merge(struct request_queue *q, struct 
request **req, + struct bio *bio) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct request *__rq; + + __rq = bfq_find_rq_fmerge(bfqd, bio); + if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } + + return ELEVATOR_NO_MERGE; +} + +static void bfq_merged_request(struct request_queue *q, struct request *req, + int type) +{ + if (type == ELEVATOR_FRONT_MERGE) { + struct bfq_queue *bfqq = RQ_BFQQ(req); + + bfq_reposition_rq_rb(bfqq, req); + } +} + +static void bfq_merged_requests(struct request_queue *q, struct request *rq, + struct request *next) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + /* + * Reposition in fifo if next is older than rq. + */ + if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && + time_before(rq_fifo_time(next), rq_fifo_time(rq))) { + list_move(&rq->queuelist, &next->queuelist); + rq_set_fifo_time(rq, rq_fifo_time(next)); + } + + /* + * eqm_lock needed to avoid that other critical sections not holding + * the queue_lock read an inconsistent value from bfqq->next_rq while + * traversing the rq_pos_trees + */ + if (bfqq->next_rq == next) { + spin_lock(&bfqq->bfqd->eqm_lock); + bfqq->next_rq = rq; + spin_unlock(&bfqq->bfqd->eqm_lock); + } + + bfq_remove_request(next); +} + +/* Must be called with bfqq != NULL */ +static inline void bfq_bfqq_end_raising(struct bfq_queue *bfqq) +{ + BUG_ON(bfqq == NULL); + bfqq->raising_coeff = 1; + bfqq->raising_cur_max_time = 0; + /* Trigger a weight change on the next activation of the queue */ + bfqq->entity.ioprio_changed = 1; +} + +static void bfq_end_raising_async_queues(struct bfq_data *bfqd, + struct bfq_group *bfqg) +{ + int i, j; + + for (i = 0; i < 2; i++) + for (j = 0; j < IOPRIO_BE_NR; j++) + if (bfqg->async_bfqq[i][j] != NULL) + bfq_bfqq_end_raising(bfqg->async_bfqq[i][j]); + if (bfqg->async_idle_bfqq != NULL) + bfq_bfqq_end_raising(bfqg->async_idle_bfqq); +} + +static void bfq_end_raising(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq; + + spin_lock_irq(bfqd->queue->queue_lock); + + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) + bfq_bfqq_end_raising(bfqq); + list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) + bfq_bfqq_end_raising(bfqq); + bfq_end_raising_async(bfqd); + + spin_unlock_irq(bfqd->queue->queue_lock); +} + +static inline sector_t bfq_io_struct_pos(void *io_struct, bool request) +{ + if (request) + return blk_rq_pos(io_struct); + else + return ((struct bio *)io_struct)->bi_sector; +} + +static inline sector_t bfq_dist_from(sector_t pos1, + sector_t pos2) +{ + if (pos1 >= pos2) + return pos1 - pos2; + else + return pos2 - pos1; +} + +static inline int bfq_rq_close_to_sector(void *io_struct, bool request, + sector_t sector) +{ + return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <= + BFQQ_SEEK_THR; +} + +static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector) +{ + struct rb_root *root = &bfqd->rq_pos_tree; + struct rb_node *parent, *node; + struct bfq_queue *__bfqq; + + if (RB_EMPTY_ROOT(root)) + return NULL; + + /* + * First, if we find a request starting at the end of the last + * request, choose it. + */ + __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); + if (__bfqq != NULL) + return __bfqq; + + /* + * If the exact sector wasn't found, the parent of the NULL leaf + * will contain the closest sector (rq_pos_tree sorted by next_request + * position). 
+ */ + __bfqq = rb_entry(parent, struct bfq_queue, pos_node); + if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) + return __bfqq; + + if (blk_rq_pos(__bfqq->next_rq) < sector) + node = rb_next(&__bfqq->pos_node); + else + node = rb_prev(&__bfqq->pos_node); + if (node == NULL) + return NULL; + + __bfqq = rb_entry(node, struct bfq_queue, pos_node); + if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) + return __bfqq; + + return NULL; +} + +/* + * bfqd - obvious + * cur_bfqq - passed in so that we don't decide that the current queue + * is closely cooperating with itself + * sector - used as a reference point to search for a close queue + */ +static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, + struct bfq_queue *cur_bfqq, + sector_t sector) +{ + struct bfq_queue *bfqq; + + if (bfq_class_idle(cur_bfqq)) + return NULL; + if (!bfq_bfqq_sync(cur_bfqq)) + return NULL; + if (BFQQ_SEEKY(cur_bfqq)) + return NULL; + + /* If device has only one backlogged bfq_queue, don't search. */ + if (bfqd->busy_queues == 1) + return NULL; + + /* + * We should notice if some of the queues are cooperating, e.g. + * working closely on the same area of the disk. In that case, + * we can group them together and don't waste time idling. + */ + bfqq = bfqq_close(bfqd, sector); + if (bfqq == NULL || bfqq == cur_bfqq) + return NULL; + + /* + * Do not merge queues from different bfq_groups. + */ + if (bfqq->entity.parent != cur_bfqq->entity.parent) + return NULL; + + /* + * It only makes sense to merge sync queues. + */ + if (!bfq_bfqq_sync(bfqq)) + return NULL; + if (BFQQ_SEEKY(bfqq)) + return NULL; + + /* + * Do not merge queues of different priority classes. + */ + if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) + return NULL; + + return bfqq; +} + +static struct bfq_queue * +bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +{ + int process_refs, new_process_refs; + struct bfq_queue *__bfqq; + + /* + * If there are no process references on the new_bfqq, then it is + * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain + * may have dropped their last reference (not just their last process + * reference). + */ + if (!bfqq_process_refs(new_bfqq)) + return NULL; + + /* Avoid a circular list and skip interim queue merges. */ + while ((__bfqq = new_bfqq->new_bfqq)) { + if (__bfqq == bfqq) + return NULL; + new_bfqq = __bfqq; + } + + process_refs = bfqq_process_refs(bfqq); + new_process_refs = bfqq_process_refs(new_bfqq); + /* + * If the process for the bfqq has gone away, there is no + * sense in merging the queues. + */ + if (process_refs == 0 || new_process_refs == 0) + return NULL; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", + new_bfqq->pid); + + /* + * Merging is just a redirection: the requests of the process owning + * one of the two queues are redirected to the other queue. The latter + * queue, in its turn, is set as shared if this is the first time that + * the requests of some process are redirected to it. + * + * We redirect bfqq to new_bfqq and not the opposite, because we + * are in the context of the process owning bfqq, hence we have the + * io_cq of this process. So we can immediately configure this io_cq + * to redirect the requests of the process to new_bfqq. + * + * NOTE, even if new_bfqq coincides with the active queue, the io_cq of + * new_bfqq is not available, because, if the active queue is shared, + * bfqd->active_cic may not point to the io_cq of the active queue. 
+ * Redirecting the requests of the process owning bfqq to the currently + * active queue is in any case the best option, as we feed the active queue + * with new requests close to the last request served and, by doing so, + * hopefully increase the throughput. + */ + bfqq->new_bfqq = new_bfqq; + atomic_add(process_refs, &new_bfqq->ref); + return new_bfqq; +} + +/* + * Attempt to schedule a merge of bfqq with the currently active queue or + * with a close queue among the scheduled queues. + * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue + * structure otherwise. + */ +static struct bfq_queue * +bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + void *io_struct, bool request) +{ + struct bfq_queue *active_bfqq, *new_bfqq; + + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + + if (!io_struct) + return NULL; + + active_bfqq = bfqd->active_queue; + + if (active_bfqq == NULL || active_bfqq == bfqq || !bfqd->active_cic) + goto check_scheduled; + + if (bfq_class_idle(active_bfqq) || bfq_class_idle(bfqq)) + goto check_scheduled; + + if (bfq_class_rt(active_bfqq) != bfq_class_rt(bfqq)) + goto check_scheduled; + + if (active_bfqq->entity.parent != bfqq->entity.parent) + goto check_scheduled; + + if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && + bfq_bfqq_sync(active_bfqq) && bfq_bfqq_sync(bfqq)) + if ((new_bfqq = bfq_setup_merge(bfqq, active_bfqq))) + return new_bfqq; /* Merge with the active queue */ + + /* + * Check whether there is a cooperator among currently scheduled + * queues. The only thing we need is that the bio/request is not + * NULL, as we need it to establish whether a cooperator exists. + */ +check_scheduled: + new_bfqq = bfq_close_cooperator(bfqd, bfqq, + bfq_io_struct_pos(io_struct, request)); + if (new_bfqq) + return bfq_setup_merge(bfqq, new_bfqq); + + return NULL; +} + +static inline void +bfq_bfqq_save_state(struct bfq_queue *bfqq) +{ + /* + * If bfqq->cic == NULL, the queue is already shared or its requests + * have already been redirected to a shared queue; both idle window + * and weight raising state have already been saved. Do nothing. + */ + if (bfqq->cic == NULL) + return; + if (bfqq->cic->raising_time_left) + /* + * This is the queue of a just-started process, and would + * deserve weight raising: we set raising_time_left to the full + * weight-raising duration to trigger weight-raising when and + * if the queue is split and the first request of the queue + * is enqueued. + */ + bfqq->cic->raising_time_left = bfq_wrais_duration(bfqq->bfqd); + else if (bfqq->raising_coeff > 1) { + unsigned long wrais_duration = + jiffies - bfqq->last_rais_start_finish; + /* + * It may happen that a queue's weight raising period lasts + * longer than its raising_cur_max_time, as weight raising is + * handled only when a request is enqueued or dispatched (it + * does not use any timer). If the weight raising period is + * about to end, don't save it. + */ + if (bfqq->raising_cur_max_time <= wrais_duration) + bfqq->cic->raising_time_left = 0; + else + bfqq->cic->raising_time_left = + bfqq->raising_cur_max_time - wrais_duration; + /* + * The bfq_queue is becoming shared or the requests of the + * process owning the queue are being redirected to a shared + * queue. Stop the weight raising period of the queue, as in + * both cases it should not be owned by an interactive or soft + * real-time application. 
+ */ + bfq_bfqq_end_raising(bfqq); + } else + bfqq->cic->raising_time_left = 0; + bfqq->cic->saved_idle_window = bfq_bfqq_idle_window(bfqq); +} + +static inline void +bfq_get_cic_reference(struct bfq_queue *bfqq) +{ + /* + * If bfqq->cic has a non-NULL value, the cic to which it belongs + * is about to begin using a shared bfq_queue. + */ + if (bfqq->cic) + atomic_long_inc(&bfqq->cic->ioc->refcount); +} + +static void +bfq_merge_bfqqs(struct bfq_data *bfqd, struct cfq_io_context *cic, + struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +{ + bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", + (long unsigned)new_bfqq->pid); + /* Save weight raising and idle window of the merged queues */ + bfq_bfqq_save_state(bfqq); + bfq_bfqq_save_state(new_bfqq); + /* + * Grab a reference to the cic, to prevent it from being destroyed + * before being possibly touched by a bfq_split_bfqq(). + */ + bfq_get_cic_reference(bfqq); + bfq_get_cic_reference(new_bfqq); + /* Merge queues (that is, let cic redirect its requests to new_bfqq) */ + cic_set_bfqq(cic, new_bfqq, 1); + bfq_mark_bfqq_coop(new_bfqq); + /* + * new_bfqq now belongs to at least two cics (it is a shared queue): set + * new_bfqq->cic to NULL. bfqq either: + * - does not belong to any cic any more, and hence bfqq->cic must + * be set to NULL, or + * - is a queue whose owning cics have already been redirected to a + * different queue, hence the queue is destined to not belong to any + * cic soon and bfqq->cic is already NULL (therefore the next + * assignment causes no harm). + */ + new_bfqq->cic = NULL; + bfqq->cic = NULL; + bfq_put_queue(bfqq); +} + +static int bfq_allow_merge(struct request_queue *q, struct request *rq, + struct bio *bio) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct cfq_io_context *cic; + struct bfq_queue *bfqq, *new_bfqq; + unsigned long flags; + + /* Disallow merge of a sync bio into an async request. */ + if (bfq_bio_sync(bio) && !rq_is_sync(rq)) + return 0; + + /* + * Lookup the bfqq that this bio will be queued with. Allow + * merge only if rq is queued there. + */ + cic = bfq_cic_lookup(bfqd, current->io_context); + if (cic == NULL) + return 0; + + /* + * The allow_merge_fn scheduler hook may be called with or without + * the queue_lock being held. Access to the rq_pos_tree data + * structures and to cic->bfqq[] is protected by the eqm_lock. + */ + spin_lock_irqsave(&bfqd->eqm_lock, flags); + bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); + /* + * We take advantage of this function to perform an early merge + * of the queues of possible cooperating processes. + */ + if (bfqq != NULL && + (new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false))) { + bfq_merge_bfqqs(bfqd, cic, bfqq, new_bfqq); + /* + * If we get here, the bio will be queued in the shared queue, + * i.e., new_bfqq, so use new_bfqq to decide whether bio and + * rq can be merged. + */ + bfqq = new_bfqq; + } + spin_unlock_irqrestore(&bfqd->eqm_lock, flags); + + return bfqq == RQ_BFQQ(rq); +} + +static void __bfq_set_active_queue(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + if (bfqq != NULL) { + bfq_mark_bfqq_must_alloc(bfqq); + bfq_mark_bfqq_budget_new(bfqq); + bfq_clear_bfqq_fifo_expire(bfqq); + + bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; + + bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu", + bfqq->entity.budget); + } + + bfqd->active_queue = bfqq; +} + +/* + * Get and set a new active queue for service. 
+ */ +static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq = bfq_get_next_queue(bfqd); + + __bfq_set_active_queue(bfqd, bfqq); + return bfqq; +} + +/* + * If enough samples have been computed, return the current max budget + * stored in bfqd, which is dynamically updated according to the + * estimated disk peak rate; otherwise return the default max budget + */ +static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) +{ + if (bfqd->budgets_assigned < 194) + return bfq_default_max_budget; + else + return bfqd->bfq_max_budget; +} + +/* + * Return min budget, which is a fraction of the current or default + * max budget (trying with 1/32) + */ +static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) +{ + if (bfqd->budgets_assigned < 194) + return bfq_default_max_budget / 32; + else + return bfqd->bfq_max_budget / 32; +} + +/* + * Decides whether idling should be done for given device and + * given active queue. + */ +static inline bool bfq_queue_nonrot_noidle(struct bfq_data *bfqd, + struct bfq_queue *active_bfqq) +{ + if (active_bfqq == NULL) + return false; + /* + * If device is SSD it has no seek penalty, disable idling; but + * do so only if: + * - device does not support queuing, otherwise we still have + * a problem with sync vs async workloads; + * - the queue is not weight-raised, to preserve guarantees. + */ + return (blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag && + active_bfqq->raising_coeff == 1); +} + +static void bfq_arm_slice_timer(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq = bfqd->active_queue; + struct cfq_io_context *cic; + unsigned long sl; + + WARN_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + + /* Tasks have exited, don't wait. */ + cic = bfqd->active_cic; + if (cic == NULL || atomic_read(&cic->ioc->nr_tasks) == 0) + return; + + bfq_mark_bfqq_wait_request(bfqq); + + /* + * We don't want to idle for seeks, but we do want to allow + * fair distribution of slice time for a process doing back-to-back + * seeks. So allow a little bit of time for him to submit a new rq. + * + * To prevent processes with (partly) seeky workloads from + * being too ill-treated, grant them a small fraction of the + * assigned budget before reducing the waiting time to + * BFQ_MIN_TT. This happened to help reduce latency. + */ + sl = bfqd->bfq_slice_idle; + if (bfq_sample_valid(bfqq->seek_samples) && BFQQ_SEEKY(bfqq) && + bfqq->entity.service > bfq_max_budget(bfqd) / 8 && + bfqq->raising_coeff == 1) + sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); + else if (bfqq->raising_coeff > 1) + sl = sl * 3; + bfqd->last_idling_start = ktime_get(); + mod_timer(&bfqd->idle_slice_timer, jiffies + sl); + bfq_log(bfqd, "arm idle: %u/%u ms", + jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); +} + +/* + * Set the maximum time for the active queue to consume its + * budget. This prevents seeky processes from lowering the disk + * throughput (always guaranteed with a time slice scheme as in CFQ). 
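+ * For a weight-raised queue the timeout is scaled by the ratio
+ * weight/orig_weight, except while the queue is being raised as
+ * soft real-time.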
+ */ +static void bfq_set_budget_timeout(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq = bfqd->active_queue; + unsigned int timeout_coeff; + if (bfqq->raising_cur_max_time == bfqd->bfq_raising_rt_max_time) + timeout_coeff = 1; + else + timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; + + bfqd->last_budget_start = ktime_get(); + + bfq_clear_bfqq_budget_new(bfqq); + bfqq->budget_timeout = jiffies + + bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; + + bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", + jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * + timeout_coeff)); +} + +/* + * Move request from internal lists to the request queue dispatch list. + */ +static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + bfq_remove_request(rq); + bfqq->dispatched++; + elv_dispatch_sort(q, rq); + + if (bfq_bfqq_sync(bfqq)) + bfqd->sync_flight++; +} + +/* + * Return expired entry, or NULL to just start from scratch in rbtree. + */ +static struct request *bfq_check_fifo(struct bfq_queue *bfqq) +{ + struct request *rq = NULL; + + if (bfq_bfqq_fifo_expire(bfqq)) + return NULL; + + bfq_mark_bfqq_fifo_expire(bfqq); + + if (list_empty(&bfqq->fifo)) + return NULL; + + rq = rq_entry_fifo(bfqq->fifo.next); + + if (time_before(jiffies, rq_fifo_time(rq))) + return NULL; + + return rq; +} + +static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + return entity->budget - entity->service; +} + +/* Must be called with eqm_lock held */ +static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + BUG_ON(bfqq != bfqd->active_queue); + + __bfq_bfqd_reset_active(bfqd); + + /* + * If this bfqq is shared between multiple processes, check + * to make sure that those processes are still issuing I/Os + * within the mean seek distance. If not, it may be time to + * break the queues apart again. + */ + if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) + bfq_mark_bfqq_split_coop(bfqq); + + if (RB_EMPTY_ROOT(&bfqq->sort_list)) { + /* + * overloading budget_timeout field to store when + * the queue remains with no backlog, used by + * the weight-raising mechanism + */ + bfqq->budget_timeout = jiffies ; + bfq_del_bfqq_busy(bfqd, bfqq, 1); + } + else { + bfq_activate_bfqq(bfqd, bfqq); + /* + * Resort priority tree of potential close cooperators. + */ + bfq_rq_pos_tree_add(bfqd, bfqq); + } +} + +/** + * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. + * @bfqd: device data. + * @bfqq: queue to update. + * @reason: reason for expiration. + * + * Handle the feedback on @bfqq budget. See the body for detailed + * comments. 
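+ * Only sync queues receive budget feedback; async queues always get
+ * the maximum possible budget.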
+ */ +static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + enum bfqq_expiration reason) +{ + struct request *next_rq; + unsigned long budget, min_budget; + + budget = bfqq->max_budget; + min_budget = bfq_min_budget(bfqd); + + BUG_ON(bfqq != bfqd->active_queue); + + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", + bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", + budget, bfq_min_budget(bfqd)); + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", + bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->active_queue)); + + if (bfq_bfqq_sync(bfqq)) { + switch (reason) { + /* + * Caveat: in all the following cases we trade latency + * for throughput. + */ + case BFQ_BFQQ_TOO_IDLE: + /* + * This is the only case where we may reduce + * the budget: if there is no requets of the + * process still waiting for completion, then + * we assume (tentatively) that the timer has + * expired because the batch of requests of + * the process could have been served with a + * smaller budget. Hence, betting that + * process will behave in the same way when it + * becomes backlogged again, we reduce its + * next budget. As long as we guess right, + * this budget cut reduces the latency + * experienced by the process. + * + * However, if there are still outstanding + * requests, then the process may have not yet + * issued its next request just because it is + * still waiting for the completion of some of + * the still oustanding ones. So in this + * subcase we do not reduce its budget, on the + * contrary we increase it to possibly boost + * the throughput, as discussed in the + * comments to the BUDGET_TIMEOUT case. + */ + if (bfqq->dispatched > 0) /* still oustanding reqs */ + budget = min(budget * 2, bfqd->bfq_max_budget); + else { + if (budget > 5 * min_budget) + budget -= 4 * min_budget; + else + budget = min_budget; + } + break; + case BFQ_BFQQ_BUDGET_TIMEOUT: + /* + * We double the budget here because: 1) it + * gives the chance to boost the throughput if + * this is not a seeky process (which may have + * bumped into this timeout because of, e.g., + * ZBR), 2) together with charge_full_budget + * it helps give seeky processes higher + * timestamps, and hence be served less + * frequently. + */ + budget = min(budget * 2, bfqd->bfq_max_budget); + break; + case BFQ_BFQQ_BUDGET_EXHAUSTED: + /* + * The process still has backlog, and did not + * let either the budget timeout or the disk + * idling timeout expire. Hence it is not + * seeky, has a short thinktime and may be + * happy with a higher budget too. So + * definitely increase the budget of this good + * candidate to boost the disk throughput. + */ + budget = min(budget * 4, bfqd->bfq_max_budget); + break; + case BFQ_BFQQ_NO_MORE_REQUESTS: + /* + * Leave the budget unchanged. + */ + default: + return; + } + } else /* async queue */ + /* async queues get always the maximum possible budget + * (their ability to dispatch is limited by + * @bfqd->bfq_max_budget_async_rq). + */ + budget = bfqd->bfq_max_budget; + + bfqq->max_budget = budget; + + if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 && + bfqq->max_budget > bfqd->bfq_max_budget) + bfqq->max_budget = bfqd->bfq_max_budget; + + /* + * Make sure that we have enough budget for the next request. + * Since the finish time of the bfqq must be kept in sync with + * the budget, be sure to call __bfq_bfqq_expire() after the + * update. 
+ */ + next_rq = bfqq->next_rq; + if (next_rq != NULL) + bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + else + bfqq->entity.budget = bfqq->max_budget; + + bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu", + next_rq != NULL ? blk_rq_sectors(next_rq) : 0, + bfqq->entity.budget); +} + +static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) +{ + unsigned long max_budget; + + /* + * The max_budget calculated when autotuning is equal to the + * amount of sectors transfered in timeout_sync at the + * estimated peak rate. + */ + max_budget = (unsigned long)(peak_rate * 1000 * + timeout >> BFQ_RATE_SHIFT); + + return max_budget; +} + +/* + * In addition to updating the peak rate, checks whether the process + * is "slow", and returns 1 if so. This slow flag is used, in addition + * to the budget timeout, to reduce the amount of service provided to + * seeky processes, and hence reduce their chances to lower the + * throughput. See the code for more details. + */ +static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, + int compensate, enum bfqq_expiration reason) +{ + u64 bw, usecs, expected, timeout; + ktime_t delta; + int update = 0; + + if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) + return 0; + + if (compensate) + delta = bfqd->last_idling_start; + else + delta = ktime_get(); + delta = ktime_sub(delta, bfqd->last_budget_start); + usecs = ktime_to_us(delta); + + /* Don't trust short/unrealistic values. */ + if (usecs < 100 || usecs >= LONG_MAX) + return 0; + + /* + * Calculate the bandwidth for the last slice. We use a 64 bit + * value to store the peak rate, in sectors per usec in fixed + * point math. We do so to have enough precision in the estimate + * and to avoid overflows. + */ + bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; + do_div(bw, (unsigned long)usecs); + + timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); + + /* + * Use only long (> 20ms) intervals to filter out spikes for + * the peak rate estimation. + */ + if (usecs > 20000) { + if (bw > bfqd->peak_rate || + (!BFQQ_SEEKY(bfqq) && + reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { + bfq_log(bfqd, "measured bw =%llu", bw); + /* + * To smooth oscillations use a low-pass filter with + * alpha=7/8, i.e., + * new_rate = (7/8) * old_rate + (1/8) * bw + */ + do_div(bw, 8); + if (bw == 0) + return 0; + bfqd->peak_rate *= 7; + do_div(bfqd->peak_rate, 8); + bfqd->peak_rate += bw; + update = 1; + bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); + } + + update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; + + if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) + bfqd->peak_rate_samples++; + + if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && + update && bfqd->bfq_user_max_budget == 0) { + bfqd->bfq_max_budget = + bfq_calc_max_budget(bfqd->peak_rate, timeout); + bfq_log(bfqd, "new max_budget=%lu", + bfqd->bfq_max_budget); + } + } + + /* + * If the process has been served for a too short time + * interval to let its possible sequential accesses prevail on + * the initial seek time needed to move the disk head on the + * first sector it requested, then give the process a chance + * and for the moment return false. 
+ */ + if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) + return 0; + + /* + * A process is considered ``slow'' (i.e., seeky, so that we + * cannot treat it fairly in the service domain, as it would + * slow down too much the other processes) if, when a slice + * ends for whatever reason, it has received service at a + * rate that would not be high enough to complete the budget + * before the budget timeout expiration. + */ + expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; + + /* + * Caveat: processes doing IO in the slower disk zones will + * tend to be slow(er) even if not seeky. And the estimated + * peak rate will actually be an average over the disk + * surface. Hence, to not be too harsh with unlucky processes, + * we keep a budget/3 margin of safety before declaring a + * process slow. + */ + return expected > (4 * bfqq->entity.budget) / 3; +} + +/** + * bfq_bfqq_expire - expire a queue. + * @bfqd: device owning the queue. + * @bfqq: the queue to expire. + * @compensate: if true, compensate for the time spent idling. + * @reason: the reason causing the expiration. + * + * + * If the process associated to the queue is slow (i.e., seeky), or in + * case of budget timeout, or, finally, if it is async, we + * artificially charge it an entire budget (independently of the + * actual service it received). As a consequence, the queue will get + * higher timestamps than the correct ones upon reactivation, and + * hence it will be rescheduled as if it had received more service + * than what it actually received. In the end, this class of processes + * will receive less service in proportion to how slowly they consume + * their budgets (and hence how seriously they tend to lower the + * throughput). + * + * In contrast, when a queue expires because it has been idling for + * too much or because it exhausted its budget, we do not touch the + * amount of service it has received. Hence when the queue will be + * reactivated and its timestamps updated, the latter will be in sync + * with the actual service received by the queue until expiration. + * + * Charging a full budget to the first type of queues and the exact + * service to the others has the effect of using the WF2Q+ policy to + * schedule the former on a timeslice basis, without violating the + * service domain guarantees of the latter. + */ +static void bfq_bfqq_expire(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + int compensate, + enum bfqq_expiration reason) +{ + int slow; + BUG_ON(bfqq != bfqd->active_queue); + + /* Update disk peak rate for autotuning and check whether the + * process is slow (see bfq_update_peak_rate). + */ + slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); + + /* + * As above explained, 'punish' slow (i.e., seeky), timed-out + * and async queues, to favor sequential sync workloads. + * + * Processes doing IO in the slower disk zones will tend to be + * slow(er) even if not seeky. Hence, since the estimated peak + * rate is actually an average over the disk surface, these + * processes may timeout just for bad luck. To avoid punishing + * them we do not charge a full budget to a process that + * succeeded in consuming at least 2/3 of its budget. 
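+	 *
+	 * For example, with hypothetical numbers: on a 9000-sector
+	 * budget, a queue that times out after consuming 6500 sectors
+	 * has 2500 < 3000 sectors left and, unless also flagged slow,
+	 * keeps its actual service; one that consumed only 4000
+	 * sectors has 5000 >= 3000 sectors left and is charged the
+	 * full 9000.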
+ */ + if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) + bfq_bfqq_charge_full_budget(bfqq); + + if (bfqd->low_latency && bfqq->raising_coeff == 1) + bfqq->last_rais_start_finish = jiffies; + + if (bfqd->low_latency && bfqd->bfq_raising_max_softrt_rate > 0) { + if(reason != BFQ_BFQQ_BUDGET_TIMEOUT) + bfqq->soft_rt_next_start = + jiffies + + HZ * bfqq->entity.service / + bfqd->bfq_raising_max_softrt_rate; + else + bfqq->soft_rt_next_start = -1; /* infinity */ + } + bfq_log_bfqq(bfqd, bfqq, + "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow, + bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); + + /* Increase, decrease or leave budget unchanged according to reason */ + __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); + spin_lock(&bfqd->eqm_lock); + __bfq_bfqq_expire(bfqd, bfqq); + spin_unlock(&bfqd->eqm_lock); +} + +/* + * Budget timeout is not implemented through a dedicated timer, but + * just checked on request arrivals and completions, as well as on + * idle timer expirations. + */ +static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) +{ + if (bfq_bfqq_budget_new(bfqq)) + return 0; + + if (time_before(jiffies, bfqq->budget_timeout)) + return 0; + + return 1; +} + +/* + * If we expire a queue that is waiting for the arrival of a new + * request, we may prevent the fictitious timestamp backshifting that + * allows the guarantees of the queue to be preserved (see [1] for + * this tricky aspect). Hence we return true only if this condition + * does not hold, or if the queue is slow enough to deserve only to be + * kicked off for preserving a high throughput. +*/ +static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) +{ + bfq_log_bfqq(bfqq->bfqd, bfqq, + "may_budget_timeout: wr %d left %d timeout %d", + bfq_bfqq_wait_request(bfqq), + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, + bfq_bfqq_budget_timeout(bfqq)); + + return (!bfq_bfqq_wait_request(bfqq) || + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) + && + bfq_bfqq_budget_timeout(bfqq); +} + +/* + * If the active queue is empty, but it is sync and either of the following + * conditions holds, then: 1) the queue must remain active and cannot be + * expired, and 2) the disk must be idled to wait for the possible arrival + * of a new request for the queue. The conditions are: + * - the device is rotational and not performing NCQ, and the queue has its + * idle window set (in this case, waiting for a new request for the queue + * is likely to boost the disk throughput); + * - the queue is weight-raised (waiting for the request is necessary for + * providing the queue with fairness and latency guarantees). + * + * In any case, idling can be disabled for cooperation issues, if + * 1) there is a close cooperator for the queue, or + * 2) the queue is shared and some cooperator is likely to be idle (in this + * case, by not arming the idle timer, we try to slow down the queue, to + * prevent the zones of the disk accessed by the active cooperators to + * become too distant from the zone that will be accessed by the currently + * idle cooperators). 
+ */ +static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq, + int budg_timeout) +{ + struct bfq_data *bfqd = bfqq->bfqd; + + struct bfq_queue *coop_bfqq; + + spin_lock(&bfqd->eqm_lock); + coop_bfqq = bfq_close_cooperator(bfqd, bfqq, bfqd->last_position); + spin_unlock(&bfqd->eqm_lock); + + return (bfq_bfqq_sync(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) && + bfqd->bfq_slice_idle != 0 && + ((bfq_bfqq_idle_window(bfqq) && !bfqd->hw_tag && + !blk_queue_nonrot(bfqd->queue)) + || bfqq->raising_coeff > 1) && + (bfqd->rq_in_driver == 0 || + budg_timeout || + bfqq->raising_coeff > 1) && + !coop_bfqq && + (!bfq_bfqq_coop(bfqq) || + !bfq_bfqq_some_coop_idle(bfqq)) && + !bfq_queue_nonrot_noidle(bfqd, bfqq)); +} + +/* + * Select a queue for service. If we have a current active queue, + * check whether to continue servicing it, or retrieve and set a new one. + */ +static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq; + struct request *next_rq; + enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; + int budg_timeout; + + bfqq = bfqd->active_queue; + if (bfqq == NULL) + goto new_queue; + + bfq_log_bfqq(bfqd, bfqq, "select_queue: already active queue"); + + budg_timeout = bfq_may_expire_for_budg_timeout(bfqq); + if (budg_timeout && + !bfq_bfqq_must_idle(bfqq, budg_timeout)) + goto expire; + + next_rq = bfqq->next_rq; + /* + * If bfqq has requests queued and it has enough budget left to + * serve them, keep the queue, otherwise expire it. + */ + if (next_rq != NULL) { + if (bfq_serv_to_charge(next_rq, bfqq) > + bfq_bfqq_budget_left(bfqq)) { + reason = BFQ_BFQQ_BUDGET_EXHAUSTED; + goto expire; + } else { + /* + * The idle timer may be pending because we may not + * disable disk idling even when a new request arrives + */ + if (timer_pending(&bfqd->idle_slice_timer)) { + /* + * If we get here: 1) at least a new request + * has arrived but we have not disabled the + * timer because the request was too small, + * 2) then the block layer has unplugged the + * device, causing the dispatch to be invoked. + * + * Since the device is unplugged, now the + * requests are probably large enough to + * provide a reasonable throughput. + * So we disable idling. + */ + bfq_clear_bfqq_wait_request(bfqq); + del_timer(&bfqd->idle_slice_timer); + } + goto keep_queue; + } + } + + /* + * No requests pending. If there is no cooperator, and the active + * queue still has requests in flight or is idling for a new request, + * then keep it. + */ + if (timer_pending(&bfqd->idle_slice_timer) || + (bfqq->dispatched != 0 && + (bfq_bfqq_idle_window(bfqq) || bfqq->raising_coeff > 1) && + !bfq_queue_nonrot_noidle(bfqd, bfqq))) { + bfqq = NULL; + goto keep_queue; + } + + reason = BFQ_BFQQ_NO_MORE_REQUESTS; +expire: + bfq_bfqq_expire(bfqd, bfqq, 0, reason); +new_queue: + bfqq = bfq_set_active_queue(bfqd); + bfq_log(bfqd, "select_queue: new queue %d returned", + bfqq != NULL ? 
bfqq->pid : 0); +keep_queue: + return bfqq; +} + +static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + if (bfqq->raising_coeff > 1) { /* queue is being boosted */ + bfq_log_bfqq(bfqd, bfqq, + "raising period dur %u/%u msec, " + "old raising coeff %u, w %d(%d)", + jiffies_to_msecs(jiffies - + bfqq->last_rais_start_finish), + jiffies_to_msecs(bfqq->raising_cur_max_time), + bfqq->raising_coeff, + bfqq->entity.weight, bfqq->entity.orig_weight); + + BUG_ON(bfqq != bfqd->active_queue && entity->weight != + entity->orig_weight * bfqq->raising_coeff); + if(entity->ioprio_changed) + bfq_log_bfqq(bfqd, bfqq, + "WARN: pending prio change"); + /* + * If too much time has elapsed from the beginning + * of this weight-raising period and process is not soft + * real-time, stop it + */ + if (jiffies - bfqq->last_rais_start_finish > + bfqq->raising_cur_max_time) { + int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && + bfqq->soft_rt_next_start < jiffies; + + bfqq->last_rais_start_finish = jiffies; + if (soft_rt) + bfqq->raising_cur_max_time = + bfqd->bfq_raising_rt_max_time; + else + bfq_bfqq_end_raising(bfqq); + } + } + /* Update weight both if it must be raised and if it must be lowered */ + if ((entity->weight > entity->orig_weight) != (bfqq->raising_coeff > 1)) + __bfq_entity_update_weight_prio( + bfq_entity_service_tree(entity), + entity); +} + +/* + * Dispatch one request from bfqq, moving it to the request queue + * dispatch list. + */ +static int bfq_dispatch_request(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + int dispatched = 0; + struct request *rq; + unsigned long service_to_charge; + + BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); + + /* Follow expired path, else get first next available. */ + rq = bfq_check_fifo(bfqq); + if (rq == NULL) + rq = bfqq->next_rq; + service_to_charge = bfq_serv_to_charge(rq, bfqq); + + if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { + /* + * This may happen if the next rq is chosen + * in fifo order instead of sector order. + * The budget is properly dimensioned + * to be always sufficient to serve the next request + * only if it is chosen in sector order. The reason is + * that it would be quite inefficient and little useful + * to always make sure that the budget is large enough + * to serve even the possible next rq in fifo order. + * In fact, requests are seldom served in fifo order. + * + * Expire the queue for budget exhaustion, and + * make sure that the next act_budget is enough + * to serve the next request, even if it comes + * from the fifo expired path. + */ + bfqq->next_rq = rq; + /* + * Since this dispatch is failed, make sure that + * a new one will be performed + */ + if (!bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); + goto expire; + } + + /* Finally, insert request into driver dispatch list. 
*/ + bfq_bfqq_served(bfqq, service_to_charge); + bfq_dispatch_insert(bfqd->queue, rq); + + update_raising_data(bfqd, bfqq); + + bfq_log_bfqq(bfqd, bfqq, "dispatched %u sec req (%llu), " + "budg left %lu", + blk_rq_sectors(rq), + (long long unsigned)blk_rq_pos(rq), + bfq_bfqq_budget_left(bfqq)); + + dispatched++; + + if (bfqd->active_cic == NULL) { + atomic_long_inc(&RQ_CIC(rq)->ioc->refcount); + bfqd->active_cic = RQ_CIC(rq); + } + + if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && + dispatched >= bfqd->bfq_max_budget_async_rq) || + bfq_class_idle(bfqq))) + goto expire; + + return dispatched; + +expire: + bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED); + return dispatched; +} + +static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) +{ + int dispatched = 0; + + while (bfqq->next_rq != NULL) { + bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); + dispatched++; + } + + BUG_ON(!list_empty(&bfqq->fifo)); + return dispatched; +} + +/* + * Drain our current requests. Used for barriers and when switching + * io schedulers on-the-fly. + */ +static int bfq_forced_dispatch(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq, *n; + struct bfq_service_tree *st; + int dispatched = 0; + + bfqq = bfqd->active_queue; + if (bfqq != NULL) { + spin_lock(&bfqd->eqm_lock); + __bfq_bfqq_expire(bfqd, bfqq); + spin_unlock(&bfqd->eqm_lock); + } + + /* + * Loop through classes, and be careful to leave the scheduler + * in a consistent state, as feedback mechanisms and vtime + * updates cannot be disabled during the process. + */ + list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { + st = bfq_entity_service_tree(&bfqq->entity); + + dispatched += __bfq_forced_dispatch_bfqq(bfqq); + bfqq->max_budget = bfq_max_budget(bfqd); + + bfq_forget_idle(st); + } + + BUG_ON(bfqd->busy_queues != 0); + + return dispatched; +} + +static int bfq_dispatch_requests(struct request_queue *q, int force) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq; + int max_dispatch; + + bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); + if (bfqd->busy_queues == 0) + return 0; + + if (unlikely(force)) + return bfq_forced_dispatch(bfqd); + + if((bfqq = bfq_select_queue(bfqd)) == NULL) + return 0; + + max_dispatch = bfqd->bfq_quantum; + if (bfq_class_idle(bfqq)) + max_dispatch = 1; + + if (!bfq_bfqq_sync(bfqq)) + max_dispatch = bfqd->bfq_max_budget_async_rq; + + if (bfqq->dispatched >= max_dispatch) { + if (bfqd->busy_queues > 1) + return 0; + if (bfqq->dispatched >= 4 * max_dispatch) + return 0; + } + + if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) + return 0; + + bfq_clear_bfqq_wait_request(bfqq); + BUG_ON(timer_pending(&bfqd->idle_slice_timer)); + + if (! bfq_dispatch_request(bfqd, bfqq)) + return 0; + + bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d" + "(max_disp %d)", bfqq->pid, max_dispatch); + + return 1; +} + +/* + * Task holds one reference to the queue, dropped when task exits. Each rq + * in-flight on this queue also holds a reference, dropped when rq is freed. + * + * Queue lock must be held here. 
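+ *
+ * Note that the async queues cached in bfq_group (async_bfqq[][] and
+ * async_idle_bfqq) hold an additional reference, taken in
+ * bfq_get_queue() and released through bfq_put_async_queues().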
+ */ +static void bfq_put_queue(struct bfq_queue *bfqq) +{ + struct bfq_data *bfqd = bfqq->bfqd; + + BUG_ON(atomic_read(&bfqq->ref) <= 0); + + bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, + atomic_read(&bfqq->ref)); + if (!atomic_dec_and_test(&bfqq->ref)) + return; + + BUG_ON(rb_first(&bfqq->sort_list) != NULL); + BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); + BUG_ON(bfqq->entity.tree != NULL); + BUG_ON(bfq_bfqq_busy(bfqq)); + BUG_ON(bfqd->active_queue == bfqq); + + bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); + + kmem_cache_free(bfq_pool, bfqq); +} + +static void bfq_put_cooperator(struct bfq_queue *bfqq) +{ + struct bfq_queue *__bfqq, *next; + + /* + * If this queue was scheduled to merge with another queue, be + * sure to drop the reference taken on that queue (and others in + * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. + */ + __bfqq = bfqq->new_bfqq; + while (__bfqq) { + if (__bfqq == bfqq) { + WARN(1, "bfqq->new_bfqq loop detected.\n"); + break; + } + next = __bfqq->new_bfqq; + bfq_put_queue(__bfqq); + __bfqq = next; + } +} + +/* Coop lock is taken in __bfq_exit_single_io_context() */ +static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + if (bfqq == bfqd->active_queue) { + __bfq_bfqq_expire(bfqd, bfqq); + bfq_schedule_dispatch(bfqd); + } + + bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, + atomic_read(&bfqq->ref)); + + bfq_put_cooperator(bfqq); + + bfq_put_queue(bfqq); +} + +/* + * Update the entity prio values; note that the new values will not + * be used until the next (re)activation. + */ +static void bfq_init_prio_data(struct bfq_queue *bfqq, struct io_context *ioc) +{ + struct task_struct *tsk = current; + int ioprio_class; + + if (!bfq_bfqq_prio_changed(bfqq)) + return; + + ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); + switch (ioprio_class) { + default: + printk(KERN_ERR "bfq: bad prio %x\n", ioprio_class); + case IOPRIO_CLASS_NONE: + /* + * No prio set, inherit CPU scheduling settings. + */ + bfqq->entity.new_ioprio = task_nice_ioprio(tsk); + bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); + break; + case IOPRIO_CLASS_RT: + bfqq->entity.new_ioprio = task_ioprio(ioc); + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; + break; + case IOPRIO_CLASS_BE: + bfqq->entity.new_ioprio = task_ioprio(ioc); + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; + break; + case IOPRIO_CLASS_IDLE: + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; + bfqq->entity.new_ioprio = 7; + bfq_clear_bfqq_idle_window(bfqq); + break; + } + + bfqq->entity.ioprio_changed = 1; + + /* + * Keep track of original prio settings in case we have to temporarily + * elevate the priority of this queue. 
+ */ + bfqq->org_ioprio = bfqq->entity.new_ioprio; + bfq_clear_bfqq_prio_changed(bfqq); +} + +static void bfq_changed_ioprio(struct io_context *ioc, + struct cfq_io_context *cic) +{ + struct bfq_data *bfqd; + struct bfq_queue *bfqq, *new_bfqq; + struct bfq_group *bfqg; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (unlikely(bfqd == NULL)) + return; + + spin_lock(&bfqd->eqm_lock); + bfqq = cic->cfqq[BLK_RW_ASYNC]; + if (bfqq != NULL) { + bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, + sched_data); + new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, cic->ioc, + GFP_ATOMIC); + if (new_bfqq != NULL) { + cic->cfqq[BLK_RW_ASYNC] = new_bfqq; + bfq_log_bfqq(bfqd, bfqq, + "changed_ioprio: bfqq %p %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } + } + + bfqq = cic->cfqq[BLK_RW_SYNC]; + spin_unlock(&bfqd->eqm_lock); + if (bfqq != NULL) + bfq_mark_bfqq_prio_changed(bfqq); + + bfq_put_bfqd_unlock(bfqd, &flags); +} + +static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, + pid_t pid, int is_sync) +{ + RB_CLEAR_NODE(&bfqq->entity.rb_node); + INIT_LIST_HEAD(&bfqq->fifo); + + atomic_set(&bfqq->ref, 0); + bfqq->bfqd = bfqd; + + bfq_mark_bfqq_prio_changed(bfqq); + + if (is_sync) { + if (!bfq_class_idle(bfqq)) + bfq_mark_bfqq_idle_window(bfqq); + bfq_mark_bfqq_sync(bfqq); + } + + /* Tentative initial value to trade off between thr and lat */ + bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; + bfqq->pid = pid; + + bfqq->raising_coeff = 1; + bfqq->last_rais_start_finish = 0; + bfqq->soft_rt_next_start = -1; +} + +static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, + int is_sync, + struct io_context *ioc, + gfp_t gfp_mask) +{ + struct bfq_queue *bfqq, *new_bfqq = NULL; + struct cfq_io_context *cic; + +retry: + cic = bfq_cic_lookup(bfqd, ioc); + /* cic always exists here */ + bfqq = cic_to_bfqq(cic, is_sync); + + /* + * Always try a new alloc if we fall back to the OOM bfqq + * originally, since it should just be a temporary situation. 
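+	 *
+	 * Note: when a blocking allocation is allowed (__GFP_WAIT),
+	 * both eqm_lock and the queue lock are dropped around
+	 * kmem_cache_alloc_node() and the lookup is then retried,
+	 * since another context may have installed a queue for this
+	 * cic in the meantime.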
+ */ + if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { + bfqq = NULL; + if (new_bfqq != NULL) { + bfqq = new_bfqq; + new_bfqq = NULL; + } else if (gfp_mask & __GFP_WAIT) { + spin_unlock(&bfqd->eqm_lock); + spin_unlock_irq(bfqd->queue->queue_lock); + new_bfqq = kmem_cache_alloc_node(bfq_pool, + gfp_mask | __GFP_ZERO, + bfqd->queue->node); + spin_lock_irq(bfqd->queue->queue_lock); + spin_lock(&bfqd->eqm_lock); + if (new_bfqq != NULL) + goto retry; + } else { + bfqq = kmem_cache_alloc_node(bfq_pool, + gfp_mask | __GFP_ZERO, + bfqd->queue->node); + } + + if (bfqq != NULL) { + bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); + bfq_log_bfqq(bfqd, bfqq, "allocated"); + } else { + bfqq = &bfqd->oom_bfqq; + bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); + } + + bfq_init_prio_data(bfqq, ioc); + bfq_init_entity(&bfqq->entity, bfqg); + } + + if (new_bfqq != NULL) + kmem_cache_free(bfq_pool, new_bfqq); + + return bfqq; +} + +static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, + struct bfq_group *bfqg, + int ioprio_class, int ioprio) +{ + switch (ioprio_class) { + case IOPRIO_CLASS_RT: + return &bfqg->async_bfqq[0][ioprio]; + case IOPRIO_CLASS_BE: + return &bfqg->async_bfqq[1][ioprio]; + case IOPRIO_CLASS_IDLE: + return &bfqg->async_idle_bfqq; + default: + BUG(); + } +} + +static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, int is_sync, + struct io_context *ioc, gfp_t gfp_mask) +{ + const int ioprio = task_ioprio(ioc); + const int ioprio_class = task_ioprio_class(ioc); + struct bfq_queue **async_bfqq = NULL; + struct bfq_queue *bfqq = NULL; + + if (!is_sync) { + async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, + ioprio); + bfqq = *async_bfqq; + } + + if (bfqq == NULL) + bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, ioc, gfp_mask); + + /* + * Pin the queue now that it's allocated, scheduler exit will prune it. + */ + if (!is_sync && *async_bfqq == NULL) { + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", + bfqq, atomic_read(&bfqq->ref)); + *async_bfqq = bfqq; + } + + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, + atomic_read(&bfqq->ref)); + return bfqq; +} + +static void bfq_update_io_thinktime(struct bfq_data *bfqd, + struct cfq_io_context *cic) +{ + unsigned long elapsed = jiffies - cic->ttime.last_end_request; + unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); + + cic->ttime.ttime_samples = (7*cic->ttime.ttime_samples + 256) / 8; + cic->ttime.ttime_total = (7*cic->ttime.ttime_total + 256*ttime) / 8; + cic->ttime.ttime_mean = (cic->ttime.ttime_total + 128) / cic->ttime.ttime_samples; +} + +static void bfq_update_io_seektime(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct request *rq) +{ + sector_t sdist; + u64 total; + + if (bfqq->last_request_pos < blk_rq_pos(rq)) + sdist = blk_rq_pos(rq) - bfqq->last_request_pos; + else + sdist = bfqq->last_request_pos - blk_rq_pos(rq); + + /* + * Don't allow the seek distance to get too large from the + * odd fragment, pagein, etc. 
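+	 *
+	 * The statistics below form an exponential moving average
+	 * that gives the newest sample a 1/8 weight; the constant 256
+	 * makes seek_samples saturate at 256, so that
+	 * seek_mean = seek_total / seek_samples remains a plain
+	 * per-request seek distance in sectors.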
+	 */
+	if (bfqq->seek_samples == 0) /* first request, not really a seek */
+		sdist = 0;
+	else if (bfqq->seek_samples <= 60) /* second & third seek */
+		sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024);
+	else
+		sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64);
+
+	bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8;
+	bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8;
+	total = bfqq->seek_total + (bfqq->seek_samples/2);
+	do_div(total, bfqq->seek_samples);
+	if (bfq_bfqq_coop(bfqq)) {
+		/*
+		 * If the mean seek time increases for a (non-seeky) shared
+		 * queue, some cooperator is likely to be idling too much.
+		 * On the contrary, if it decreases, some cooperator has
+		 * probably woken up.
+		 */
+		if ((sector_t)total < bfqq->seek_mean)
+			bfq_mark_bfqq_some_coop_idle(bfqq);
+		else if ((sector_t)total > bfqq->seek_mean)
+			bfq_clear_bfqq_some_coop_idle(bfqq);
+	}
+	bfqq->seek_mean = (sector_t)total;
+
+	bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist,
+			(u64)bfqq->seek_mean);
+}
+
+/*
+ * Disable idle window if the process thinks too long or seeks so much that
+ * it doesn't matter.
+ */
+static void bfq_update_idle_window(struct bfq_data *bfqd,
+				   struct bfq_queue *bfqq,
+				   struct cfq_io_context *cic)
+{
+	int enable_idle;
+
+	/* Don't idle for async or idle io prio class. */
+	if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
+		return;
+
+	/* Idle window just restored, statistics are meaningless. */
+	if (bfq_bfqq_just_split(bfqq))
+		return;
+
+	enable_idle = bfq_bfqq_idle_window(bfqq);
+
+	if (atomic_read(&cic->ioc->nr_tasks) == 0 ||
+	    bfqd->bfq_slice_idle == 0 ||
+	    (bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
+	     bfqq->raising_coeff == 1))
+		enable_idle = 0;
+	else if (bfq_sample_valid(cic->ttime.ttime_samples)) {
+		if (cic->ttime.ttime_mean > bfqd->bfq_slice_idle &&
+		    bfqq->raising_coeff == 1)
+			enable_idle = 0;
+		else
+			enable_idle = 1;
+	}
+	bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d",
+		enable_idle);
+
+	if (enable_idle)
+		bfq_mark_bfqq_idle_window(bfqq);
+	else
+		bfq_clear_bfqq_idle_window(bfqq);
+}
+
+/*
+ * Called when a new fs request (rq) is added to bfqq. Check if there's
+ * something we should do about it.
+ */
+static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+			    struct request *rq)
+{
+	struct cfq_io_context *cic = RQ_CIC(rq);
+
+	if (rq->cmd_flags & REQ_META)
+		bfqq->meta_pending++;
+
+	bfq_update_io_thinktime(bfqd, cic);
+	bfq_update_io_seektime(bfqd, bfqq, rq);
+	if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
+	    !BFQQ_SEEKY(bfqq))
+		bfq_update_idle_window(bfqd, bfqq, cic);
+	bfq_clear_bfqq_just_split(bfqq);
+
+	bfq_log_bfqq(bfqd, bfqq,
+		     "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
+		     bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq),
+		     (long long unsigned)bfqq->seek_mean);
+
+	bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
+
+	if (bfqq == bfqd->active_queue) {
+		/*
+		 * If there is just this request queued and the request
+		 * is small, just exit.
+		 * In this way, if the disk is being idled to wait for a new
+		 * request from the active queue, we avoid unplugging the
+		 * device now.
+		 *
+		 * By doing so, we avoid committing the disk to serving
+		 * just a small request. On the contrary, we wait for
+		 * the block layer to decide when to unplug the device:
+		 * hopefully, new requests will be merged with this
+		 * one quickly, then the device will be unplugged
+		 * and larger requests will be dispatched.
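+		 *
+		 * Concretely, the check below leaves a lone queued
+		 * request of fewer than 32 sectors (under 16 KiB with
+		 * 512-byte sectors) sitting in the queue without
+		 * kicking the device.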
+ */ + if (bfqq->queued[rq_is_sync(rq)] == 1 && + blk_rq_sectors(rq) < 32) { + return; + } + if (bfq_bfqq_wait_request(bfqq)) { + /* + * If we are waiting for a request for this queue, let + * it rip immediately and flag that we must not expire + * this queue just now. + */ + bfq_clear_bfqq_wait_request(bfqq); + del_timer(&bfqd->idle_slice_timer); + /* + * Here we can safely expire the queue, in + * case of budget timeout, without wasting + * guarantees + */ + if (bfq_bfqq_budget_timeout(bfqq)) + bfq_bfqq_expire(bfqd, bfqq, 0, + BFQ_BFQQ_BUDGET_TIMEOUT); + __blk_run_queue(bfqd->queue); + } + } +} + +static void bfq_insert_request(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq; + + assert_spin_locked(bfqd->queue->queue_lock); + + /* + * An unplug may trigger a requeue of a request from the device + * driver: make sure we are in process context while trying to + * merge two bfq_queues. + */ + spin_lock(&bfqd->eqm_lock); + if (!in_interrupt() && + (new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true))) { + if (cic_to_bfqq(RQ_CIC(rq), 1) != bfqq) + new_bfqq = cic_to_bfqq(RQ_CIC(rq), 1); + /* + * Release the request's reference to the old bfqq + * and make sure one is taken to the shared queue. + */ + new_bfqq->allocated[rq_data_dir(rq)]++; + bfqq->allocated[rq_data_dir(rq)]--; + atomic_inc(&new_bfqq->ref); + bfq_put_queue(bfqq); + if (cic_to_bfqq(RQ_CIC(rq), 1) == bfqq) + bfq_merge_bfqqs(bfqd, RQ_CIC(rq), bfqq, new_bfqq); + rq->elevator_private[1] = new_bfqq; + bfqq = new_bfqq; + } + spin_unlock(&bfqd->eqm_lock); + + bfq_init_prio_data(bfqq, RQ_CIC(rq)->ioc); + + bfq_add_rq_rb(rq); + + /* + * Here a newly-created bfq_queue has already started a weight-raising + * period: clear raising_time_left to prevent bfq_bfqq_save_state() + * from assigning it a full weight-raising period. See the detailed + * comments about this field in bfq_init_icq(). + */ + if (bfqq->cic != NULL) + bfqq->cic->raising_time_left = 0; + rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]); + list_add_tail(&rq->queuelist, &bfqq->fifo); + + bfq_rq_enqueued(bfqd, bfqq, rq); +} + +static void bfq_update_hw_tag(struct bfq_data *bfqd) +{ + bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, + bfqd->rq_in_driver); + + if (bfqd->hw_tag == 1) + return; + + /* + * This sample is valid if the number of outstanding requests + * is large enough to allow a queueing behavior. Note that the + * sum is not exact, as it's not taking into account deactivated + * requests. + */ + if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) + return; + + if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) + return; + + bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; + bfqd->max_rq_in_driver = 0; + bfqd->hw_tag_samples = 0; +} + +static void bfq_completed_request(struct request_queue *q, struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_data *bfqd = bfqq->bfqd; + const int sync = rq_is_sync(rq); + + bfq_log_bfqq(bfqd, bfqq, "completed %u sects req (%d)", + blk_rq_sectors(rq), sync); + + bfq_update_hw_tag(bfqd); + + WARN_ON(!bfqd->rq_in_driver); + WARN_ON(!bfqq->dispatched); + bfqd->rq_in_driver--; + bfqq->dispatched--; + + if (bfq_bfqq_sync(bfqq)) + bfqd->sync_flight--; + + if (sync) + RQ_CIC(rq)->ttime.last_end_request = jiffies; + + /* + * If this is the active queue, check if it needs to be expired, + * or if we want to idle in case it has no pending requests. 
+ */ + if (bfqd->active_queue == bfqq) { + int budg_timeout = bfq_may_expire_for_budg_timeout(bfqq); + if (bfq_bfqq_budget_new(bfqq)) + bfq_set_budget_timeout(bfqd); + + if (bfq_bfqq_must_idle(bfqq, budg_timeout)) + bfq_arm_slice_timer(bfqd); + else if (budg_timeout) + bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); + } + + if (!bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); +} + +static inline int __bfq_may_queue(struct bfq_queue *bfqq) +{ + if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { + bfq_clear_bfqq_must_alloc(bfqq); + return ELV_MQUEUE_MUST; + } + + return ELV_MQUEUE_MAY; +} + +static int bfq_may_queue(struct request_queue *q, int rw) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct task_struct *tsk = current; + struct cfq_io_context *cic; + struct bfq_queue *bfqq; + + /* + * Don't force setup of a queue from here, as a call to may_queue + * does not necessarily imply that a request actually will be queued. + * So just lookup a possibly existing queue, or return 'may queue' + * if that fails. + */ + cic = bfq_cic_lookup(bfqd, tsk->io_context); + if (cic == NULL) + return ELV_MQUEUE_MAY; + + spin_lock(&bfqd->eqm_lock); + bfqq = cic_to_bfqq(cic, rw_is_sync(rw)); + spin_unlock(&bfqd->eqm_lock); + if (bfqq != NULL) { + bfq_init_prio_data(bfqq, cic->ioc); + + return __bfq_may_queue(bfqq); + } + + return ELV_MQUEUE_MAY; +} + +/* + * Queue lock held here. + */ +static void bfq_put_request(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + if (bfqq != NULL) { + const int rw = rq_data_dir(rq); + + BUG_ON(!bfqq->allocated[rw]); + bfqq->allocated[rw]--; + + put_io_context(RQ_CIC(rq)->ioc); + + rq->elevator_private[0] = NULL; + rq->elevator_private[1] = NULL; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } +} + +/* + * Returns NULL if a new bfqq should be allocated, or the old bfqq if this + * was the last process referring to said bfqq. + */ +static struct bfq_queue * +bfq_split_bfqq(struct cfq_io_context *cic, struct bfq_queue *bfqq) +{ + bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); + + put_io_context(cic->ioc); + + if (bfqq_process_refs(bfqq) == 1) { + bfqq->pid = current->pid; + bfq_clear_bfqq_some_coop_idle(bfqq); + bfq_clear_bfqq_coop(bfqq); + bfq_clear_bfqq_split_coop(bfqq); + return bfqq; + } + + cic_set_bfqq(cic, NULL, 1); + + bfq_put_cooperator(bfqq); + + bfq_put_queue(bfqq); + return NULL; +} + +/* + * Allocate bfq data structures associated with this request. + */ +static int bfq_set_request(struct request_queue *q, struct request *rq, + gfp_t gfp_mask) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct cfq_io_context *cic; + const int rw = rq_data_dir(rq); + const int is_sync = rq_is_sync(rq); + struct bfq_queue *bfqq; + struct bfq_group *bfqg; + unsigned long flags; + bool split = false; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + cic = bfq_get_io_context(bfqd, gfp_mask); + + spin_lock_irqsave(q->queue_lock, flags); + + if (cic == NULL) + goto queue_fail; + + bfqg = bfq_cic_update_cgroup(cic); + + spin_lock(&bfqd->eqm_lock); + +new_queue: + bfqq = cic_to_bfqq(cic, is_sync); + if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { + bfqq = bfq_get_queue(bfqd, bfqg, is_sync, cic->ioc, gfp_mask); + cic_set_bfqq(cic, bfqq, is_sync); + } else { + /* If the queue was seeky for too long, break it apart. 
*/ + if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { + bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); + bfqq = bfq_split_bfqq(cic, bfqq); + split = true; + if (!bfqq) + goto new_queue; + } + } + + bfqq->allocated[rw]++; + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, + atomic_read(&bfqq->ref)); + + rq->elevator_private[0] = cic; + rq->elevator_private[1] = bfqq; + + /* + * If a bfq_queue has only one process reference, it is owned + * by only one cfq_io_context: we can set the cic field of the + * bfq_queue to the address of that structure. Also, if the + * queue has just been split, mark a flag so that the + * information is available to the other scheduler hooks. + */ + if (bfqq_process_refs(bfqq) == 1) { + bfqq->cic = cic; + if (split) { + bfq_mark_bfqq_just_split(bfqq); + /* + * If the queue has just been split from a shared queue, + * restore the idle window and the possible weight + * raising period. + */ + bfq_bfqq_resume_state(bfqq, cic); + } + } + + spin_unlock(&bfqd->eqm_lock); + spin_unlock_irqrestore(q->queue_lock, flags); + + return 0; + +queue_fail: + if (cic != NULL) + put_io_context(cic->ioc); + + bfq_schedule_dispatch(bfqd); + spin_unlock_irqrestore(q->queue_lock, flags); + + return 1; +} + +static void bfq_kick_queue(struct work_struct *work) +{ + struct bfq_data *bfqd = + container_of(work, struct bfq_data, unplug_work); + struct request_queue *q = bfqd->queue; + + spin_lock_irq(q->queue_lock); + __blk_run_queue(q); + spin_unlock_irq(q->queue_lock); +} + +/* + * Handler of the expiration of the timer running if the active_queue + * is idling inside its time slice. + */ +static void bfq_idle_slice_timer(unsigned long data) +{ + struct bfq_data *bfqd = (struct bfq_data *)data; + struct bfq_queue *bfqq; + unsigned long flags; + enum bfqq_expiration reason; + + spin_lock_irqsave(bfqd->queue->queue_lock, flags); + + bfqq = bfqd->active_queue; + /* + * Theoretical race here: active_queue can be NULL or different + * from the queue that was idling if the timer handler spins on + * the queue_lock and a new request arrives for the current + * queue and there is a full dispatch cycle that changes the + * active_queue. This can hardly happen, but in the worst case + * we just expire a queue too early. 
+ */ + if (bfqq != NULL) { + bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); + if (bfq_bfqq_budget_timeout(bfqq)) + /* + * Also here the queue can be safely expired + * for budget timeout without wasting + * guarantees + */ + reason = BFQ_BFQQ_BUDGET_TIMEOUT; + else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) + /* + * The queue may not be empty upon timer expiration, + * because we may not disable the timer when the first + * request of the active queue arrives during + * disk idling + */ + reason = BFQ_BFQQ_TOO_IDLE; + else + goto schedule_dispatch; + + bfq_bfqq_expire(bfqd, bfqq, 1, reason); + } + +schedule_dispatch: + bfq_schedule_dispatch(bfqd); + + spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); +} + +static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) +{ + del_timer_sync(&bfqd->idle_slice_timer); + cancel_work_sync(&bfqd->unplug_work); +} + +static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, + struct bfq_queue **bfqq_ptr) +{ + struct bfq_group *root_group = bfqd->root_group; + struct bfq_queue *bfqq = *bfqq_ptr; + + bfq_log(bfqd, "put_async_bfqq: %p", bfqq); + if (bfqq != NULL) { + bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); + bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + *bfqq_ptr = NULL; + } +} + +/* + * Release all the bfqg references to its async queues. If we are + * deallocating the group these queues may still contain requests, so + * we reparent them to the root cgroup (i.e., the only one that will + * exist for sure untill all the requests on a device are gone). + */ +static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) +{ + int i, j; + + for (i = 0; i < 2; i++) + for (j = 0; j < IOPRIO_BE_NR; j++) + __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); + + __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); +} + +static void bfq_exit_queue(struct elevator_queue *e) +{ + struct bfq_data *bfqd = e->elevator_data; + struct request_queue *q = bfqd->queue; + struct bfq_queue *bfqq, *n; + struct cfq_io_context *cic; + + bfq_shutdown_timer_wq(bfqd); + + spin_lock_irq(q->queue_lock); + + while (!list_empty(&bfqd->cic_list)) { + cic = list_entry(bfqd->cic_list.next, struct cfq_io_context, + queue_list); + __bfq_exit_single_io_context(bfqd, cic); + } + + BUG_ON(bfqd->active_queue != NULL); + list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) + bfq_deactivate_bfqq(bfqd, bfqq, 0); + + bfq_disconnect_groups(bfqd); + spin_unlock_irq(q->queue_lock); + + bfq_shutdown_timer_wq(bfqd); + + spin_lock(&cic_index_lock); + ida_remove(&cic_index_ida, bfqd->cic_index); + spin_unlock(&cic_index_lock); + + /* Wait for cic->key accessors to exit their grace periods. 
*/ + synchronize_rcu(); + + BUG_ON(timer_pending(&bfqd->idle_slice_timer)); + + bfq_free_root_group(bfqd); + kfree(bfqd); +} + +static int bfq_alloc_cic_index(void) +{ + int index, error; + + do { + if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&cic_index_lock); + error = ida_get_new(&cic_index_ida, &index); + spin_unlock(&cic_index_lock); + if (error && error != -EAGAIN) + return error; + } while (error); + + return index; +} + +static void *bfq_init_queue(struct request_queue *q) +{ + struct bfq_group *bfqg; + struct bfq_data *bfqd; + int i; + + i = bfq_alloc_cic_index(); + if (i < 0) + return NULL; + + bfqd = kmalloc_node(sizeof(*bfqd), GFP_KERNEL | __GFP_ZERO, q->node); + if (bfqd == NULL) + return NULL; + + bfqd->cic_index = i; + + /* + * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. + * Grab a permanent reference to it, so that the normal code flow + * will not attempt to free it. + */ + bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); + atomic_inc(&bfqd->oom_bfqq.ref); + + spin_lock_init(&bfqd->eqm_lock); + INIT_LIST_HEAD(&bfqd->cic_list); + + bfqd->queue = q; + + bfqg = bfq_alloc_root_group(bfqd, q->node); + if (bfqg == NULL) { + kfree(bfqd); + return NULL; + } + + bfqd->root_group = bfqg; + + init_timer(&bfqd->idle_slice_timer); + bfqd->idle_slice_timer.function = bfq_idle_slice_timer; + bfqd->idle_slice_timer.data = (unsigned long)bfqd; + + bfqd->rq_pos_tree = RB_ROOT; + + INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); + + INIT_LIST_HEAD(&bfqd->active_list); + INIT_LIST_HEAD(&bfqd->idle_list); + + bfqd->hw_tag = -1; + + bfqd->bfq_max_budget = bfq_default_max_budget; + + bfqd->bfq_quantum = bfq_quantum; + bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; + bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; + bfqd->bfq_back_max = bfq_back_max; + bfqd->bfq_back_penalty = bfq_back_penalty; + bfqd->bfq_slice_idle = bfq_slice_idle; + bfqd->bfq_class_idle_last_service = 0; + bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; + bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; + bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; + + bfqd->low_latency = true; + + bfqd->bfq_raising_coeff = 20; + bfqd->bfq_raising_rt_max_time = msecs_to_jiffies(300); + bfqd->bfq_raising_max_time = 0; + bfqd->bfq_raising_min_idle_time = msecs_to_jiffies(2000); + bfqd->bfq_raising_min_inter_arr_async = msecs_to_jiffies(500); + bfqd->bfq_raising_max_softrt_rate = 7000; + + /* Initially estimate the device's peak rate as the reference rate */ + if (blk_queue_nonrot(bfqd->queue)) { + bfqd->RT_prod = R_nonrot * T_nonrot; + bfqd->peak_rate = R_nonrot; + } else { + bfqd->RT_prod = R_rot * T_rot; + bfqd->peak_rate = R_rot; + } + + return bfqd; +} + +static void bfq_slab_kill(void) +{ + if (bfq_pool != NULL) + kmem_cache_destroy(bfq_pool); + if (bfq_ioc_pool != NULL) + kmem_cache_destroy(bfq_ioc_pool); +} + +static int __init bfq_slab_setup(void) +{ + bfq_pool = KMEM_CACHE(bfq_queue, 0); + if (bfq_pool == NULL) + goto fail; + + bfq_ioc_pool = kmem_cache_create("bfq_io_context", + sizeof(struct cfq_io_context), + __alignof__(struct cfq_io_context), + 0, NULL); + if (bfq_ioc_pool == NULL) + goto fail; + + return 0; +fail: + bfq_slab_kill(); + return -ENOMEM; +} + +static ssize_t bfq_var_show(unsigned int var, char *page) +{ + return sprintf(page, "%d\n", var); +} + +static ssize_t bfq_var_store(unsigned long *var, const char *page, size_t count) +{ + unsigned long new_val; + int ret = strict_strtoul(page, 10, &new_val); + + if (ret == 0) + *var = new_val; + + return 
count; +} + +static ssize_t bfq_raising_max_time_show(struct elevator_queue *e, char *page) +{ + struct bfq_data *bfqd = e->elevator_data; + return sprintf(page, "%d\n", bfqd->bfq_raising_max_time > 0 ? + jiffies_to_msecs(bfqd->bfq_raising_max_time) : + jiffies_to_msecs(bfq_wrais_duration(bfqd))); +} + +static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) +{ + struct bfq_queue *bfqq; + struct bfq_data *bfqd = e->elevator_data; + ssize_t num_char = 0; + + spin_lock_irq(bfqd->queue->queue_lock); + + num_char += sprintf(page + num_char, "Active:\n"); + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { + num_char += sprintf(page + num_char, + "pid%d: weight %hu, dur %d/%u\n", + bfqq->pid, + bfqq->entity.weight, + jiffies_to_msecs(jiffies - + bfqq->last_rais_start_finish), + jiffies_to_msecs(bfqq->raising_cur_max_time)); + } + num_char += sprintf(page + num_char, "Idle:\n"); + list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { + num_char += sprintf(page + num_char, + "pid%d: weight %hu, dur %d/%u\n", + bfqq->pid, + bfqq->entity.weight, + jiffies_to_msecs(jiffies - + bfqq->last_rais_start_finish), + jiffies_to_msecs(bfqq->raising_cur_max_time)); + } + + spin_unlock_irq(bfqd->queue->queue_lock); + + return num_char; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct bfq_data *bfqd = e->elevator_data; \ + unsigned int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return bfq_var_show(__data, (page)); \ +} +SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0); +SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); +SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); +SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); +SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); +SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); +SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); +SHOW_FUNCTION(bfq_max_budget_async_rq_show, bfqd->bfq_max_budget_async_rq, 0); +SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); +SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); +SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); +SHOW_FUNCTION(bfq_raising_coeff_show, bfqd->bfq_raising_coeff, 0); +SHOW_FUNCTION(bfq_raising_rt_max_time_show, bfqd->bfq_raising_rt_max_time, 1); +SHOW_FUNCTION(bfq_raising_min_idle_time_show, bfqd->bfq_raising_min_idle_time, + 1); +SHOW_FUNCTION(bfq_raising_min_inter_arr_async_show, + bfqd->bfq_raising_min_inter_arr_async, + 1); +SHOW_FUNCTION(bfq_raising_max_softrt_rate_show, + bfqd->bfq_raising_max_softrt_rate, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t \ +__FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ + struct bfq_data *bfqd = e->elevator_data; \ + unsigned long uninitialized_var(__data); \ + int ret = bfq_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + if (__CONV) \ + *(__PTR) = msecs_to_jiffies(__data); \ + else \ + *(__PTR) = __data; \ + return ret; \ +} +STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); +STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, + INT_MAX, 1); +STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, + INT_MAX, 1); +STORE_FUNCTION(bfq_back_seek_max_store, 
&bfqd->bfq_back_max, 0, INT_MAX, 0); +STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, + INT_MAX, 0); +STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, + 1, INT_MAX, 0); +STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, + INT_MAX, 1); +STORE_FUNCTION(bfq_raising_coeff_store, &bfqd->bfq_raising_coeff, 1, + INT_MAX, 0); +STORE_FUNCTION(bfq_raising_max_time_store, &bfqd->bfq_raising_max_time, 0, + INT_MAX, 1); +STORE_FUNCTION(bfq_raising_rt_max_time_store, &bfqd->bfq_raising_rt_max_time, 0, + INT_MAX, 1); +STORE_FUNCTION(bfq_raising_min_idle_time_store, + &bfqd->bfq_raising_min_idle_time, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_raising_min_inter_arr_async_store, + &bfqd->bfq_raising_min_inter_arr_async, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_raising_max_softrt_rate_store, + &bfqd->bfq_raising_max_softrt_rate, 0, INT_MAX, 0); +#undef STORE_FUNCTION + +/* do nothing for the moment */ +static ssize_t bfq_weights_store(struct elevator_queue *e, + const char *page, size_t count) +{ + return count; +} + +static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) +{ + u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); + + if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) + return bfq_calc_max_budget(bfqd->peak_rate, timeout); + else + return bfq_default_max_budget; +} + +static ssize_t bfq_max_budget_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long uninitialized_var(__data); + int ret = bfq_var_store(&__data, (page), count); + + if (__data == 0) + bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); + else { + if (__data > INT_MAX) + __data = INT_MAX; + bfqd->bfq_max_budget = __data; + } + + bfqd->bfq_user_max_budget = __data; + + return ret; +} + +static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long uninitialized_var(__data); + int ret = bfq_var_store(&__data, (page), count); + + if (__data < 1) + __data = 1; + else if (__data > INT_MAX) + __data = INT_MAX; + + bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); + if (bfqd->bfq_user_max_budget == 0) + bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); + + return ret; +} + +static ssize_t bfq_low_latency_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long uninitialized_var(__data); + int ret = bfq_var_store(&__data, (page), count); + + if (__data > 1) + __data = 1; + if (__data == 0 && bfqd->low_latency != 0) + bfq_end_raising(bfqd); + bfqd->low_latency = __data; + + return ret; +} + +#define BFQ_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) + +static struct elv_fs_entry bfq_attrs[] = { + BFQ_ATTR(quantum), + BFQ_ATTR(fifo_expire_sync), + BFQ_ATTR(fifo_expire_async), + BFQ_ATTR(back_seek_max), + BFQ_ATTR(back_seek_penalty), + BFQ_ATTR(slice_idle), + BFQ_ATTR(max_budget), + BFQ_ATTR(max_budget_async_rq), + BFQ_ATTR(timeout_sync), + BFQ_ATTR(timeout_async), + BFQ_ATTR(low_latency), + BFQ_ATTR(raising_coeff), + BFQ_ATTR(raising_max_time), + BFQ_ATTR(raising_rt_max_time), + BFQ_ATTR(raising_min_idle_time), + BFQ_ATTR(raising_min_inter_arr_async), + BFQ_ATTR(raising_max_softrt_rate), + BFQ_ATTR(weights), + __ATTR_NULL +}; + +static struct elevator_type iosched_bfq 
= { + .ops = { + .elevator_merge_fn = bfq_merge, + .elevator_merged_fn = bfq_merged_request, + .elevator_merge_req_fn = bfq_merged_requests, + .elevator_allow_merge_fn = bfq_allow_merge, + .elevator_dispatch_fn = bfq_dispatch_requests, + .elevator_add_req_fn = bfq_insert_request, + .elevator_activate_req_fn = bfq_activate_request, + .elevator_deactivate_req_fn = bfq_deactivate_request, + .elevator_completed_req_fn = bfq_completed_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, + .elevator_set_req_fn = bfq_set_request, + .elevator_put_req_fn = bfq_put_request, + .elevator_may_queue_fn = bfq_may_queue, + .elevator_init_fn = bfq_init_queue, + .elevator_exit_fn = bfq_exit_queue, + .trim = bfq_free_io_context, + }, + .elevator_attrs = bfq_attrs, + .elevator_name = "bfq", + .elevator_owner = THIS_MODULE, +}; + +static int __init bfq_init(void) +{ + /* + * Can be 0 on HZ < 1000 setups. + */ + if (bfq_slice_idle == 0) + bfq_slice_idle = 1; + + if (bfq_timeout_async == 0) + bfq_timeout_async = 1; + + if (bfq_slab_setup()) + return -ENOMEM; + + elv_register(&iosched_bfq); + + return 0; +} + +static void __exit bfq_exit(void) +{ + DECLARE_COMPLETION_ONSTACK(all_gone); + elv_unregister(&iosched_bfq); + bfq_ioc_gone = &all_gone; + /* bfq_ioc_gone's update must be visible before reading bfq_ioc_count */ + smp_wmb(); + if (elv_ioc_count_read(bfq_ioc_count) != 0) + wait_for_completion(&all_gone); + ida_destroy(&cic_index_ida); + bfq_slab_kill(); +} + +module_init(bfq_init); +module_exit(bfq_exit); + +MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Budget Fair Queueing IO scheduler"); diff --git a/block/bfq-sched.c b/block/bfq-sched.c new file mode 100644 index 00000000000..39779a8b94e --- /dev/null +++ b/block/bfq-sched.c @@ -0,0 +1,1040 @@ +/* + * BFQ: Hierarchical B-WF2Q+ scheduler. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + */ + +#ifdef CONFIG_CGROUP_BFQIO +#define for_each_entity(entity) \ + for (; entity != NULL; entity = entity->parent) + +#define for_each_entity_safe(entity, parent) \ + for (; entity && ({ parent = entity->parent; 1; }); entity = parent) + +static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, + int extract, + struct bfq_data *bfqd); + +static inline void bfq_update_budget(struct bfq_entity *next_active) +{ + struct bfq_entity *bfqg_entity; + struct bfq_group *bfqg; + struct bfq_sched_data *group_sd; + + BUG_ON(next_active == NULL); + + group_sd = next_active->sched_data; + + bfqg = container_of(group_sd, struct bfq_group, sched_data); + /* + * bfq_group's my_entity field is not NULL only if the group + * is not the root group. We must not touch the root entity + * as it must never become an active entity. + */ + bfqg_entity = bfqg->my_entity; + if (bfqg_entity != NULL) + bfqg_entity->budget = next_active->budget; +} + +static int bfq_update_next_active(struct bfq_sched_data *sd) +{ + struct bfq_entity *next_active; + + if (sd->active_entity != NULL) + /* will update/requeue at the end of service */ + return 0; + + /* + * NOTE: this can be improved in many ways, such as returning + * 1 (and thus propagating upwards the update) only when the + * budget changes, or caching the bfqq that will be scheduled + * next from this subtree. By now we worry more about + * correctness than about performance... 
+ */ + next_active = bfq_lookup_next_entity(sd, 0, NULL); + sd->next_active = next_active; + + if (next_active != NULL) + bfq_update_budget(next_active); + + return 1; +} + +static inline void bfq_check_next_active(struct bfq_sched_data *sd, + struct bfq_entity *entity) +{ + BUG_ON(sd->next_active != entity); +} +#else +#define for_each_entity(entity) \ + for (; entity != NULL; entity = NULL) + +#define for_each_entity_safe(entity, parent) \ + for (parent = NULL; entity != NULL; entity = parent) + +static inline int bfq_update_next_active(struct bfq_sched_data *sd) +{ + return 0; +} + +static inline void bfq_check_next_active(struct bfq_sched_data *sd, + struct bfq_entity *entity) +{ +} + +static inline void bfq_update_budget(struct bfq_entity *next_active) +{ +} +#endif + +/* + * Shift for timestamp calculations. This actually limits the maximum + * service allowed in one timestamp delta (small shift values increase it), + * the maximum total weight that can be used for the queues in the system + * (big shift values increase it), and the period of virtual time wraparounds. + */ +#define WFQ_SERVICE_SHIFT 22 + +/** + * bfq_gt - compare two timestamps. + * @a: first ts. + * @b: second ts. + * + * Return @a > @b, dealing with wrapping correctly. + */ +static inline int bfq_gt(u64 a, u64 b) +{ + return (s64)(a - b) > 0; +} + +static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = NULL; + + BUG_ON(entity == NULL); + + if (entity->my_sched_data == NULL) + bfqq = container_of(entity, struct bfq_queue, entity); + + return bfqq; +} + + +/** + * bfq_delta - map service into the virtual time domain. + * @service: amount of service. + * @weight: scale factor (weight of an entity or weight sum). + */ +static inline u64 bfq_delta(unsigned long service, + unsigned long weight) +{ + u64 d = (u64)service << WFQ_SERVICE_SHIFT; + + do_div(d, weight); + return d; +} + +/** + * bfq_calc_finish - assign the finish time to an entity. + * @entity: the entity to act upon. + * @service: the service to be charged to the entity. + */ +static inline void bfq_calc_finish(struct bfq_entity *entity, + unsigned long service) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(entity->weight == 0); + + entity->finish = entity->start + + bfq_delta(service, entity->weight); + + if (bfqq != NULL) { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "calc_finish: serv %lu, w %d", + service, entity->weight); + bfq_log_bfqq(bfqq->bfqd, bfqq, + "calc_finish: start %llu, finish %llu, delta %llu", + entity->start, entity->finish, + bfq_delta(service, entity->weight)); + } +} + +/** + * bfq_entity_of - get an entity from a node. + * @node: the node field of the entity. + * + * Convert a node pointer to the relative entity. This is used only + * to simplify the logic of some functions and not as the generic + * conversion mechanism because, e.g., in the tree walking functions, + * the check for a %NULL value would be redundant. + */ +static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) +{ + struct bfq_entity *entity = NULL; + + if (node != NULL) + entity = rb_entry(node, struct bfq_entity, rb_node); + + return entity; +} + +/** + * bfq_extract - remove an entity from a tree. + * @root: the tree root. + * @entity: the entity to remove. 
+ */ +static inline void bfq_extract(struct rb_root *root, + struct bfq_entity *entity) +{ + BUG_ON(entity->tree != root); + + entity->tree = NULL; + rb_erase(&entity->rb_node, root); +} + +/** + * bfq_idle_extract - extract an entity from the idle tree. + * @st: the service tree of the owning @entity. + * @entity: the entity being removed. + */ +static void bfq_idle_extract(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *next; + + BUG_ON(entity->tree != &st->idle); + + if (entity == st->first_idle) { + next = rb_next(&entity->rb_node); + st->first_idle = bfq_entity_of(next); + } + + if (entity == st->last_idle) { + next = rb_prev(&entity->rb_node); + st->last_idle = bfq_entity_of(next); + } + + bfq_extract(&st->idle, entity); + + if (bfqq != NULL) + list_del(&bfqq->bfqq_list); +} + +/** + * bfq_insert - generic tree insertion. + * @root: tree root. + * @entity: entity to insert. + * + * This is used for the idle and the active tree, since they are both + * ordered by finish time. + */ +static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) +{ + struct bfq_entity *entry; + struct rb_node **node = &root->rb_node; + struct rb_node *parent = NULL; + + BUG_ON(entity->tree != NULL); + + while (*node != NULL) { + parent = *node; + entry = rb_entry(parent, struct bfq_entity, rb_node); + + if (bfq_gt(entry->finish, entity->finish)) + node = &parent->rb_left; + else + node = &parent->rb_right; + } + + rb_link_node(&entity->rb_node, parent, node); + rb_insert_color(&entity->rb_node, root); + + entity->tree = root; +} + +/** + * bfq_update_min - update the min_start field of a entity. + * @entity: the entity to update. + * @node: one of its children. + * + * This function is called when @entity may store an invalid value for + * min_start due to updates to the active tree. The function assumes + * that the subtree rooted at @node (which may be its left or its right + * child) has a valid min_start value. + */ +static inline void bfq_update_min(struct bfq_entity *entity, + struct rb_node *node) +{ + struct bfq_entity *child; + + if (node != NULL) { + child = rb_entry(node, struct bfq_entity, rb_node); + if (bfq_gt(entity->min_start, child->min_start)) + entity->min_start = child->min_start; + } +} + +/** + * bfq_update_active_node - recalculate min_start. + * @node: the node to update. + * + * @node may have changed position or one of its children may have moved, + * this function updates its min_start value. The left and right subtrees + * are assumed to hold a correct min_start value. + */ +static inline void bfq_update_active_node(struct rb_node *node) +{ + struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); + + entity->min_start = entity->start; + bfq_update_min(entity, node->rb_right); + bfq_update_min(entity, node->rb_left); +} + +/** + * bfq_update_active_tree - update min_start for the whole active tree. + * @node: the starting node. + * + * @node must be the deepest modified node after an update. This function + * updates its min_start using the values held by its children, assuming + * that they did not change, and then updates all the nodes that may have + * changed in the path to the root. The only nodes that may have changed + * are the ones in the path or their siblings. 
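
The extra key maintained by bfq_update_min() and bfq_update_active_node() above is what makes the O(log N) lookup on the active tree possible: every node caches the minimum start time found in the subtree rooted at it. The invariant, written out as a checker in the style of the surrounding code (illustrative only, not part of the scheduler):

static u64 bfq_min_ts(u64 a, u64 b)
{
        return bfq_gt(a, b) ? b : a;
}

/*
 * Return 1 iff every node satisfies
 *   node->min_start == min(node->start, left->min_start, right->min_start)
 * where missing children are simply ignored.
 */
static int bfq_check_min_start(struct rb_node *node)
{
        struct bfq_entity *entity;
        u64 expected;

        if (node == NULL)
                return 1;

        entity = rb_entry(node, struct bfq_entity, rb_node);
        expected = entity->start;

        if (node->rb_left != NULL)
                expected = bfq_min_ts(expected,
                        rb_entry(node->rb_left, struct bfq_entity,
                                 rb_node)->min_start);
        if (node->rb_right != NULL)
                expected = bfq_min_ts(expected,
                        rb_entry(node->rb_right, struct bfq_entity,
                                 rb_node)->min_start);

        return entity->min_start == expected &&
                bfq_check_min_start(node->rb_left) &&
                bfq_check_min_start(node->rb_right);
}
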
+ */ +static void bfq_update_active_tree(struct rb_node *node) +{ + struct rb_node *parent; + +up: + bfq_update_active_node(node); + + parent = rb_parent(node); + if (parent == NULL) + return; + + if (node == parent->rb_left && parent->rb_right != NULL) + bfq_update_active_node(parent->rb_right); + else if (parent->rb_left != NULL) + bfq_update_active_node(parent->rb_left); + + node = parent; + goto up; +} + +/** + * bfq_active_insert - insert an entity in the active tree of its group/device. + * @st: the service tree of the entity. + * @entity: the entity being inserted. + * + * The active tree is ordered by finish time, but an extra key is kept + * per each node, containing the minimum value for the start times of + * its children (and the node itself), so it's possible to search for + * the eligible node with the lowest finish time in logarithmic time. + */ +static void bfq_active_insert(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *node = &entity->rb_node; + + bfq_insert(&st->active, entity); + + if (node->rb_left != NULL) + node = node->rb_left; + else if (node->rb_right != NULL) + node = node->rb_right; + + bfq_update_active_tree(node); + + if (bfqq != NULL) + list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); +} + +/** + * bfq_ioprio_to_weight - calc a weight from an ioprio. + * @ioprio: the ioprio value to convert. + */ +static unsigned short bfq_ioprio_to_weight(int ioprio) +{ + WARN_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); + return IOPRIO_BE_NR - ioprio; +} + +/** + * bfq_weight_to_ioprio - calc an ioprio from a weight. + * @weight: the weight value to convert. + * + * To preserve as mush as possible the old only-ioprio user interface, + * 0 is used as an escape ioprio value for weights (numerically) equal or + * larger than IOPRIO_BE_NR + */ +static unsigned short bfq_weight_to_ioprio(int weight) +{ + WARN_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); + return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight; +} + +static inline void bfq_get_entity(struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + if (bfqq != NULL) { + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", + bfqq, atomic_read(&bfqq->ref)); + } +} + +/** + * bfq_find_deepest - find the deepest node that an extraction can modify. + * @node: the node being removed. + * + * Do the first step of an extraction in an rb tree, looking for the + * node that will replace @node, and returning the deepest node that + * the following modifications to the tree can touch. If @node is the + * last node in the tree return %NULL. + */ +static struct rb_node *bfq_find_deepest(struct rb_node *node) +{ + struct rb_node *deepest; + + if (node->rb_right == NULL && node->rb_left == NULL) + deepest = rb_parent(node); + else if (node->rb_right == NULL) + deepest = node->rb_left; + else if (node->rb_left == NULL) + deepest = node->rb_right; + else { + deepest = rb_next(node); + if (deepest->rb_right != NULL) + deepest = deepest->rb_right; + else if (rb_parent(deepest) != node) + deepest = rb_parent(deepest); + } + + return deepest; +} + +/** + * bfq_active_extract - remove an entity from the active tree. + * @st: the service_tree containing the tree. + * @entity: the entity being removed. 
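
The conversion pair above is the entire legacy interface between ioprios and weights: best-effort ioprio 0 (the highest) maps to weight IOPRIO_BE_NR = 8, ioprio 7 (the lowest) maps to weight 1, and any weight of IOPRIO_BE_NR or more, which has no ioprio representation, is reported back through the escape value 0. A small standalone sketch of the two mappings, with the kernel's IOPRIO_BE_NR value hard-coded:

#include <stdio.h>

#define IOPRIO_BE_NR    8       /* number of best-effort priority levels */

/* Same formulas as bfq_ioprio_to_weight()/bfq_weight_to_ioprio() above. */
static unsigned short ioprio_to_weight(int ioprio)
{
        return IOPRIO_BE_NR - ioprio;
}

static unsigned short weight_to_ioprio(int weight)
{
        return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
}

int main(void)
{
        int ioprio;

        for (ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++)
                printf("ioprio %d <-> weight %u\n",
                       ioprio, ioprio_to_weight(ioprio));

        /* weights with no ioprio representation escape to 0 */
        printf("weight 100 -> ioprio %u\n", weight_to_ioprio(100));
        return 0;
}
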
+ */ +static void bfq_active_extract(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *node; + + node = bfq_find_deepest(&entity->rb_node); + bfq_extract(&st->active, entity); + + if (node != NULL) + bfq_update_active_tree(node); + + if (bfqq != NULL) + list_del(&bfqq->bfqq_list); +} + +/** + * bfq_idle_insert - insert an entity into the idle tree. + * @st: the service tree containing the tree. + * @entity: the entity to insert. + */ +static void bfq_idle_insert(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct bfq_entity *first_idle = st->first_idle; + struct bfq_entity *last_idle = st->last_idle; + + if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) + st->first_idle = entity; + if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) + st->last_idle = entity; + + bfq_insert(&st->idle, entity); + + if (bfqq != NULL) + list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); +} + +/** + * bfq_forget_entity - remove an entity from the wfq trees. + * @st: the service tree. + * @entity: the entity being removed. + * + * Update the device status and forget everything about @entity, putting + * the device reference to it, if it is a queue. Entities belonging to + * groups are not refcounted. + */ +static void bfq_forget_entity(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(!entity->on_st); + + entity->on_st = 0; + st->wsum -= entity->weight; + if (bfqq != NULL) { + bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } +} + +/** + * bfq_put_idle_entity - release the idle tree ref of an entity. + * @st: service tree for the entity. + * @entity: the entity being released. + */ +static void bfq_put_idle_entity(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + bfq_idle_extract(st, entity); + bfq_forget_entity(st, entity); +} + +/** + * bfq_forget_idle - update the idle tree if necessary. + * @st: the service tree to act upon. + * + * To preserve the global O(log N) complexity we only remove one entry here; + * as the idle tree will not grow indefinitely this can be done safely. + */ +static void bfq_forget_idle(struct bfq_service_tree *st) +{ + struct bfq_entity *first_idle = st->first_idle; + struct bfq_entity *last_idle = st->last_idle; + + if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && + !bfq_gt(last_idle->finish, st->vtime)) { + /* + * Forget the whole idle tree, increasing the vtime past + * the last finish time of idle entities. 
+ */ + st->vtime = last_idle->finish; + } + + if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) + bfq_put_idle_entity(st, first_idle); +} + +static struct bfq_service_tree * +__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, + struct bfq_entity *entity) +{ + struct bfq_service_tree *new_st = old_st; + + if (entity->ioprio_changed) { + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(old_st->wsum < entity->weight); + old_st->wsum -= entity->weight; + + if (entity->new_weight != entity->orig_weight) { + entity->orig_weight = entity->new_weight; + entity->ioprio = + bfq_weight_to_ioprio(entity->orig_weight); + } else if (entity->new_ioprio != entity->ioprio) { + entity->ioprio = entity->new_ioprio; + entity->orig_weight = + bfq_ioprio_to_weight(entity->ioprio); + } else + entity->new_weight = entity->orig_weight = + bfq_ioprio_to_weight(entity->ioprio); + + entity->ioprio_class = entity->new_ioprio_class; + entity->ioprio_changed = 0; + + /* + * NOTE: here we may be changing the weight too early, + * this will cause unfairness. The correct approach + * would have required additional complexity to defer + * weight changes to the proper time instants (i.e., + * when entity->finish <= old_st->vtime). + */ + new_st = bfq_entity_service_tree(entity); + entity->weight = entity->orig_weight * + (bfqq != NULL ? bfqq->raising_coeff : 1); + new_st->wsum += entity->weight; + + if (new_st != old_st) + entity->start = new_st->vtime; + } + + return new_st; +} + +/** + * bfq_bfqq_served - update the scheduler status after selection for service. + * @bfqq: the queue being served. + * @served: bytes to transfer. + * + * NOTE: this can be optimized, as the timestamps of upper level entities + * are synchronized every time a new bfqq is selected for service. By now, + * we keep it to better check consistency. + */ +static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) +{ + struct bfq_entity *entity = &bfqq->entity; + struct bfq_service_tree *st; + + for_each_entity(entity) { + st = bfq_entity_service_tree(entity); + + entity->service += served; + BUG_ON(entity->service > entity->budget); + BUG_ON(st->wsum == 0); + + st->vtime += bfq_delta(served, st->wsum); + bfq_forget_idle(st); + } + bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); +} + +/** + * bfq_bfqq_charge_full_budget - set the service to the entity budget. + * @bfqq: the queue that needs a service update. + * + * When it's not possible to be fair in the service domain, because + * a queue is not consuming its budget fast enough (the meaning of + * fast depends on the timeout parameter), we charge it a full + * budget. In this way we should obtain a sort of time-domain + * fairness among all the seeky/slow queues. + */ +static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); + + bfq_bfqq_served(bfqq, entity->budget - entity->service); +} + +/** + * __bfq_activate_entity - activate an entity. + * @entity: the entity being activated. + * + * Called whenever an entity is activated, i.e., it is not active and one + * of its children receives a new request, or has to be reactivated due to + * budget exhaustion. It uses the current budget of the entity (and the + * service received if @entity is active) of the queue to calculate its + * timestamps. 
+ */ +static void __bfq_activate_entity(struct bfq_entity *entity) +{ + struct bfq_sched_data *sd = entity->sched_data; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + + if (entity == sd->active_entity) { + BUG_ON(entity->tree != NULL); + /* + * If we are requeueing the current entity we have + * to take care of not charging to it service it has + * not received. + */ + bfq_calc_finish(entity, entity->service); + entity->start = entity->finish; + sd->active_entity = NULL; + } else if (entity->tree == &st->active) { + /* + * Requeueing an entity due to a change of some + * next_active entity below it. We reuse the old + * start time. + */ + bfq_active_extract(st, entity); + } else if (entity->tree == &st->idle) { + /* + * Must be on the idle tree, bfq_idle_extract() will + * check for that. + */ + bfq_idle_extract(st, entity); + entity->start = bfq_gt(st->vtime, entity->finish) ? + st->vtime : entity->finish; + } else { + /* + * The finish time of the entity may be invalid, and + * it is in the past for sure, otherwise the queue + * would have been on the idle tree. + */ + entity->start = st->vtime; + st->wsum += entity->weight; + bfq_get_entity(entity); + + BUG_ON(entity->on_st); + entity->on_st = 1; + } + + st = __bfq_entity_update_weight_prio(st, entity); + bfq_calc_finish(entity, entity->budget); + bfq_active_insert(st, entity); +} + +/** + * bfq_activate_entity - activate an entity and its ancestors if necessary. + * @entity: the entity to activate. + * + * Activate @entity and all the entities on the path from it to the root. + */ +static void bfq_activate_entity(struct bfq_entity *entity) +{ + struct bfq_sched_data *sd; + + for_each_entity(entity) { + __bfq_activate_entity(entity); + + sd = entity->sched_data; + if (!bfq_update_next_active(sd)) + /* + * No need to propagate the activation to the + * upper entities, as they will be updated when + * the active entity is rescheduled. + */ + break; + } +} + +/** + * __bfq_deactivate_entity - deactivate an entity from its service tree. + * @entity: the entity to deactivate. + * @requeue: if false, the entity will not be put into the idle tree. + * + * Deactivate an entity, independently from its previous state. If the + * entity was not on a service tree just return, otherwise if it is on + * any scheduler tree, extract it from that tree, and if necessary + * and if the caller did not specify @requeue, put it on the idle tree. + * + * Return %1 if the caller should update the entity hierarchy, i.e., + * if the entity was under service or if it was the next_active for + * its sched_data; return %0 otherwise. 
+ */ +static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) +{ + struct bfq_sched_data *sd = entity->sched_data; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + int was_active = entity == sd->active_entity; + int ret = 0; + + if (!entity->on_st) + return 0; + + BUG_ON(was_active && entity->tree != NULL); + + if (was_active) { + bfq_calc_finish(entity, entity->service); + sd->active_entity = NULL; + } else if (entity->tree == &st->active) + bfq_active_extract(st, entity); + else if (entity->tree == &st->idle) + bfq_idle_extract(st, entity); + else if (entity->tree != NULL) + BUG(); + + if (was_active || sd->next_active == entity) + ret = bfq_update_next_active(sd); + + if (!requeue || !bfq_gt(entity->finish, st->vtime)) + bfq_forget_entity(st, entity); + else + bfq_idle_insert(st, entity); + + BUG_ON(sd->active_entity == entity); + BUG_ON(sd->next_active == entity); + + return ret; +} + +/** + * bfq_deactivate_entity - deactivate an entity. + * @entity: the entity to deactivate. + * @requeue: true if the entity can be put on the idle tree + */ +static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) +{ + struct bfq_sched_data *sd; + struct bfq_entity *parent; + + for_each_entity_safe(entity, parent) { + sd = entity->sched_data; + + if (!__bfq_deactivate_entity(entity, requeue)) + /* + * The parent entity is still backlogged, and + * we don't need to update it as it is still + * under service. + */ + break; + + if (sd->next_active != NULL) + /* + * The parent entity is still backlogged and + * the budgets on the path towards the root + * need to be updated. + */ + goto update; + + /* + * If we reach there the parent is no more backlogged and + * we want to propagate the dequeue upwards. + */ + requeue = 1; + } + + return; + +update: + entity = parent; + for_each_entity(entity) { + __bfq_activate_entity(entity); + + sd = entity->sched_data; + if (!bfq_update_next_active(sd)) + break; + } +} + +/** + * bfq_update_vtime - update vtime if necessary. + * @st: the service tree to act upon. + * + * If necessary update the service tree vtime to have at least one + * eligible entity, skipping to its start time. Assumes that the + * active tree of the device is not empty. + * + * NOTE: this hierarchical implementation updates vtimes quite often, + * we may end up with reactivated tasks getting timestamps after a + * vtime skip done because we needed a ->first_active entity on some + * intermediate node. + */ +static void bfq_update_vtime(struct bfq_service_tree *st) +{ + struct bfq_entity *entry; + struct rb_node *node = st->active.rb_node; + + entry = rb_entry(node, struct bfq_entity, rb_node); + if (bfq_gt(entry->min_start, st->vtime)) { + st->vtime = entry->min_start; + bfq_forget_idle(st); + } +} + +/** + * bfq_first_active - find the eligible entity with the smallest finish time + * @st: the service tree to select from. + * + * This function searches the first schedulable entity, starting from the + * root of the tree and going on the left every time on this side there is + * a subtree with at least one eligible (start >= vtime) entity. The path + * on the right is followed only if a) the left subtree contains no eligible + * entities and b) no eligible entity has been found yet. 
+ */ +static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) +{ + struct bfq_entity *entry, *first = NULL; + struct rb_node *node = st->active.rb_node; + + while (node != NULL) { + entry = rb_entry(node, struct bfq_entity, rb_node); +left: + if (!bfq_gt(entry->start, st->vtime)) + first = entry; + + BUG_ON(bfq_gt(entry->min_start, st->vtime)); + + if (node->rb_left != NULL) { + entry = rb_entry(node->rb_left, + struct bfq_entity, rb_node); + if (!bfq_gt(entry->min_start, st->vtime)) { + node = node->rb_left; + goto left; + } + } + if (first != NULL) + break; + node = node->rb_right; + } + + BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); + return first; +} + +/** + * __bfq_lookup_next_entity - return the first eligible entity in @st. + * @st: the service tree. + * + * Update the virtual time in @st and return the first eligible entity + * it contains. + */ +static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, + bool force) +{ + struct bfq_entity *entity, *new_next_active = NULL; + + if (RB_EMPTY_ROOT(&st->active)) + return NULL; + + bfq_update_vtime(st); + entity = bfq_first_active_entity(st); + BUG_ON(bfq_gt(entity->start, st->vtime)); + + /* + * If the chosen entity does not match with the sched_data's + * next_active and we are forcedly serving the IDLE priority + * class tree, bubble up budget update. + */ + if (unlikely(force && entity != entity->sched_data->next_active)) { + new_next_active = entity; + for_each_entity(new_next_active) + bfq_update_budget(new_next_active); + } + + return entity; +} + +/** + * bfq_lookup_next_entity - return the first eligible entity in @sd. + * @sd: the sched_data. + * @extract: if true the returned entity will be also extracted from @sd. + * + * NOTE: since we cache the next_active entity at each level of the + * hierarchy, the complexity of the lookup can be decreased with + * absolutely no effort just returning the cached next_active value; + * we prefer to do full lookups to test the consistency of * the data + * structures. + */ +static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, + int extract, + struct bfq_data *bfqd) +{ + struct bfq_service_tree *st = sd->service_tree; + struct bfq_entity *entity; + int i=0; + + BUG_ON(sd->active_entity != NULL); + + if (bfqd != NULL && + jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { + entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, true); + if (entity != NULL) { + i = BFQ_IOPRIO_CLASSES - 1; + bfqd->bfq_class_idle_last_service = jiffies; + sd->next_active = entity; + } + } + for (; i < BFQ_IOPRIO_CLASSES; i++) { + entity = __bfq_lookup_next_entity(st + i, false); + if (entity != NULL) { + if (extract) { + bfq_check_next_active(sd, entity); + bfq_active_extract(st + i, entity); + sd->active_entity = entity; + sd->next_active = NULL; + } + break; + } + } + + return entity; +} + +/* + * Get next queue for service. 
+ */ +static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) +{ + struct bfq_entity *entity = NULL; + struct bfq_sched_data *sd; + struct bfq_queue *bfqq; + + BUG_ON(bfqd->active_queue != NULL); + + if (bfqd->busy_queues == 0) + return NULL; + + sd = &bfqd->root_group->sched_data; + for (; sd != NULL; sd = entity->my_sched_data) { + entity = bfq_lookup_next_entity(sd, 1, bfqd); + BUG_ON(entity == NULL); + entity->service = 0; + } + + bfqq = bfq_entity_to_bfqq(entity); + BUG_ON(bfqq == NULL); + + return bfqq; +} + +static void __bfq_bfqd_reset_active(struct bfq_data *bfqd) +{ + if (bfqd->active_cic != NULL) { + put_io_context(bfqd->active_cic->ioc); + bfqd->active_cic = NULL; + } + + bfqd->active_queue = NULL; + del_timer(&bfqd->idle_slice_timer); +} + +static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, + int requeue) +{ + struct bfq_entity *entity = &bfqq->entity; + + if (bfqq == bfqd->active_queue) + __bfq_bfqd_reset_active(bfqd); + + bfq_deactivate_entity(entity, requeue); +} + +static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + + bfq_activate_entity(entity); +} + +/* + * Called when the bfqq no longer has requests pending, remove it from + * the service tree. + */ +static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, + int requeue) +{ + BUG_ON(!bfq_bfqq_busy(bfqq)); + BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + + bfq_log_bfqq(bfqd, bfqq, "del from busy"); + + bfq_clear_bfqq_busy(bfqq); + + BUG_ON(bfqd->busy_queues == 0); + bfqd->busy_queues--; + + bfq_deactivate_bfqq(bfqd, bfqq, requeue); +} + +/* + * Called when an inactive queue receives a new request. + */ +static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + BUG_ON(bfq_bfqq_busy(bfqq)); + BUG_ON(bfqq == bfqd->active_queue); + + bfq_log_bfqq(bfqd, bfqq, "add to busy"); + + bfq_activate_bfqq(bfqd, bfqq); + + bfq_mark_bfqq_busy(bfqq); + bfqd->busy_queues++; +} diff --git a/block/bfq.h b/block/bfq.h new file mode 100644 index 00000000000..8d507533998 --- /dev/null +++ b/block/bfq.h @@ -0,0 +1,606 @@ +/* + * BFQ-v6r2 for 3.1.0: data structures and common functions prototypes. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + */ + +#ifndef _BFQ_H +#define _BFQ_H + +#include +#include +#include +#include + +#define BFQ_IOPRIO_CLASSES 3 +#define BFQ_CL_IDLE_TIMEOUT HZ/5 + +#define BFQ_MIN_WEIGHT 1 +#define BFQ_MAX_WEIGHT 1000 + +#define BFQ_DEFAULT_GRP_WEIGHT 10 +#define BFQ_DEFAULT_GRP_IOPRIO 0 +#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE + +struct bfq_entity; + +/** + * struct bfq_service_tree - per ioprio_class service tree. + * @active: tree for active entities (i.e., those backlogged). + * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). + * @first_idle: idle entity with minimum F_i. + * @last_idle: idle entity with maximum F_i. + * @vtime: scheduler virtual time. + * @wsum: scheduler weight sum; active and idle entities contribute to it. + * + * Each service tree represents a B-WF2Q+ scheduler on its own. Each + * ioprio_class has its own independent scheduler, and so its own + * bfq_service_tree. All the fields are protected by the queue lock + * of the containing bfqd. 
+ */ +struct bfq_service_tree { + struct rb_root active; + struct rb_root idle; + + struct bfq_entity *first_idle; + struct bfq_entity *last_idle; + + u64 vtime; + unsigned long wsum; +}; + +/** + * struct bfq_sched_data - multi-class scheduler. + * @active_entity: entity under service. + * @next_active: head-of-the-line entity in the scheduler. + * @service_tree: array of service trees, one per ioprio_class. + * + * bfq_sched_data is the basic scheduler queue. It supports three + * ioprio_classes, and can be used either as a toplevel queue or as + * an intermediate queue on a hierarchical setup. + * @next_active points to the active entity of the sched_data service + * trees that will be scheduled next. + * + * The supported ioprio_classes are the same as in CFQ, in descending + * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. + * Requests from higher priority queues are served before all the + * requests from lower priority queues; among requests of the same + * queue requests are served according to B-WF2Q+. + * All the fields are protected by the queue lock of the containing bfqd. + */ +struct bfq_sched_data { + struct bfq_entity *active_entity; + struct bfq_entity *next_active; + struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; +}; + +/** + * struct bfq_entity - schedulable entity. + * @rb_node: service_tree member. + * @on_st: flag, true if the entity is on a tree (either the active or + * the idle one of its service_tree). + * @finish: B-WF2Q+ finish timestamp (aka F_i). + * @start: B-WF2Q+ start timestamp (aka S_i). + * @tree: tree the entity is enqueued into; %NULL if not on a tree. + * @min_start: minimum start time of the (active) subtree rooted at + * this entity; used for O(log N) lookups into active trees. + * @service: service received during the last round of service. + * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. + * @weight: weight of the queue + * @parent: parent entity, for hierarchical scheduling. + * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the + * associated scheduler queue, %NULL on leaf nodes. + * @sched_data: the scheduler queue this entity belongs to. + * @ioprio: the ioprio in use. + * @new_weight: when a weight change is requested, the new weight value. + * @orig_weight: original weight, used to implement weight boosting + * @new_ioprio: when an ioprio change is requested, the new ioprio value. + * @ioprio_class: the ioprio_class in use. + * @new_ioprio_class: when an ioprio_class change is requested, the new + * ioprio_class value. + * @ioprio_changed: flag, true when the user requested a weight, ioprio or + * ioprio_class change. + * + * A bfq_entity is used to represent either a bfq_queue (leaf node in the + * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each + * entity belongs to the sched_data of the parent group in the cgroup + * hierarchy. Non-leaf entities have also their own sched_data, stored + * in @my_sched_data. + * + * Each entity stores independently its priority values; this would + * allow different weights on different devices, but this + * functionality is not exported to userspace by now. Priorities and + * weights are updated lazily, first storing the new values into the + * new_* fields, then setting the @ioprio_changed flag. As soon as + * there is a transition in the entity state that allows the priority + * update to take place the effective and the requested priority + * values are synchronized. 
+ * + * Unless cgroups are used, the weight value is calculated from the + * ioprio to export the same interface as CFQ. When dealing with + * ``well-behaved'' queues (i.e., queues that do not spend too much + * time to consume their budget and have true sequential behavior, and + * when there are no external factors breaking anticipation) the + * relative weights at each level of the cgroups hierarchy should be + * guaranteed. All the fields are protected by the queue lock of the + * containing bfqd. + */ +struct bfq_entity { + struct rb_node rb_node; + + int on_st; + + u64 finish; + u64 start; + + struct rb_root *tree; + + u64 min_start; + + unsigned long service, budget; + unsigned short weight, new_weight; + unsigned short orig_weight; + + struct bfq_entity *parent; + + struct bfq_sched_data *my_sched_data; + struct bfq_sched_data *sched_data; + + unsigned short ioprio, new_ioprio; + unsigned short ioprio_class, new_ioprio_class; + + int ioprio_changed; +}; + +struct bfq_group; + +/** + * struct bfq_queue - leaf schedulable entity. + * @ref: reference counter. + * @bfqd: parent bfq_data. + * @new_bfqq: shared bfq_queue if queue is cooperating with + * one or more other queues. + * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). + * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). + * @sort_list: sorted list of pending requests. + * @next_rq: if fifo isn't expired, next request to serve. + * @queued: nr of requests queued in @sort_list. + * @allocated: currently allocated requests. + * @meta_pending: pending metadata requests. + * @fifo: fifo list of requests in sort_list. + * @entity: entity representing this queue in the scheduler. + * @max_budget: maximum budget allowed from the feedback mechanism. + * @budget_timeout: budget expiration (in jiffies). + * @dispatched: number of requests on the dispatch list or inside driver. + * @org_ioprio: saved ioprio during boosted periods. + * @flags: status flags. + * @bfqq_list: node for active/idle bfqq list inside our bfqd. + * @seek_samples: number of seeks sampled + * @seek_total: sum of the distances of the seeks sampled + * @seek_mean: mean seek distance + * @last_request_pos: position of the last request enqueued + * @pid: pid of the process owning the queue, used for logging purposes. + * @last_rais_start_time: last (idle -> weight-raised) transition attempt + * @raising_cur_max_time: current max raising time for this queue + * @cic: pointer to the cfq_io_context owning the bfq_queue, set to %NULL if the + * queue is shared + * + * A bfq_queue is a leaf request queue; it can be associated to an io_context + * or more (if it is an async one). @cgroup holds a reference to the + * cgroup, to be sure that it does not disappear while a bfqq still + * references it (mostly to avoid races between request issuing and task + * migration followed by cgroup distruction). + * All the fields are protected by the queue lock of the containing bfqd. 
+ */ +struct bfq_queue { + atomic_t ref; + struct bfq_data *bfqd; + + /* fields for cooperating queues handling */ + struct bfq_queue *new_bfqq; + struct rb_node pos_node; + struct rb_root *pos_root; + + struct rb_root sort_list; + struct request *next_rq; + int queued[2]; + int allocated[2]; + int meta_pending; + struct list_head fifo; + + struct bfq_entity entity; + + unsigned long max_budget; + unsigned long budget_timeout; + + int dispatched; + + unsigned short org_ioprio; + + unsigned int flags; + + struct list_head bfqq_list; + + unsigned int seek_samples; + u64 seek_total; + sector_t seek_mean; + sector_t last_request_pos; + + pid_t pid; + + /* weight-raising fields */ + unsigned int raising_cur_max_time; + u64 last_rais_start_finish, soft_rt_next_start; + unsigned int raising_coeff; + + struct cfq_io_context *cic; +}; + +/** + * struct bfq_data - per device data structure. + * @queue: request queue for the managed device. + * @root_group: root bfq_group for the device. + * @rq_pos_tree: rbtree sorted by next_request position, + * used when determining if two or more queues + * have interleaving requests (see bfq_close_cooperator). + * @eqm_lock: spinlock used to protect all data structures pertaining + * the Early Queue Merge (EQM) mechanism. + * @busy_queues: number of bfq_queues containing requests (including the + * queue under service, even if it is idling). + * @queued: number of queued requests. + * @rq_in_driver: number of requests dispatched and waiting for completion. + * @sync_flight: number of sync requests in the driver. + * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples + * completed requests . + * @hw_tag_samples: nr of samples used to calculate hw_tag. + * @hw_tag: flag set to one if the driver is showing a queueing behavior. + * @budgets_assigned: number of budgets assigned. + * @idle_slice_timer: timer set when idling for the next sequential request + * from the queue under service. + * @unplug_work: delayed work to restart dispatching on the request queue. + * @active_queue: bfq_queue under service. + * @active_cic: cfq_io_context (cic) associated with the @active_queue. + * @last_position: on-disk position of the last served request. + * @last_budget_start: beginning of the last budget. + * @last_idling_start: beginning of the last idle slice. + * @peak_rate: peak transfer rate observed for a budget. + * @peak_rate_samples: number of samples used to calculate @peak_rate. + * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling. + * @cic_index: use small consequent indexes as radix tree keys to reduce depth + * @cic_list: list of all the cics active on the bfq_data device. + * @group_list: list of all the bfq_groups active on the device. + * @active_list: list of all the bfq_queues active on the device. + * @idle_list: list of all the bfq_queues idle on the device. + * @bfq_quantum: max number of requests dispatched per dispatch round. + * @bfq_fifo_expire: timeout for async/sync requests; when it expires + * requests are served in fifo order. + * @bfq_back_penalty: weight of backward seeks wrt forward ones. + * @bfq_back_max: maximum allowed backward seek. + * @bfq_slice_idle: maximum idling time. + * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning). + * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to + * async queues. 
+ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to + * to prevent seeky queues to impose long latencies to well + * behaved ones (this also implies that seeky queues cannot + * receive guarantees in the service domain; after a timeout + * they are charged for the whole allocated budget, to try + * to preserve a behavior reasonably fair among them, but + * without service-domain guarantees). + * @bfq_raising_coeff: Maximum factor by which the weight of a boosted + * queue is multiplied + * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies) + * @bfq_raising_rt_max_time: maximum duration for soft real-time processes + * @bfq_raising_min_idle_time: minimum idle period after which weight-raising + * may be reactivated for a queue (in jiffies) + * @bfq_raising_min_inter_arr_async: minimum period between request arrivals + * after which weight-raising may be + * reactivated for an already busy queue + * (in jiffies) + * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue, + * sectors per seconds + * @RT_prod: cached value of the product R*T used for computing the maximum + * duration of the weight raising automatically + * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions + * + * All the fields are protected by the @queue lock. + */ +struct bfq_data { + struct request_queue *queue; + + struct bfq_group *root_group; + + struct rb_root rq_pos_tree; + spinlock_t eqm_lock; + + int busy_queues; + int queued; + int rq_in_driver; + int sync_flight; + + int max_rq_in_driver; + int hw_tag_samples; + int hw_tag; + + int budgets_assigned; + + struct timer_list idle_slice_timer; + struct work_struct unplug_work; + + struct bfq_queue *active_queue; + struct cfq_io_context *active_cic; + + sector_t last_position; + + ktime_t last_budget_start; + ktime_t last_idling_start; + int peak_rate_samples; + u64 peak_rate; + unsigned long bfq_max_budget; + + unsigned int cic_index; + struct list_head cic_list; + struct hlist_head group_list; + struct list_head active_list; + struct list_head idle_list; + + unsigned int bfq_quantum; + unsigned int bfq_fifo_expire[2]; + unsigned int bfq_back_penalty; + unsigned int bfq_back_max; + unsigned int bfq_slice_idle; + u64 bfq_class_idle_last_service; + + unsigned int bfq_user_max_budget; + unsigned int bfq_max_budget_async_rq; + unsigned int bfq_timeout[2]; + + bool low_latency; + + /* parameters of the low_latency heuristics */ + unsigned int bfq_raising_coeff; + unsigned int bfq_raising_max_time; + unsigned int bfq_raising_rt_max_time; + unsigned int bfq_raising_min_idle_time; + unsigned int bfq_raising_min_inter_arr_async; + unsigned int bfq_raising_max_softrt_rate; + u64 RT_prod; + + struct bfq_queue oom_bfqq; +}; + +enum bfqq_state_flags { + BFQ_BFQQ_FLAG_busy = 0, /* has requests or is under service */ + BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ + BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ + BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ + BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ + BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ + BFQ_BFQQ_FLAG_sync, /* synchronous queue */ + BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ + BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ + BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ + BFQ_BFQQ_FLAG_some_coop_idle, /* some cooperator is inactive */ + BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ +}; + +#define BFQ_BFQQ_FNS(name) \ +static inline void 
bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ +{ \ + (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \ +} \ +static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ +{ \ + (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ +} \ +static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ +{ \ + return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ +} + +BFQ_BFQQ_FNS(busy); +BFQ_BFQQ_FNS(wait_request); +BFQ_BFQQ_FNS(must_alloc); +BFQ_BFQQ_FNS(fifo_expire); +BFQ_BFQQ_FNS(idle_window); +BFQ_BFQQ_FNS(prio_changed); +BFQ_BFQQ_FNS(sync); +BFQ_BFQQ_FNS(budget_new); +BFQ_BFQQ_FNS(coop); +BFQ_BFQQ_FNS(split_coop); +BFQ_BFQQ_FNS(some_coop_idle); +BFQ_BFQQ_FNS(just_split); +#undef BFQ_BFQQ_FNS + +/* Logging facilities. */ +#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ + blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) + +#define bfq_log(bfqd, fmt, args...) \ + blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) + +/* Expiration reasons. */ +enum bfqq_expiration { + BFQ_BFQQ_TOO_IDLE = 0, /* queue has been idling for too long */ + BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ + BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ + BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ +}; + +#ifdef CONFIG_CGROUP_BFQIO +/** + * struct bfq_group - per (device, cgroup) data structure. + * @entity: schedulable entity to insert into the parent group sched_data. + * @sched_data: own sched_data, to contain child entities (they may be + * both bfq_queues and bfq_groups). + * @group_node: node to be inserted into the bfqio_cgroup->group_data + * list of the containing cgroup's bfqio_cgroup. + * @bfqd_node: node to be inserted into the @bfqd->group_list list + * of the groups active on the same device; used for cleanup. + * @bfqd: the bfq_data for the device this group acts upon. + * @async_bfqq: array of async queues for all the tasks belonging to + * the group, one queue per ioprio value per ioprio_class, + * except for the idle class that has only one queue. + * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). + * @my_entity: pointer to @entity, %NULL for the toplevel group; used + * to avoid too many special cases during group creation/migration. + * + * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup + * there is a set of bfq_groups, each one collecting the lower-level + * entities belonging to the group that are acting on the same device. + * + * Locking works as follows: + * o @group_node is protected by the bfqio_cgroup lock, and is accessed + * via RCU from its readers. + * o @bfqd is protected by the queue lock, RCU is used to access it + * from the readers. + * o All the other fields are protected by the @bfqd queue lock. + */ +struct bfq_group { + struct bfq_entity entity; + struct bfq_sched_data sched_data; + + struct hlist_node group_node; + struct hlist_node bfqd_node; + + void *bfqd; + + struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; + struct bfq_queue *async_idle_bfqq; + + struct bfq_entity *my_entity; +}; + +/** + * struct bfqio_cgroup - bfq cgroup data structure. + * @css: subsystem state for bfq in the containing cgroup. + * @weight: cgroup weight. + * @ioprio: cgroup ioprio. + * @ioprio_class: cgroup ioprio_class. + * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. + * @group_data: list containing the bfq_group belonging to this cgroup. 
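
For readability, this is what a single use of the BFQ_BFQQ_FNS() generator above produces: BFQ_BFQQ_FNS(busy) expands to the three helpers the rest of the scheduler uses to mark, clear and test the busy bit (a plain textual expansion of the macro, shown only as an example):

static inline void bfq_mark_bfqq_busy(struct bfq_queue *bfqq)
{
        (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_busy);
}

static inline void bfq_clear_bfqq_busy(struct bfq_queue *bfqq)
{
        (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_busy);
}

static inline int bfq_bfqq_busy(const struct bfq_queue *bfqq)
{
        return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_busy)) != 0;
}
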
+ * + * @group_data is accessed using RCU, with @lock protecting the updates, + * @ioprio and @ioprio_class are protected by @lock. + */ +struct bfqio_cgroup { + struct cgroup_subsys_state css; + + unsigned short weight, ioprio, ioprio_class; + + spinlock_t lock; + struct hlist_head group_data; +}; +#else +struct bfq_group { + struct bfq_sched_data sched_data; + + struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; + struct bfq_queue *async_idle_bfqq; +}; +#endif + +static inline struct bfq_service_tree * +bfq_entity_service_tree(struct bfq_entity *entity) +{ + struct bfq_sched_data *sched_data = entity->sched_data; + unsigned int idx = entity->ioprio_class - 1; + + BUG_ON(idx >= BFQ_IOPRIO_CLASSES); + BUG_ON(sched_data == NULL); + + return sched_data->service_tree + idx; +} + +static inline struct bfq_queue *cic_to_bfqq(struct cfq_io_context *cic, + int is_sync) +{ + return cic->cfqq[!!is_sync]; +} + +static inline void cic_set_bfqq(struct cfq_io_context *cic, + struct bfq_queue *bfqq, int is_sync) +{ + cic->cfqq[!!is_sync] = bfqq; +} + +static inline void call_for_each_cic(struct io_context *ioc, + void (*func)(struct io_context *, + struct cfq_io_context *)) +{ + struct cfq_io_context *cic; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) + func(ioc, cic); + rcu_read_unlock(); +} + +#define CIC_DEAD_KEY 1ul +#define CIC_DEAD_INDEX_SHIFT 1 + +static inline void *bfqd_dead_key(struct bfq_data *bfqd) +{ + return (void *)(bfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); +} + +/** + * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. + * @ptr: a pointer to a bfqd. + * @flags: storage for the flags to be saved. + * + * This function allows cic->key and bfqg->bfqd to be protected by the + * queue lock of the bfqd they reference; the pointer is dereferenced + * under RCU, so the storage for bfqd is assured to be safe as long + * as the RCU read side critical section does not end. After the + * bfqd->queue->queue_lock is taken the pointer is rechecked, to be + * sure that no other writer accessed it. If we raced with a writer, + * the function returns NULL, with the queue unlocked, otherwise it + * returns the dereferenced pointer, with the queue locked. 
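
Callers pair the helper documented above (and defined just below) with bfq_put_bfqd_unlock() in a lock-and-recheck pattern. A sketch of the typical use, assuming a cfq_io_context whose key field holds the RCU-protected bfqd pointer as elsewhere in this series:

static void bfq_example_cic_access(struct cfq_io_context *cic)
{
        unsigned long flags;
        struct bfq_data *bfqd = bfq_get_bfqd_locked(&cic->key, &flags);

        if (bfqd == NULL)
                return; /* raced with a writer, queue lock is not held */

        /* ... safely touch bfqd under bfqd->queue->queue_lock ... */

        bfq_put_bfqd_unlock(bfqd, &flags);
}
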
+ */ +static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, + unsigned long *flags) +{ + struct bfq_data *bfqd; + + rcu_read_lock(); + bfqd = rcu_dereference(*(struct bfq_data **)ptr); + + if (bfqd != NULL && !((unsigned long) bfqd & CIC_DEAD_KEY)) { + spin_lock_irqsave(bfqd->queue->queue_lock, *flags); + if (*ptr == bfqd) + goto out; + spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); + } + + bfqd = NULL; +out: + rcu_read_unlock(); + return bfqd; +} + +static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, + unsigned long *flags) +{ + spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); +} + +static void bfq_changed_ioprio(struct io_context *ioc, + struct cfq_io_context *cic); +static void bfq_put_queue(struct bfq_queue *bfqq); +static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); +static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, int is_sync, + struct io_context *ioc, gfp_t gfp_mask); +static void bfq_end_raising_async_queues(struct bfq_data *bfqd, + struct bfq_group *bfqg); +static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); +static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); +#endif From 019a0107b0508b06ae5f30c2c6e410b1ef411e2d Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Nov 2013 15:58:11 -0500 Subject: [PATCH 607/678] [PATCH 3/4] block: introduce the BFQ-v6r2 I/O sched for 3.1 --- include/linux/iocontext.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index fbdaa5aef61..69fdd5894ef 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -22,9 +22,6 @@ struct cfq_io_context { struct cfq_ttime ttime; - unsigned int raising_time_left; - unsigned int saved_idle_window; - struct list_head queue_list; struct hlist_node cic_list; From 5462f000d40eb613a8e544d09b461fc7d763de41 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Nov 2013 15:59:19 -0500 Subject: [PATCH 608/678] [PATCH 4/4] block, bfq: add Early Queue Merge (EQM) to BFQ-v6r2 for 3.1.0 --- include/linux/iocontext.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 69fdd5894ef..fbdaa5aef61 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -22,6 +22,9 @@ struct cfq_io_context { struct cfq_ttime ttime; + unsigned int raising_time_left; + unsigned int saved_idle_window; + struct list_head queue_list; struct hlist_node cic_list; From 0b3760053910b0714ea1fa5d210e8c210007f7d0 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 21 Nov 2013 18:13:54 -0500 Subject: [PATCH 609/678] defconfig: a65 --- arch/arm/configs/metallice_grouper_defconfig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 45e0a6a03ff..4c61f0bf3e2 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-" +CONFIG_LOCALVERSION="-MKernel-a65" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -181,11 +181,14 @@ CONFIG_IOSCHED_ROW=y CONFIG_IOSCHED_CFQ=y # CONFIG_IOSCHED_SIO is not set # CONFIG_IOSCHED_VR is not set -CONFIG_DEFAULT_DEADLINE=y +CONFIG_IOSCHED_BFQ=y +CONFIG_CGROUP_BFQIO=y +# CONFIG_DEFAULT_DEADLINE is 
not set # CONFIG_DEFAULT_ROW is not set # CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_BFQ=y # CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="deadline" +CONFIG_DEFAULT_IOSCHED="bfq" # CONFIG_INLINE_SPIN_TRYLOCK is not set # CONFIG_INLINE_SPIN_TRYLOCK_BH is not set # CONFIG_INLINE_SPIN_LOCK is not set From 37d9b2eb7dd760d3b5dedee22fc0cf3fbf9fb150 Mon Sep 17 00:00:00 2001 From: Metallice Date: Fri, 22 Nov 2013 12:23:09 -0500 Subject: [PATCH 610/678] block: bfq-iosched.c: tweak default values --- block/bfq-iosched.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index ee6ce52d9ec..44132842fcc 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -96,8 +96,10 @@ static const int bfq_max_budget_async_rq = 4; static const int bfq_async_charge_factor = 10; /* Default timeout values, in jiffies, approximating CFQ defaults. */ -static const int bfq_timeout_sync = HZ / 8; -static int bfq_timeout_async = HZ / 25; +//static const int bfq_timeout_sync = HZ / 8; +static const int bfq_timeout_sync = 7; +//static int bfq_timeout_async = HZ / 25; +static int bfq_timeout_async = 5; struct kmem_cache *bfq_pool; struct kmem_cache *bfq_ioc_pool; @@ -3332,11 +3334,12 @@ static int __init bfq_init(void) /* * Can be 0 on HZ < 1000 setups. */ - if (bfq_slice_idle == 0) - bfq_slice_idle = 1; + //if (bfq_slice_idle == 0) + // bfq_slice_idle = 1; + bfq_slice_idle = 0; - if (bfq_timeout_async == 0) - bfq_timeout_async = 1; + //if (bfq_timeout_async == 0) + // bfq_timeout_async = 1; if (bfq_slab_setup()) return -ENOMEM; From 2f59bc1de5657901ea5abe6420d314fa589c1d9c Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 29 Dec 2013 19:22:31 -0500 Subject: [PATCH 611/678] touchscreen: ektf3k.c: only hold wakelock if touch to wake is enabled --- drivers/input/touchscreen/ektf3k.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/input/touchscreen/ektf3k.c b/drivers/input/touchscreen/ektf3k.c index 61eab07af18..fedb6f082b0 100755 --- a/drivers/input/touchscreen/ektf3k.c +++ b/drivers/input/touchscreen/ektf3k.c @@ -2169,10 +2169,12 @@ static int elan_ktf3k_ts_suspend(struct i2c_client *client, pm_message_t mesg) rc = elan_ktf3k_ts_set_power_state(client, PWR_STATE_DEEP_SLEEP); /*s2w*/ scr_suspended = true; - if (wake_timeout == 0) { - wake_lock(&d2w_wakelock); - } else { - wake_lock_timeout(&d2w_wakelock, 100 * wake_timeout); + if ((dt2w_switch == 1) || (s2w_switch == 1)) { + if (wake_timeout == 0) { + wake_lock(&d2w_wakelock); + } else { + wake_lock_timeout(&d2w_wakelock, 100 * wake_timeout); + } } return 0; } From c062b9cdedee9e0fb00a68d0884931ff9ca48fba Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 29 Dec 2013 19:24:39 -0500 Subject: [PATCH 612/678] defconfig: a66 --- arch/arm/configs/metallice_grouper_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 4c61f0bf3e2..b762f55780c 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a65" +CONFIG_LOCALVERSION="-MKernel-a66" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y From 04b613b14c2e9ebf9f10b5cbb672ef418eeb4e2e Mon Sep 17 00:00:00 2001 From: Hank_Lee Date: Mon, 4 Nov 2013 15:01:22 +0800 Subject: 
[PATCH 613/678] charger: smb345: add hot limit temperature to stop charging Bug: 11476629 Change-Id: I5c7039e2a371b404f9f4e86e7b28542921138608 Signed-off-by: Hank_Lee --- drivers/power/bq27541_battery.c | 31 +++++++++ drivers/power/smb347-charger.c | 116 ++++++++++++++++++++++---------- 2 files changed, 111 insertions(+), 36 deletions(-) diff --git a/drivers/power/bq27541_battery.c b/drivers/power/bq27541_battery.c index 6816b7eecb7..4a905d37c6b 100755 --- a/drivers/power/bq27541_battery.c +++ b/drivers/power/bq27541_battery.c @@ -89,6 +89,7 @@ static int bq27541_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val); extern unsigned get_usb_cable_status(void); extern int smb347_charger_enable(bool enable); +extern int smb347_config_thermal_charging(int temp); module_param(battery_current, uint, 0644); module_param(battery_remaining_capacity, uint, 0644); @@ -149,6 +150,7 @@ static enum power_supply_property bq27541_properties[] = { POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_CURRENT_NOW, }; void check_cabe_type(void) @@ -345,6 +347,26 @@ static const struct attribute_group battery_smbus_group = { .attrs = battery_smbus_attributes, }; +static int bq27541_battery_current(void) +{ + int ret; + int curr = 0; + + ret = bq27541_read_i2c(bq27541_data[REG_CURRENT].addr, &curr, 0); + if (ret) { + BAT_ERR("error reading current ret = %x\n", ret); + return 0; + } + + curr = (s16)curr; + + if (curr >= bq27541_data[REG_CURRENT].min_value && + curr <= bq27541_data[REG_CURRENT].max_value) { + return curr; + } else + return 0; +} + static void battery_status_poll(struct work_struct *work) { struct bq27541_device_info *batt_dev = container_of(work, struct bq27541_device_info, status_poll_work.work); @@ -354,6 +376,10 @@ static void battery_status_poll(struct work_struct *work) power_supply_changed(&bq27541_supply[Charger_Type_Battery]); + if (!bq27541_device->temp_err) + if (ac_on || usb_on) + smb347_config_thermal_charging(bq27541_device->old_temperature/10); + /* Schedule next polling */ queue_delayed_work(battery_work_queue, &batt_dev->status_poll_work, bat_check_interval*HZ); } @@ -612,6 +638,11 @@ static int bq27541_get_psp(int reg_offset, enum power_supply_property psp, bq27541_device->old_temperature = val->intval = ret; BAT_NOTICE("temperature= %u (0.1¢XC)\n", val->intval); } + if (psp == POWER_SUPPLY_PROP_CURRENT_NOW) { + val->intval = bq27541_device->bat_current + = bq27541_battery_current(); + BAT_NOTICE("current = %d mA\n", val->intval); + } return 0; } diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index 03f5263bacf..1c5d1366df2 100755 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -108,6 +108,8 @@ #define DELAY_FOR_CURR_LIMIT_RECONF (60) #define ADAPTER_PROTECT_DELAY (4*HZ) #define GPIO_AC_OK TEGRA_GPIO_PV1 +#define ENABLE_PIN_CTRL_MASK 0x60 +#define BAT_Hot_Limit 45 /* Functions declaration */ static int smb347_configure_charger(struct i2c_client *client, int value); @@ -1060,42 +1062,37 @@ static void dockin_isr_work_function(struct work_struct *dat) static ssize_t smb347_reg_show(struct device *dev, struct device_attribute *attr, char *buf) { struct i2c_client *client = charger->client; - uint8_t config_reg[14], cmd_reg[1], status_reg[10]; - int i, ret = 0; - - ret += i2c_smbus_read_i2c_block_data(client, smb347_CHARGE, 15, config_reg) - + i2c_smbus_read_i2c_block_data(client, smb347_CMD_REG, 2, cmd_reg) - + 
i2c_smbus_read_i2c_block_data(client, smb347_INTR_STS_A, 11, status_reg); - - if (ret < 0) - SMB_ERR("failed to read charger reg !\n"); - - SMB_INFO("smb347 Registers\n"); - SMB_INFO("------------------\n"); - for(i=0;i<=14;i++) - SMB_INFO("Reg[%02xh]=0x%02x\n", i, config_reg[i]); - for(i=0;i<=1;i++) - SMB_INFO("Reg[%02xh]=0x%02x\n", 48+i, cmd_reg[i]); - for(i=0;i<=10;i++) - SMB_INFO("Reg[%02xh]=0x%02x\n", 53+i, status_reg[i]); - - return sprintf(buf, "Reg[06h]=0x%02x\n" - "Reg[08h]=0x%02x\n" - "Reg[30h]=0x%02x\n" - "Reg[31h]=0x%02x\n" - "Reg[39h]=0x%02x\n" - "Reg[3dh]=0x%02x\n" - "Reg[3eh]=0x%02x\n" - "Reg[3fh]=0x%02x\n", - config_reg[6], - config_reg[8], - cmd_reg[0], - cmd_reg[1], - status_reg[4], - status_reg[8], - status_reg[9], - status_reg[10]); - + uint8_t config_reg[15], cmd_reg[1], status_reg[10]; + char tmp_buf[64]; + int i, cfg_ret, cmd_ret, sts_ret = 0; + + cfg_ret = i2c_smbus_read_i2c_block_data(client, smb347_CHARGE, 15, config_reg); + cmd_ret = i2c_smbus_read_i2c_block_data(client, smb347_CMD_REG, 2, cmd_reg); + sts_ret = i2c_smbus_read_i2c_block_data(client, smb347_INTR_STS_A, 11, status_reg); + + sprintf(tmp_buf, "SMB34x Configuration Registers Detail\n" + "==================\n"); + strcpy(buf, tmp_buf); + + if (cfg_ret > 0) { + for(i=0;i<=14;i++) { + sprintf(tmp_buf, "Reg%02xh:\t0x%02x\n", i, config_reg[i]); + strcat(buf, tmp_buf); + } + } + if (cmd_ret > 0) { + for(i=0;i<=1;i++) { + sprintf(tmp_buf, "Reg%02xh:\t0x%02x\n", 48+i, cmd_reg[i]); + strcat(buf, tmp_buf); + } + } + if (sts_ret > 0) { + for(i=0;i<=10;i++) { + sprintf(tmp_buf, "Reg%02xh:\t0x%02x\n", 53+i, status_reg[i]); + strcat(buf, tmp_buf); + } + } + return strlen(buf); } static void smb347_default_setback(void) @@ -1161,6 +1158,53 @@ static int smb347_temp_limit_setting(void) return -1; } +int smb347_config_thermal_charging(int temp) +{ + struct i2c_client *client = charger->client; + int ret = 0, retval, setting = 0; + + mdelay(150); + SMB_NOTICE("temp=%d\n", temp); + + ret = smb347_volatile_writes(client, smb347_ENABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() charger enable write error..\n", __func__); + goto error; + } + + /*charger enable/disable*/ + retval = smb347_read(client, smb347_PIN_CTRL); + if (retval < 0) { + dev_err(&client->dev, "%s(): Failed in reading 0x%02x", + __func__, smb347_PIN_CTRL); + goto error; + } + + setting = retval & ENABLE_PIN_CTRL_MASK; + if (temp > BAT_Hot_Limit) { + if (setting != 0x40) { + SMB_NOTICE("Charger disable\n"); + smb347_charger_enable(false); + } else + SMB_NOTICE("Bypass charger disable\n"); + } else { + if (setting != 0x60) { + SMB_NOTICE("Charger enable\n"); + smb347_charger_enable(true); + } else + SMB_NOTICE("Bypass charger enable\n"); + } + + ret = smb347_volatile_writes(client, smb347_DISABLE_WRITE); + if (ret < 0) { + dev_err(&client->dev, "%s() charger enable write error..\n", __func__); + goto error; + } +error: + return ret; +} +EXPORT_SYMBOL(smb347_config_thermal_charging); + static int __devinit smb347_probe(struct i2c_client *client, const struct i2c_device_id *id) { From bd7b6b1e0e1472593eb5cbb71a745e64f4efa85d Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 29 Dec 2013 22:32:20 -0500 Subject: [PATCH 614/678] update defconfig --- arch/arm/configs/metallice_grouper_defconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index b762f55780c..3d0d0379594 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ 
b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a66" +CONFIG_LOCALVERSION="-MKernel-" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -752,7 +752,7 @@ CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y CONFIG_NETFILTER_XT_TARGET_TPROXY=y CONFIG_NETFILTER_XT_TARGET_TRACE=y CONFIG_NETFILTER_XT_TARGET_SECMARK=y -# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y # CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set # @@ -1288,7 +1288,7 @@ CONFIG_BCMDHD=y CONFIG_BCMDHD_FW_PATH="/system/vendor/firmware/fw_bcmdhd.bin" CONFIG_BCMDHD_NVRAM_PATH="/system/etc/nvram.txt" # CONFIG_DHD_USE_STATIC_BUF is not set -# CONFIG_DHD_USE_SCHED_SCAN is not set +CONFIG_DHD_USE_SCHED_SCAN=y CONFIG_DHD_ENABLE_P2P=y # CONFIG_HOSTAP is not set # CONFIG_IPW2100 is not set From 72056c97549873d8ef7c113e766fe842dea9e357 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 9 Jan 2014 17:10:16 -0500 Subject: [PATCH 615/678] defconfig: a67 --- arch/arm/configs/metallice_grouper_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 3d0d0379594..241c0b045cd 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-" +CONFIG_LOCALVERSION="-MKernel-a67" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y From 183c93be601a67ff4e929a770a28e68ced07372a Mon Sep 17 00:00:00 2001 From: Andrea Date: Mon, 17 Feb 2014 00:08:25 +0100 Subject: [PATCH 616/678] F2FS for /data to improve IO Performance (2/2) See (1/2) for explanations Change-Id: I5afbd266d93eca401d2200e9342e589f2364852f --- Documentation/filesystems/00-INDEX | 2 + Documentation/filesystems/f2fs.txt | 502 +++++ arch/arm/configs/metallice_grouper_defconfig | 3 + arch/arm/configs/tegra3_android_defconfig | 3 + fs/Kconfig | 1 + fs/Makefile | 1 + fs/dcache.c | 4 +- fs/f2fs/Kconfig | 65 + fs/f2fs/Makefile | 7 + fs/f2fs/acl.c | 423 ++++ fs/f2fs/acl.h | 57 + fs/f2fs/checkpoint.c | 860 ++++++++ fs/f2fs/data.c | 790 ++++++++ fs/f2fs/debug.c | 353 ++++ fs/f2fs/dir.c | 714 +++++++ fs/f2fs/f2fs.h | 1290 ++++++++++++ fs/f2fs/file.c | 725 +++++++ fs/f2fs/gc.c | 738 +++++++ fs/f2fs/gc.h | 110 ++ fs/f2fs/hash.c | 101 + fs/f2fs/inode.c | 273 +++ fs/f2fs/namei.c | 557 ++++++ fs/f2fs/node.c | 1859 ++++++++++++++++++ fs/f2fs/node.h | 345 ++++ fs/f2fs/recovery.c | 502 +++++ fs/f2fs/segment.c | 1787 +++++++++++++++++ fs/f2fs/segment.h | 637 ++++++ fs/f2fs/super.c | 1154 +++++++++++ fs/f2fs/xattr.c | 600 ++++++ fs/f2fs/xattr.h | 152 ++ include/linux/dcache.h | 1 + include/linux/f2fs_fs.h | 424 ++++ include/linux/fs.h | 13 + include/linux/magic.h | 1 + include/linux/security.h | 19 +- include/linux/xattr.h | 6 + include/trace/events/f2fs.h | 682 +++++++ security/security.c | 33 + 38 files changed, 15791 insertions(+), 3 deletions(-) create mode 100644 Documentation/filesystems/f2fs.txt create mode 100644 fs/f2fs/Kconfig create mode 100644 fs/f2fs/Makefile create mode 100644 fs/f2fs/acl.c create mode 100644 fs/f2fs/acl.h create mode 100644 fs/f2fs/checkpoint.c create mode 100644 fs/f2fs/data.c create mode 100644 fs/f2fs/debug.c create mode 100644 fs/f2fs/dir.c create mode 100644 
fs/f2fs/f2fs.h create mode 100644 fs/f2fs/file.c create mode 100644 fs/f2fs/gc.c create mode 100644 fs/f2fs/gc.h create mode 100644 fs/f2fs/hash.c create mode 100644 fs/f2fs/inode.c create mode 100644 fs/f2fs/namei.c create mode 100644 fs/f2fs/node.c create mode 100644 fs/f2fs/node.h create mode 100644 fs/f2fs/recovery.c create mode 100644 fs/f2fs/segment.c create mode 100644 fs/f2fs/segment.h create mode 100644 fs/f2fs/super.c create mode 100644 fs/f2fs/xattr.c create mode 100644 fs/f2fs/xattr.h create mode 100644 include/linux/f2fs_fs.h create mode 100644 include/trace/events/f2fs.h diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 8c624a18f67..ce5fd467791 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -48,6 +48,8 @@ ext4.txt - info, mount options and specifications for the Ext4 filesystem. files.txt - info on file management in the Linux kernel. +f2fs.txt + - info and mount options for the F2FS filesystem. fuse.txt - info on the Filesystem in User SpacE including mount options. gfs2.txt diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt new file mode 100644 index 00000000000..d225139cacf --- /dev/null +++ b/Documentation/filesystems/f2fs.txt @@ -0,0 +1,502 @@ +================================================================================ +WHAT IS Flash-Friendly File System (F2FS)? +================================================================================ + +NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have +been equipped on a variety systems ranging from mobile to server systems. Since +they are known to have different characteristics from the conventional rotating +disks, a file system, an upper layer to the storage device, should adapt to the +changes from the sketch in the design level. + +F2FS is a file system exploiting NAND flash memory-based storage devices, which +is based on Log-structured File System (LFS). The design has been focused on +addressing the fundamental issues in LFS, which are snowball effect of wandering +tree and high cleaning overhead. + +Since a NAND flash memory-based storage device shows different characteristic +according to its internal geometry or flash memory management scheme, namely FTL, +F2FS and its tools support various parameters not only for configuring on-disk +layout, but also for selecting allocation and cleaning algorithms. + +The following git tree provides the file system formatting tool (mkfs.f2fs), +a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). +>> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git + +For reporting bugs and sending patches, please use the following mailing list: +>> linux-f2fs-devel@lists.sourceforge.net + +================================================================================ +BACKGROUND AND DESIGN ISSUES +================================================================================ + +Log-structured File System (LFS) +-------------------------------- +"A log-structured file system writes all modifications to disk sequentially in +a log-like structure, thereby speeding up both file writing and crash recovery. +The log is the only structure on disk; it contains indexing information so that +files can be read back from the log efficiently. 
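As an illustration of the segment cleaning just described (and of the greedy victim selection that the "Cleaning process" section later in this document attributes to the on-demand cleaner), the toy C model below picks the segment with the fewest valid blocks. It is not F2FS code: the segment count and valid-block numbers are made up, and 512 blocks per 2MB segment simply follows from the 4KB block size used throughout this document.

#include <stdio.h>

#define NR_SEGS       8
#define BLKS_PER_SEG  512   /* a 2MB segment of 4KB blocks */

/* Valid-block counts per segment; a nearly full segment costs the most to clean. */
static int valid_blocks[NR_SEGS] = { 512, 17, 300, 0, 489, 51, 203, 498 };

/* Greedy victim selection: the smallest number of valid blocks wins. */
static int greedy_pick_victim(void)
{
	int victim = 0;

	for (int i = 1; i < NR_SEGS; i++)
		if (valid_blocks[i] < valid_blocks[victim])
			victim = i;
	return victim;
}

int main(void)
{
	int victim = greedy_pick_victim();

	printf("victim: segment %d, %d of %d blocks still valid and must move\n",
	       victim, valid_blocks[victim], BLKS_PER_SEG);
	return 0;
}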
In order to maintain large free +areas on disk for fast writing, we divide the log into segments and use a +segment cleaner to compress the live information from heavily fragmented +segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and +implementation of a log-structured file system", ACM Trans. Computer Systems +10, 1, 26-52. + +Wandering Tree Problem +---------------------- +In LFS, when file data is updated and written to the end of log, its direct +pointer block is updated due to the changed location. Then the indirect pointer +block is also updated due to the direct pointer block update. In this manner, +the upper index structures such as inode, inode map, and checkpoint block are +also updated recursively. This problem is called the wandering tree problem [1], +and in order to enhance the performance, it should eliminate or relax the update +propagation as much as possible. + +[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/ + +Cleaning Overhead +----------------- +Since LFS is based on out-of-place writes, it produces so many obsolete blocks +scattered across the whole storage. In order to serve new empty log space, it +needs to reclaim these obsolete blocks seamlessly to users. This job is called +the cleaning process. + +The process consists of the following operations. +1. A victim segment is selected through referencing segment usage table. +2. It loads parent index structures of all the data in the victim identified by + segment summary blocks. +3. It checks the cross-reference between the data and its parent index structure. +4. It moves valid data selectively. + +This cleaning job may cause unexpected long delays, so the most important goal +is to hide the latencies to users. And also definitely, it should reduce the +amount of valid data to be moved, and move them quickly as well. + +================================================================================ +KEY FEATURES +================================================================================ + +Flash Awareness +--------------- +- Enlarge the random write area for better performance, but provide the high + spatial locality +- Align FS data structures to the operational units in FTL as best efforts + +Wandering Tree Problem +---------------------- +- Use a term, "node", that represents inodes as well as various pointer blocks +- Introduce Node Address Table (NAT) containing the locations of all the "node" + blocks; this will cut off the update propagation. + +Cleaning Overhead +----------------- +- Support a background cleaning process +- Support greedy and cost-benefit algorithms for victim selection policies +- Support multi-head logs for static/dynamic hot and cold data separation +- Introduce adaptive logging for efficient block allocation + +================================================================================ +MOUNT OPTIONS +================================================================================ + +background_gc=%s Turn on/off cleaning operations, namely garbage + collection, triggered in background when I/O subsystem is + idle. If background_gc=on, it will turn on the garbage + collection and if background_gc=off, garbage collection + will be turned off. + Default value for this option is on. So garbage + collection is on by default. +disable_roll_forward Disable the roll-forward recovery routine +discard Issue discard/TRIM commands when a segment is cleaned. 
+no_heap Disable heap-style segment allocation which finds free + segments for data from the beginning of main area, while + for node from the end of main area. +nouser_xattr Disable Extended User Attributes. Note: xattr is enabled + by default if CONFIG_F2FS_FS_XATTR is selected. +noacl Disable POSIX Access Control List. Note: acl is enabled + by default if CONFIG_F2FS_FS_POSIX_ACL is selected. +active_logs=%u Support configuring the number of active logs. In the + current design, f2fs supports only 2, 4, and 6 logs. + Default number is 6. +disable_ext_identify Disable the extension list configured by mkfs, so f2fs + does not aware of cold files such as media files. +inline_xattr Enable the inline xattrs feature. + +================================================================================ +DEBUGFS ENTRIES +================================================================================ + +/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as +f2fs. Each file shows the whole f2fs information. + +/sys/kernel/debug/f2fs/status includes: + - major file system information managed by f2fs currently + - average SIT information about whole segments + - current memory footprint consumed by f2fs. + +================================================================================ +SYSFS ENTRIES +================================================================================ + +Information about mounted f2f2 file systems can be found in +/sys/fs/f2fs. Each mounted filesystem will have a directory in +/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda). +The files in each per-device directory are shown in table below. + +Files in /sys/fs/f2fs/ +(see also Documentation/ABI/testing/sysfs-fs-f2fs) +.............................................................................. + File Content + + gc_max_sleep_time This tuning parameter controls the maximum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_min_sleep_time This tuning parameter controls the minimum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_no_gc_sleep_time This tuning parameter controls the default sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_idle This parameter controls the selection of victim + policy for garbage collection. Setting gc_idle = 0 + (default) will disable this option. Setting + gc_idle = 1 will select the Cost Benefit approach + & setting gc_idle = 2 will select the greedy aproach. + + reclaim_segments This parameter controls the number of prefree + segments to be reclaimed. If the number of prefree + segments is larger than this number, f2fs tries to + conduct checkpoint to reclaim the prefree segments + to free segments. By default, 100 segments, 200MB. + +================================================================================ +USAGE +================================================================================ + +1. Download userland tools and compile them. + +2. Skip, if f2fs was compiled statically inside kernel. + Otherwise, insert the f2fs.ko module. + # insmod f2fs.ko + +3. Create a directory trying to mount + # mkdir /mnt/f2fs + +4. Format the block device, and then mount as f2fs + # mkfs.f2fs -l label /dev/block_device + # mount -t f2fs /dev/block_device /mnt/f2fs + +mkfs.f2fs +--------- +The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem, +which builds a basic on-disk layout. 
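For illustration, the mount options listed above are handed to the kernel as one comma-separated string, either on the mount command line as in the USAGE steps just shown, or programmatically through mount(2). A minimal sketch follows (requires root; the device path is the same placeholder used in the USAGE steps, and the option string simply picks a few of the documented options):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mount.h>

int main(void)
{
	const char *dev  = "/dev/block_device";   /* placeholder device */
	const char *dir  = "/mnt/f2fs";
	const char *opts = "background_gc=on,discard,active_logs=6";

	/* The last argument of mount(2) carries the fs-specific option string. */
	if (mount(dev, dir, "f2fs", 0, opts) != 0) {
		fprintf(stderr, "mount failed: %s\n", strerror(errno));
		return 1;
	}
	printf("mounted %s on %s with \"%s\"\n", dev, dir, opts);
	return 0;
}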
+ +The options consist of: +-l [label] : Give a volume label, up to 512 unicode name. +-a [0 or 1] : Split start location of each area for heap-based allocation. + 1 is set by default, which performs this. +-o [int] : Set overprovision ratio in percent over volume size. + 5 is set by default. +-s [int] : Set the number of segments per section. + 1 is set by default. +-z [int] : Set the number of sections per zone. + 1 is set by default. +-e [str] : Set basic extension list. e.g. "mp3,gif,mov" +-t [0 or 1] : Disable discard command or not. + 1 is set by default, which conducts discard. + +fsck.f2fs +--------- +The fsck.f2fs is a tool to check the consistency of an f2fs-formatted +partition, which examines whether the filesystem metadata and user-made data +are cross-referenced correctly or not. +Note that, initial version of the tool does not fix any inconsistency. + +The options consist of: + -d debug level [default:0] + +dump.f2fs +--------- +The dump.f2fs shows the information of specific inode and dumps SSA and SIT to +file. Each file is dump_ssa and dump_sit. + +The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. +It shows on-disk inode information reconized by a given inode number, and is +able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and +./dump_sit respectively. + +The options consist of: + -d debug level [default:0] + -i inode no (hex) + -s [SIT dump segno from #1~#2 (decimal), for all 0~-1] + -a [SSA dump segno from #1~#2 (decimal), for all 0~-1] + +Examples: +# dump.f2fs -i [ino] /dev/sdx +# dump.f2fs -s 0~-1 /dev/sdx (SIT dump) +# dump.f2fs -a 0~-1 /dev/sdx (SSA dump) + +================================================================================ +DESIGN +================================================================================ + +On-disk Layout +-------------- + +F2FS divides the whole volume into a number of segments, each of which is fixed +to 2MB in size. A section is composed of consecutive segments, and a zone +consists of a set of sections. By default, section and zone sizes are set to one +segment size identically, but users can easily modify the sizes by mkfs. + +F2FS splits the entire volume into six areas, and all the areas except superblock +consists of multiple segments as described below. + + align with the zone size <-| + |-> align with the segment size + _________________________________________________________________________ + | | | Segment | Node | Segment | | + | Superblock | Checkpoint | Info. | Address | Summary | Main | + | (SB) | (CP) | Table (SIT) | Table (NAT) | Area (SSA) | | + |____________|_____2______|______N______|______N______|______N_____|__N___| + . . + . . + . . + ._________________________________________. + |_Segment_|_..._|_Segment_|_..._|_Segment_| + . . + ._________._________ + |_section_|__...__|_ + . . + .________. + |__zone__| + +- Superblock (SB) + : It is located at the beginning of the partition, and there exist two copies + to avoid file system crash. It contains basic partition information and some + default parameters of f2fs. + +- Checkpoint (CP) + : It contains file system information, bitmaps for valid NAT/SIT sets, orphan + inode lists, and summary entries of current active segments. + +- Segment Information Table (SIT) + : It contains segment information such as valid block count and bitmap for the + validity of all the blocks. + +- Node Address Table (NAT) + : It is composed of a block address table for all the node blocks stored in + Main area. 
+ +- Segment Summary Area (SSA) + : It contains summary entries which contains the owner information of all the + data and node blocks stored in Main area. + +- Main Area + : It contains file and directory data including their indices. + +In order to avoid misalignment between file system and flash-based storage, F2FS +aligns the start block address of CP with the segment size. Also, it aligns the +start block address of Main area with the zone size by reserving some segments +in SSA area. + +Reference the following survey for additional technical details. +https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey + +File System Metadata Structure +------------------------------ + +F2FS adopts the checkpointing scheme to maintain file system consistency. At +mount time, F2FS first tries to find the last valid checkpoint data by scanning +CP area. In order to reduce the scanning time, F2FS uses only two copies of CP. +One of them always indicates the last valid data, which is called as shadow copy +mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism. + +For file system consistency, each CP points to which NAT and SIT copies are +valid, as shown as below. + + +--------+----------+---------+ + | CP | SIT | NAT | + +--------+----------+---------+ + . . . . + . . . . + . . . . + +-------+-------+--------+--------+--------+--------+ + | CP #0 | CP #1 | SIT #0 | SIT #1 | NAT #0 | NAT #1 | + +-------+-------+--------+--------+--------+--------+ + | ^ ^ + | | | + `----------------------------------------' + +Index Structure +--------------- + +The key data structure to manage the data locations is a "node". Similar to +traditional file structures, F2FS has three types of node: inode, direct node, +indirect node. F2FS assigns 4KB to an inode block which contains 923 data block +indices, two direct node pointers, two indirect node pointers, and one double +indirect node pointer as described below. One direct node block contains 1018 +data blocks, and one indirect node block contains also 1018 node blocks. Thus, +one inode block (i.e., a file) covers: + + 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB. + + Inode block (4KB) + |- data (923) + |- direct node (2) + | `- data (1018) + |- indirect node (2) + | `- direct node (1018) + | `- data (1018) + `- double indirect node (1) + `- indirect node (1018) + `- direct node (1018) + `- data (1018) + +Note that, all the node blocks are mapped by NAT which means the location of +each node is translated by the NAT table. In the consideration of the wandering +tree problem, F2FS is able to cut off the propagation of node updates caused by +leaf data writes. + +Directory Structure +------------------- + +A directory entry occupies 11 bytes, which consists of the following attributes. + +- hash hash value of the file name +- ino inode number +- len the length of file name +- type file type such as directory, symlink, etc + +A dentry block consists of 214 dentry slots and file names. Therein a bitmap is +used to represent whether each dentry is valid or not. A dentry block occupies +4KB with the following composition. + + Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) + + dentries(11 * 214 bytes) + file name (8 * 214 bytes) + + [Bucket] + +--------------------------------+ + |dentry block 1 | dentry block 2 | + +--------------------------------+ + . . + . . + . [Dentry Block Structure: 4KB] . 
+ +--------+----------+----------+------------+ + | bitmap | reserved | dentries | file names | + +--------+----------+----------+------------+ + [Dentry Block: 4KB] . . + . . + . . + +------+------+-----+------+ + | hash | ino | len | type | + +------+------+-----+------+ + [Dentry Structure: 11 bytes] + +F2FS implements multi-level hash tables for directory structure. Each level has +a hash table with dedicated number of hash buckets as shown below. Note that +"A(2B)" means a bucket includes 2 data blocks. + +---------------------- +A : bucket +B : block +N : MAX_DIR_HASH_DEPTH +---------------------- + +level #0 | A(2B) + | +level #1 | A(2B) - A(2B) + | +level #2 | A(2B) - A(2B) - A(2B) - A(2B) + . | . . . . +level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) + . | . . . . +level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) + +The number of blocks and buckets are determined by, + + ,- 2, if n < MAX_DIR_HASH_DEPTH / 2, + # of blocks in level #n = | + `- 4, Otherwise + + ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2, + # of buckets in level #n = | + `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise + +When F2FS finds a file name in a directory, at first a hash value of the file +name is calculated. Then, F2FS scans the hash table in level #0 to find the +dentry consisting of the file name and its inode number. If not found, F2FS +scans the next hash table in level #1. In this way, F2FS scans hash tables in +each levels incrementally from 1 to N. In each levels F2FS needs to scan only +one bucket determined by the following equation, which shows O(log(# of files)) +complexity. + + bucket number to scan in level #n = (hash value) % (# of buckets in level #n) + +In the case of file creation, F2FS finds empty consecutive slots that cover the +file name. F2FS searches the empty slots in the hash tables of whole levels from +1 to N in the same way as the lookup operation. + +The following figure shows an example of two cases holding children. + --------------> Dir <-------------- + | | + child child + + child - child [hole] - child + + child - child - child [hole] - [hole] - child + + Case 1: Case 2: + Number of children = 6, Number of children = 3, + File size = 7 File size = 7 + +Default Block Allocation +------------------------ + +At runtime, F2FS manages six active logs inside "Main" area: Hot/Warm/Cold node +and Hot/Warm/Cold data. + +- Hot node contains direct node blocks of directories. +- Warm node contains direct node blocks except hot node blocks. +- Cold node contains indirect node blocks +- Hot data contains dentry blocks +- Warm data contains data blocks except hot and cold data blocks +- Cold data contains multimedia data or migrated data blocks + +LFS has two schemes for free space management: threaded log and copy-and-compac- +tion. The copy-and-compaction scheme which is known as cleaning, is well-suited +for devices showing very good sequential write performance, since free segments +are served all the time for writing new data. However, it suffers from cleaning +overhead under high utilization. Contrarily, the threaded log scheme suffers +from random writes, but no cleaning process is needed. F2FS adopts a hybrid +scheme where the copy-and-compaction scheme is adopted by default, but the +policy is dynamically changed to the threaded log scheme according to the file +system status. + +In order to align F2FS with underlying flash-based storage, F2FS allocates a +segment in a unit of section. 
F2FS expects that the section size would be the +same as the unit size of garbage collection in FTL. Furthermore, with respect +to the mapping granularity in FTL, F2FS allocates each section of the active +logs from different zones as much as possible, since FTL can write the data in +the active logs into one allocation unit according to its mapping granularity. + +Cleaning process +---------------- + +F2FS does cleaning both on demand and in the background. On-demand cleaning is +triggered when there are not enough free segments to serve VFS calls. Background +cleaner is operated by a kernel thread, and triggers the cleaning job when the +system is idle. + +F2FS supports two victim selection policies: greedy and cost-benefit algorithms. +In the greedy algorithm, F2FS selects a victim segment having the smallest number +of valid blocks. In the cost-benefit algorithm, F2FS selects a victim segment +according to the segment age and the number of valid blocks in order to address +log block thrashing problem in the greedy algorithm. F2FS adopts the greedy +algorithm for on-demand cleaner, while background cleaner adopts cost-benefit +algorithm. + +In order to identify whether the data in the victim segment are valid or not, +F2FS manages a bitmap. Each bit represents the validity of a block, and the +bitmap is composed of a bit stream covering whole blocks in main area. \ No newline at end of file diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 241c0b045cd..7381367c327 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -3027,6 +3027,9 @@ CONFIG_PROC_PAGE_MONITOR=y CONFIG_REPORT_PRESENT_CPUS=y CONFIG_SYSFS=y CONFIG_TMPFS=y +CONFIG_F2FS_FS=y +CONFIG_F2FS_FS_XATTR=y +CONFIG_F2FS_FS_SECURITY=y # CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_TMPFS_XATTR is not set # CONFIG_HUGETLB_PAGE is not set diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig index 4b859b32d14..26f4a860a0e 100644 --- a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -482,6 +482,9 @@ CONFIG_FUSE_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y +CONFIG_F2FS_FS=y +CONFIG_F2FS_FS_XATTR=y +CONFIG_F2FS_FS_SECURITY=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_PARTITION_ADVANCED=y diff --git a/fs/Kconfig b/fs/Kconfig index 3130a45eafa..aebcee21e5d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -219,6 +219,7 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" +source "fs/f2fs/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/Makefile b/fs/Makefile index cd17b767c56..63e532972b3 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -120,6 +120,7 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ +obj-$(CONFIG_F2FS_FS) += f2fs/ obj-$(CONFIG_EXOFS_FS) += exofs/ obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ diff --git a/fs/dcache.c b/fs/dcache.c index 8b732a205d5..239f5e664aa 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1438,7 +1438,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode) return alias; } -static struct dentry * d_find_any_alias(struct inode *inode) +struct dentry * d_find_any_alias(struct inode *inode) { struct dentry *de; @@ -1447,7 +1447,7 @@ static struct dentry * d_find_any_alias(struct inode *inode) spin_unlock(&inode->i_lock); return de; } - 
+EXPORT_SYMBOL(d_find_any_alias); /** * d_obtain_alias - find or allocate a dentry for a given inode diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig new file mode 100644 index 00000000000..e06e0995e00 --- /dev/null +++ b/fs/f2fs/Kconfig @@ -0,0 +1,65 @@ +config F2FS_FS + tristate "F2FS filesystem support (EXPERIMENTAL)" + depends on BLOCK + help + F2FS is based on Log-structured File System (LFS), which supports + versatile "flash-friendly" features. The design has been focused on + addressing the fundamental issues in LFS, which are snowball effect + of wandering tree and high cleaning overhead. + + Since flash-based storages show different characteristics according to + the internal geometry or flash memory management schemes aka FTL, F2FS + and tools support various parameters not only for configuring on-disk + layout, but also for selecting allocation and cleaning algorithms. + + If unsure, say N. + +config F2FS_STAT_FS + bool "F2FS Status Information" + depends on F2FS_FS && DEBUG_FS + default y + help + /sys/kernel/debug/f2fs/ contains information about all the partitions + mounted as f2fs. Each file shows the whole f2fs information. + + /sys/kernel/debug/f2fs/status includes: + - major file system information managed by f2fs currently + - average SIT information about whole segments + - current memory footprint consumed by f2fs. + +config F2FS_FS_XATTR + bool "F2FS extended attributes" + depends on F2FS_FS + default y + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + for details). + + If unsure, say N. + +config F2FS_FS_POSIX_ACL + bool "F2FS Access Control Lists" + depends on F2FS_FS_XATTR + select FS_POSIX_ACL + default y + help + Posix Access Control Lists (ACLs) support permissions for users and + gourps beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website . + + If you don't know what Access Control Lists are, say N + +config F2FS_FS_SECURITY + bool "F2FS Security Labels" + depends on F2FS_FS_XATTR + help + Security labels provide an access control facility to support Linux + Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO + Linux. This option enables an extended attribute handler for file + security labels in the f2fs filesystem, so that it requires enabling + the extended attribute support in advance. + + If you are not using a security module, say N. diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile new file mode 100644 index 00000000000..27a0820340b --- /dev/null +++ b/fs/f2fs/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_F2FS_FS) += f2fs.o + +f2fs-y := dir.o file.o inode.o namei.o hash.o super.o +f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o +f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o +f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o +f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c new file mode 100644 index 00000000000..b20ced33113 --- /dev/null +++ b/fs/f2fs/acl.c @@ -0,0 +1,423 @@ +/* + * fs/f2fs/acl.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/acl.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include "f2fs.h" +#include "xattr.h" +#include "acl.h" + +#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ + (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) + +static inline size_t f2fs_acl_size(int count) +{ + if (count <= 4) { + return sizeof(struct f2fs_acl_header) + + count * sizeof(struct f2fs_acl_entry_short); + } else { + return sizeof(struct f2fs_acl_header) + + 4 * sizeof(struct f2fs_acl_entry_short) + + (count - 4) * sizeof(struct f2fs_acl_entry); + } +} + +static inline int f2fs_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(struct f2fs_acl_header); + s = size - 4 * sizeof(struct f2fs_acl_entry_short); + if (s < 0) { + if (size % sizeof(struct f2fs_acl_entry_short)) + return -1; + return size / sizeof(struct f2fs_acl_entry_short); + } else { + if (s % sizeof(struct f2fs_acl_entry)) + return -1; + return s / sizeof(struct f2fs_acl_entry) + 4; + } +} + +static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) +{ + int i, count; + struct posix_acl *acl; + struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value; + struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); + const char *end = value + size; + + if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) + return ERR_PTR(-EINVAL); + + count = f2fs_acl_count(size); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + + if ((char *)entry > end) + goto fail; + + acl->a_entries[i].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm); + + switch (acl->a_entries[i].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry_short)); + break; + + case ACL_USER: + case ACL_GROUP: + acl->a_entries[i].e_id = le32_to_cpu(entry->e_id); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + default: + goto fail; + } + } + if ((char *)entry != end) + goto fail; + return acl; +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) +{ + struct f2fs_acl_header *f2fs_acl; + struct f2fs_acl_entry *entry; + int i; + + f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * + sizeof(struct f2fs_acl_entry), GFP_KERNEL); + if (!f2fs_acl) + return ERR_PTR(-ENOMEM); + + f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION); + entry = (struct f2fs_acl_entry *)(f2fs_acl + 1); + + for (i = 0; i < acl->a_count; i++) { + + entry->e_tag = cpu_to_le16(acl->a_entries[i].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm); + + switch (acl->a_entries[i].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = cpu_to_le32(acl->a_entries[i].e_id); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry_short)); + break; + default: + goto fail; + } + } + *size = f2fs_acl_size(acl->a_count); + return (void *)f2fs_acl; + +fail: + kfree(f2fs_acl); + return ERR_PTR(-EINVAL); +} + +struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; + void 
*value = NULL; + struct posix_acl *acl; + int retval; + + if (!test_opt(sbi, POSIX_ACL)) + return NULL; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + if (type == ACL_TYPE_ACCESS) + name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; + + retval = f2fs_getxattr(inode, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + retval = f2fs_getxattr(inode, name_index, "", value, retval); + } + + if (retval > 0) + acl = f2fs_acl_from_disk(value, retval); + else if (retval == -ENODATA) + acl = NULL; + else + acl = ERR_PTR(retval); + kfree(value); + + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); + + return acl; +} + +static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + int name_index; + void *value = NULL; + size_t size = 0; + int error; + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + error = posix_acl_equiv_mode(acl, &inode->i_mode); + if (error < 0) + return error; + set_acl_inode(fi, inode->i_mode); + if (error == 0) + acl = NULL; + } + break; + + case ACL_TYPE_DEFAULT: + name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + + default: + return -EINVAL; + } + + if (acl) { + value = f2fs_acl_to_disk(acl, &size); + if (IS_ERR(value)) { + cond_clear_inode_flag(fi, FI_ACL_MODE); + return (int)PTR_ERR(value); + } + } + + error = f2fs_setxattr(inode, name_index, "", value, size, NULL); + + kfree(value); + if (!error) + set_cached_acl(inode, type, acl); + + cond_clear_inode_flag(fi, FI_ACL_MODE); + return error; +} + +int f2fs_init_acl(struct inode *inode, struct inode *dir) +{ + struct posix_acl *acl = NULL; + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + int error = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (test_opt(sbi, POSIX_ACL)) { + acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl && !(test_opt(sbi, ANDROID_EMU) && + F2FS_I(inode)->i_advise & FADVISE_ANDROID_EMU)) + inode->i_mode &= ~current_umask(); + } + + if (test_opt(sbi, POSIX_ACL) && acl) { + + if (S_ISDIR(inode->i_mode)) { + error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto cleanup; + } + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + if (error < 0) + return error; + if (error > 0) + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); + } +cleanup: + posix_acl_release(acl); + return error; +} + +int f2fs_acl_chmod(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct posix_acl *acl; + int error; + mode_t mode = get_inode_mode(inode); + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(mode)) + return -EOPNOTSUPP; + + acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + error = posix_acl_chmod(&acl, GFP_KERNEL, mode); + if (error) + return error; + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); + posix_acl_release(acl); + return error; +} + +int f2fs_android_emu(struct f2fs_sb_info *sbi, struct inode *inode, + u32 *uid, u32 *gid, umode_t *mode) +{ + F2FS_I(inode)->i_advise |= FADVISE_ANDROID_EMU; + + if (uid) + *uid = sbi->android_emu_uid; + if (gid) + *gid = sbi->android_emu_gid; + if (mode) { + *mode = 
(*mode & ~S_IRWXUGO) | sbi->android_emu_mode; + if (F2FS_I(inode)->i_advise & FADVISE_ANDROID_EMU_ROOT) + *mode &= ~S_IRWXO; + if (S_ISDIR(*mode)) { + if (*mode & S_IRUSR) + *mode |= S_IXUSR; + if (*mode & S_IRGRP) + *mode |= S_IXGRP; + if (*mode & S_IROTH) + *mode |= S_IXOTH; + } + } + + return 0; +} + +static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + const char *xname = POSIX_ACL_XATTR_DEFAULT; + size_t size; + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + + if (type == ACL_TYPE_ACCESS) + xname = POSIX_ACL_XATTR_ACCESS; + + size = strlen(xname) + 1; + if (list && size <= list_size) + memcpy(list, xname, size); + return size; +} + +static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct posix_acl *acl; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + + acl = f2fs_get_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (!acl) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct inode *inode = dentry->d_inode; + struct posix_acl *acl = NULL; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + if (!inode_owner_or_capable(inode)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + error = posix_acl_valid(acl); + if (error) + goto release_and_out; + } + } else { + acl = NULL; + } + + error = f2fs_set_acl(inode, type, acl); + +release_and_out: + posix_acl_release(acl); + return error; +} + +const struct xattr_handler f2fs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; + +const struct xattr_handler f2fs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h new file mode 100644 index 00000000000..80f43067441 --- /dev/null +++ b/fs/f2fs/acl.h @@ -0,0 +1,57 @@ +/* + * fs/f2fs/acl.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/acl.h + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
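// Stand-alone illustration, not part of the patch. The size/count helpers
// in acl.c above assume a 4-byte ACL header, 4-byte "short" entries (no
// e_id qualifier, used for USER_OBJ/GROUP_OBJ/MASK/OTHER, at most four per
// ACL) and 8-byte full entries for the rest; those sizes follow from the
// struct layouts declared in this header. The sketch below redoes the same
// arithmetic with plain constants and checks that size() and count()
// invert each other.
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define ACL_HDR    4   /* bytes in the on-disk ACL header       */
#define ACL_SHORT  4   /* bytes per entry without an e_id field */
#define ACL_FULL   8   /* bytes per entry with an e_id field    */

static size_t acl_size(int count)
{
	if (count <= 4)
		return ACL_HDR + (size_t)count * ACL_SHORT;
	return ACL_HDR + 4 * ACL_SHORT + (size_t)(count - 4) * ACL_FULL;
}

static int acl_count(size_t size)
{
	long s = (long)size - ACL_HDR - 4 * ACL_SHORT;

	if (s < 0)
		return (int)((size - ACL_HDR) / ACL_SHORT);
	return (int)(s / ACL_FULL) + 4;
}

int main(void)
{
	for (int n = 0; n <= 16; n++)
		assert(acl_count(acl_size(n)) == n);
	printf("ACL size/count arithmetic round-trips for 0..16 entries\n");
	return 0;
}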
+ */ +#ifndef __F2FS_ACL_H__ +#define __F2FS_ACL_H__ + +#include + +#define F2FS_ACL_VERSION 0x0001 + +struct f2fs_acl_entry { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +}; + +struct f2fs_acl_entry_short { + __le16 e_tag; + __le16 e_perm; +}; + +struct f2fs_acl_header { + __le32 a_version; +}; + +#ifdef CONFIG_F2FS_FS_POSIX_ACL + +extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type); +extern int f2fs_acl_chmod(struct inode *inode); +extern int f2fs_init_acl(struct inode *inode, struct inode *dir); +#else +#define f2fs_check_acl NULL +#define f2fs_get_acl NULL +#define f2fs_set_acl NULL + +static inline int f2fs_acl_chmod(struct inode *inode) +{ + return 0; +} + +static inline int f2fs_init_acl(struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif +#endif /* __F2FS_ACL_H__ */ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c new file mode 100644 index 00000000000..db6a633362d --- /dev/null +++ b/fs/f2fs/checkpoint.c @@ -0,0 +1,860 @@ +/* + * fs/f2fs/checkpoint.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include + +static struct kmem_cache *orphan_entry_slab; +static struct kmem_cache *inode_entry_slab; + +/* + * We guarantee no failure on the returned page. + */ +struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct page *page = NULL; +repeat: + page = grab_cache_page(mapping, index); + if (!page) { + cond_resched(); + goto repeat; + } + + /* We wait writeback only inside grab_meta_page() */ + wait_on_page_writeback(page); + SetPageUptodate(page); + return page; +} + +/* + * We guarantee no failure on the returned page. 
+ */ +struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct page *page; +repeat: + page = grab_cache_page(mapping, index); + if (!page) { + cond_resched(); + goto repeat; + } + if (PageUptodate(page)) + goto out; + + if (f2fs_readpage(sbi, page, index, READ_SYNC)) + goto repeat; + + lock_page(page); + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } +out: + mark_page_accessed(page); + return page; +} + +static int f2fs_write_meta_page(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + /* Should not write any meta pages, if any IO error was occurred */ + if (wbc->for_reclaim || sbi->por_doing || + is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) { + dec_page_count(sbi, F2FS_DIRTY_META); + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; + } + + wait_on_page_writeback(page); + + write_meta_page(sbi, page); + dec_page_count(sbi, F2FS_DIRTY_META); + unlock_page(page); + return 0; +} + +static int f2fs_write_meta_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + struct block_device *bdev = sbi->sb->s_bdev; + long written; + + if (wbc->for_kupdate) + return 0; + + if (get_pages(sbi, F2FS_DIRTY_META) == 0) + return 0; + + /* if mounting is failed, skip writing node pages */ + mutex_lock(&sbi->cp_mutex); + written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev)); + mutex_unlock(&sbi->cp_mutex); + wbc->nr_to_write -= written; + return 0; +} + +long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, + long nr_to_write) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + pgoff_t index = 0, end = LONG_MAX; + struct pagevec pvec; + long nwritten = 0; + struct writeback_control wbc = { + .for_reclaim = 0, + }; + + pagevec_init(&pvec, 0); + + while (index <= end) { + int i, nr_pages; + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + lock_page(page); + BUG_ON(page->mapping != mapping); + BUG_ON(!PageDirty(page)); + clear_page_dirty_for_io(page); + if (f2fs_write_meta_page(page, &wbc)) { + unlock_page(page); + break; + } + if (nwritten++ >= nr_to_write) + break; + } + pagevec_release(&pvec); + cond_resched(); + } + + if (nwritten) + f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX); + + return nwritten; +} + +static int f2fs_set_meta_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + inc_page_count(sbi, F2FS_DIRTY_META); + return 1; + } + return 0; +} + +const struct address_space_operations f2fs_meta_aops = { + .writepage = f2fs_write_meta_page, + .writepages = f2fs_write_meta_pages, + .set_page_dirty = f2fs_set_meta_page_dirty, +}; + +int acquire_orphan_inode(struct f2fs_sb_info *sbi) +{ + unsigned int max_orphans; + int err = 0; + + /* + * considering 512 blocks in a segment 5 blocks are needed for cp + * and log segment summaries. 
Remaining blocks are used to keep + * orphan entries with the limitation one reserved segment + * for cp pack we can have max 1020*507 orphan entries + */ + max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK; + mutex_lock(&sbi->orphan_inode_mutex); + if (sbi->n_orphans >= max_orphans) + err = -ENOSPC; + else + sbi->n_orphans++; + mutex_unlock(&sbi->orphan_inode_mutex); + return err; +} + +void release_orphan_inode(struct f2fs_sb_info *sbi) +{ + mutex_lock(&sbi->orphan_inode_mutex); + if (sbi->n_orphans == 0) { + f2fs_msg(sbi->sb, KERN_ERR, "releasing " + "unacquired orphan inode"); + f2fs_handle_error(sbi); + } else + sbi->n_orphans--; + mutex_unlock(&sbi->orphan_inode_mutex); +} + +void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head, *this; + struct orphan_inode_entry *new = NULL, *orphan = NULL; + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + list_for_each(this, head) { + orphan = list_entry(this, struct orphan_inode_entry, list); + if (orphan->ino == ino) + goto out; + if (orphan->ino > ino) + break; + orphan = NULL; + } +retry: + new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); + if (!new) { + cond_resched(); + goto retry; + } + new->ino = ino; + + /* add new_oentry into list which is sorted by inode number */ + if (orphan) + list_add(&new->list, this->prev); + else + list_add_tail(&new->list, head); +out: + mutex_unlock(&sbi->orphan_inode_mutex); +} + +void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head; + struct orphan_inode_entry *orphan; + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + list_for_each_entry(orphan, head, list) { + if (orphan->ino == ino) { + list_del(&orphan->list); + kmem_cache_free(orphan_entry_slab, orphan); + if (sbi->n_orphans == 0) { + f2fs_msg(sbi->sb, KERN_ERR, "removing " + "unacquired orphan inode %d", + ino); + f2fs_handle_error(sbi); + } else + sbi->n_orphans--; + break; + } + } + mutex_unlock(&sbi->orphan_inode_mutex); +} + +static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct inode *inode = f2fs_iget(sbi->sb, ino); + if (IS_ERR(inode)) { + f2fs_msg(sbi->sb, KERN_ERR, "unable to recover orphan inode %d", + ino); + f2fs_handle_error(sbi); + return; + } + clear_nlink(inode); + + /* truncate all the data during iput */ + iput(inode); +} + +int recover_orphan_inodes(struct f2fs_sb_info *sbi) +{ + block_t start_blk, orphan_blkaddr, i, j; + + if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) + return 0; + + sbi->por_doing = 1; + start_blk = __start_cp_addr(sbi) + 1; + orphan_blkaddr = __start_sum_addr(sbi) - 1; + + for (i = 0; i < orphan_blkaddr; i++) { + struct page *page = get_meta_page(sbi, start_blk + i); + struct f2fs_orphan_block *orphan_blk; + + orphan_blk = (struct f2fs_orphan_block *)page_address(page); + for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { + nid_t ino = le32_to_cpu(orphan_blk->ino[j]); + recover_orphan_inode(sbi, ino); + } + f2fs_put_page(page, 1); + } + /* clear Orphan Flag */ + clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); + sbi->por_doing = 0; + return 0; +} + +static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) +{ + struct list_head *head, *this, *next; + struct f2fs_orphan_block *orphan_blk = NULL; + struct page *page = NULL; + unsigned int nentries = 0; + unsigned short index = 1; + unsigned short orphan_blocks; + + orphan_blocks = (unsigned short)((sbi->n_orphans + + (F2FS_ORPHANS_PER_BLOCK 
- 1)) / F2FS_ORPHANS_PER_BLOCK); + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + + /* loop for each orphan inode entry and write them in Jornal block */ + list_for_each_safe(this, next, head) { + struct orphan_inode_entry *orphan; + + orphan = list_entry(this, struct orphan_inode_entry, list); + + if (nentries == F2FS_ORPHANS_PER_BLOCK) { + /* + * an orphan block is full of 1020 entries, + * then we need to flush current orphan blocks + * and bring another one in memory + */ + orphan_blk->blk_addr = cpu_to_le16(index); + orphan_blk->blk_count = cpu_to_le16(orphan_blocks); + orphan_blk->entry_count = cpu_to_le32(nentries); + set_page_dirty(page); + f2fs_put_page(page, 1); + index++; + start_blk++; + nentries = 0; + page = NULL; + } + if (page) + goto page_exist; + + page = grab_meta_page(sbi, start_blk); + orphan_blk = (struct f2fs_orphan_block *)page_address(page); + memset(orphan_blk, 0, sizeof(*orphan_blk)); +page_exist: + orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); + } + if (!page) + goto end; + + orphan_blk->blk_addr = cpu_to_le16(index); + orphan_blk->blk_count = cpu_to_le16(orphan_blocks); + orphan_blk->entry_count = cpu_to_le32(nentries); + set_page_dirty(page); + f2fs_put_page(page, 1); +end: + mutex_unlock(&sbi->orphan_inode_mutex); +} + +static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, + block_t cp_addr, unsigned long long *version) +{ + struct page *cp_page_1, *cp_page_2 = NULL; + unsigned long blk_size = sbi->blocksize; + struct f2fs_checkpoint *cp_block; + unsigned long long cur_version = 0, pre_version = 0; + size_t crc_offset; + __u32 crc = 0; + + /* Read the 1st cp block in this CP pack */ + cp_page_1 = get_meta_page(sbi, cp_addr); + + /* get the version number */ + cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1); + crc_offset = le32_to_cpu(cp_block->checksum_offset); + if (crc_offset >= blk_size) + goto invalid_cp1; + + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + if (!f2fs_crc_valid(crc, cp_block, crc_offset)) + goto invalid_cp1; + + pre_version = cur_cp_version(cp_block); + + /* Read the 2nd cp block in this CP pack */ + cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; + cp_page_2 = get_meta_page(sbi, cp_addr); + + cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2); + crc_offset = le32_to_cpu(cp_block->checksum_offset); + if (crc_offset >= blk_size) + goto invalid_cp2; + + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + if (!f2fs_crc_valid(crc, cp_block, crc_offset)) + goto invalid_cp2; + + cur_version = cur_cp_version(cp_block); + + if (cur_version == pre_version) { + *version = cur_version; + f2fs_put_page(cp_page_2, 1); + return cp_page_1; + } +invalid_cp2: + f2fs_put_page(cp_page_2, 1); +invalid_cp1: + f2fs_put_page(cp_page_1, 1); + return NULL; +} + +int get_valid_checkpoint(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *cp_block; + struct f2fs_super_block *fsb = sbi->raw_super; + struct page *cp1, *cp2, *cur_page; + unsigned long blk_size = sbi->blocksize; + unsigned long long cp1_version = 0, cp2_version = 0; + unsigned long long cp_start_blk_no; + + sbi->ckpt = kzalloc(blk_size, GFP_KERNEL); + if (!sbi->ckpt) + return -ENOMEM; + /* + * Finding out valid cp block involves read both + * sets( cp pack1 and cp pack 2) + */ + cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr); + cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); + + /* The second checkpoint pack should start at the next segment */ + 
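	/*
	 * Note (illustration, not part of the patch): with the 2MB segments
	 * of 4KB blocks described in f2fs.txt (512 blocks per segment, i.e.
	 * log_blocks_per_seg == 9), the next statement advances
	 * cp_start_blk_no by 1 << 9 = 512 blocks, so checkpoint pack #2
	 * begins exactly one segment after pack #1. validate_checkpoint()
	 * above accepts a pack only when its head and tail blocks carry a
	 * valid CRC and the same version, and ver_after() then prefers
	 * whichever surviving pack is newer -- the shadow-copy scheme
	 * described in the documentation.
	 */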
cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); + cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version); + + if (cp1 && cp2) { + if (ver_after(cp2_version, cp1_version)) + cur_page = cp2; + else + cur_page = cp1; + } else if (cp1) { + cur_page = cp1; + } else if (cp2) { + cur_page = cp2; + } else { + goto fail_no_cp; + } + + cp_block = (struct f2fs_checkpoint *)page_address(cur_page); + memcpy(sbi->ckpt, cp_block, blk_size); + + f2fs_put_page(cp1, 1); + f2fs_put_page(cp2, 1); + return 0; + +fail_no_cp: + kfree(sbi->ckpt); + return -EINVAL; +} + +static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode == inode) + return -EEXIST; + } + list_add_tail(&new->list, head); +#ifdef CONFIG_F2FS_STAT_FS + sbi->n_dirty_dirs++; +#endif + return 0; +} + +void set_dirty_dir_page(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; + + if (!S_ISDIR(inode->i_mode)) + return; +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } + new->inode = inode; + INIT_LIST_HEAD(&new->list); + + spin_lock(&sbi->dir_inode_lock); + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); + + inc_page_count(sbi, F2FS_DIRTY_DENTS); + inode_inc_dirty_dents(inode); + SetPagePrivate(page); + spin_unlock(&sbi->dir_inode_lock); +} + +void add_dirty_dir_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } + new->inode = inode; + INIT_LIST_HEAD(&new->list); + + spin_lock(&sbi->dir_inode_lock); + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); + spin_unlock(&sbi->dir_inode_lock); +} + +void remove_dirty_dir_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + + if (!S_ISDIR(inode->i_mode)) + return; + + spin_lock(&sbi->dir_inode_lock); + if (atomic_read(&F2FS_I(inode)->dirty_dents)) { + spin_unlock(&sbi->dir_inode_lock); + return; + } + + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode == inode) { + list_del(&entry->list); + kmem_cache_free(inode_entry_slab, entry); +#ifdef CONFIG_F2FS_STAT_FS + sbi->n_dirty_dirs--; +#endif + break; + } + } + spin_unlock(&sbi->dir_inode_lock); + + /* Only from the recovery routine */ + if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { + clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); + iput(inode); + } +} + +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + struct inode *inode = NULL; + + spin_lock(&sbi->dir_inode_lock); + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode->i_ino == ino) { + inode = entry->inode; + break; + } + } + spin_unlock(&sbi->dir_inode_lock); + return inode; +} + +void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) +{ + struct list_head 
*head = &sbi->dir_inode_list; + struct dir_inode_entry *entry; + struct inode *inode; +retry: + spin_lock(&sbi->dir_inode_lock); + if (list_empty(head)) { + spin_unlock(&sbi->dir_inode_lock); + return; + } + entry = list_entry(head->next, struct dir_inode_entry, list); + inode = igrab(entry->inode); + spin_unlock(&sbi->dir_inode_lock); + if (inode) { + filemap_flush(inode->i_mapping); + iput(inode); + } else { + /* + * We should submit bio, since it exists several + * wribacking dentry pages in the freeing inode. + */ + f2fs_submit_bio(sbi, DATA, true); + } + goto retry; +} + +/* + * Freeze all the FS-operations for checkpoint. + */ +static void block_operations(struct f2fs_sb_info *sbi) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + struct blk_plug plug; + + blk_start_plug(&plug); + +retry_flush_dents: + mutex_lock_all(sbi); + + /* write all the dirty dentry pages */ + if (get_pages(sbi, F2FS_DIRTY_DENTS)) { + mutex_unlock_all(sbi); + sync_dirty_dir_inodes(sbi); + goto retry_flush_dents; + } + + /* + * POR: we should ensure that there is no dirty node pages + * until finishing nat/sit flush. + */ +retry_flush_nodes: + mutex_lock(&sbi->node_write); + + if (get_pages(sbi, F2FS_DIRTY_NODES)) { + mutex_unlock(&sbi->node_write); + sync_node_pages(sbi, 0, &wbc); + goto retry_flush_nodes; + } + blk_finish_plug(&plug); +} + +static void unblock_operations(struct f2fs_sb_info *sbi) +{ + mutex_unlock(&sbi->node_write); + mutex_unlock_all(sbi); +} + +static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + nid_t last_nid = 0; + block_t start_blk; + struct page *cp_page; + unsigned int data_sum_blocks, orphan_blocks; + __u32 crc32 = 0; + void *kaddr; + int i; + + /* Flush all the NAT/SIT pages */ + while (get_pages(sbi, F2FS_DIRTY_META)) + sync_meta_pages(sbi, META, LONG_MAX); + + next_free_nid(sbi, &last_nid); + + /* + * modify checkpoint + * version number is already updated + */ + ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); + ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); + ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); + for (i = 0; i < 3; i++) { + ckpt->cur_node_segno[i] = + cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); + ckpt->cur_node_blkoff[i] = + cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE)); + ckpt->alloc_type[i + CURSEG_HOT_NODE] = + curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); + } + for (i = 0; i < 3; i++) { + ckpt->cur_data_segno[i] = + cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); + ckpt->cur_data_blkoff[i] = + cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA)); + ckpt->alloc_type[i + CURSEG_HOT_DATA] = + curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); + } + + ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); + ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); + ckpt->next_free_nid = cpu_to_le32(last_nid); + + /* 2 cp + n data seg summary + orphan inode blocks */ + data_sum_blocks = npages_for_summary_flush(sbi); + if (data_sum_blocks < 3) + set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); + else + clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); + + orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) + / F2FS_ORPHANS_PER_BLOCK; + ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); + + if (is_umount) { + set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); + } else { + 
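+		/*
+		 * Regular checkpoint: roughly, the CP pack keeps the two
+		 * checkpoint blocks plus the orphan and data summary blocks
+		 * only; node summaries are written in the umount case above.
+		 */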
clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + data_sum_blocks + orphan_blocks); + } + + if (sbi->n_orphans) + set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + else + clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + + /* update SIT/NAT bitmap */ + get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); + get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); + + crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); + *((__le32 *)((unsigned char *)ckpt + + le32_to_cpu(ckpt->checksum_offset))) + = cpu_to_le32(crc32); + + start_blk = __start_cp_addr(sbi); + + /* write out checkpoint buffer at block 0 */ + cp_page = grab_meta_page(sbi, start_blk++); + kaddr = page_address(cp_page); + memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + + if (sbi->n_orphans) { + write_orphan_inodes(sbi, start_blk); + start_blk += orphan_blocks; + } + + write_data_summaries(sbi, start_blk); + start_blk += data_sum_blocks; + if (is_umount) { + write_node_summaries(sbi, start_blk); + start_blk += NR_CURSEG_NODE_TYPE; + } + + /* writeout checkpoint block */ + cp_page = grab_meta_page(sbi, start_blk); + kaddr = page_address(cp_page); + memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + + /* wait for previous submitted node/meta pages writeback */ + while (get_pages(sbi, F2FS_WRITEBACK)) + congestion_wait(BLK_RW_ASYNC, HZ / 50); + + filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); + filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); + + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + sbi->alloc_valid_block_count = 0; + + /* Here, we only have one bio having CP pack */ + sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + + if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { + clear_prefree_segments(sbi); + F2FS_RESET_SB_DIRT(sbi); + } +} + +/* + * We guarantee that this checkpoint procedure should not fail. 
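+ * Roughly: block_operations() freezes FS operations, the pending
+ * DATA/NODE/META bios are flushed, the checkpoint version is bumped,
+ * cached NAT/SIT entries are written back, and do_checkpoint() then
+ * writes the CP pack before unblock_operations() resumes normal work.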
+ */ +void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long long ckpt_ver; + + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); + + mutex_lock(&sbi->cp_mutex); + block_operations(sbi); + + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); + + f2fs_submit_bio(sbi, DATA, true); + f2fs_submit_bio(sbi, NODE, true); + f2fs_submit_bio(sbi, META, true); + + /* + * update checkpoint pack index + * Increase the version number so that + * SIT entries and seg summaries are written at correct place + */ + ckpt_ver = cur_cp_version(ckpt); + ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); + + /* write cached NAT/SIT entries to NAT/SIT area */ + flush_nat_entries(sbi); + flush_sit_entries(sbi); + + /* unlock all the fs_lock[] in do_checkpoint() */ + do_checkpoint(sbi, is_umount); + + unblock_operations(sbi); + mutex_unlock(&sbi->cp_mutex); + + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); +} + +void init_orphan_info(struct f2fs_sb_info *sbi) +{ + mutex_init(&sbi->orphan_inode_mutex); + INIT_LIST_HEAD(&sbi->orphan_inode_list); + sbi->n_orphans = 0; +} + +int __init create_checkpoint_caches(void) +{ + orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", + sizeof(struct orphan_inode_entry), NULL); + if (unlikely(!orphan_entry_slab)) + return -ENOMEM; + inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", + sizeof(struct dir_inode_entry), NULL); + if (unlikely(!inode_entry_slab)) { + kmem_cache_destroy(orphan_entry_slab); + return -ENOMEM; + } + return 0; +} + +void destroy_checkpoint_caches(void) +{ + kmem_cache_destroy(orphan_entry_slab); + kmem_cache_destroy(inode_entry_slab); +} diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c new file mode 100644 index 00000000000..550adc3cc6b --- /dev/null +++ b/fs/f2fs/data.c @@ -0,0 +1,790 @@ +/* + * fs/f2fs/data.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include + +/* + * Lock ordering for the change of data block address: + * ->data_page + * ->node_page + * update block addresses in the node page + */ +static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) +{ + struct f2fs_node *rn; + __le32 *addr_array; + struct page *node_page = dn->node_page; + unsigned int ofs_in_node = dn->ofs_in_node; + + f2fs_wait_on_page_writeback(node_page, NODE, false); + + rn = F2FS_NODE(node_page); + + /* Get physical address of data block */ + addr_array = blkaddr_in_node(rn); + addr_array[ofs_in_node] = cpu_to_le32(new_addr); + set_page_dirty(node_page); +} + +int reserve_new_block(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + + if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) + return -EPERM; + if (!inc_valid_block_count(sbi, dn->inode, 1)) + return -ENOSPC; + + trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); + + __set_data_blkaddr(dn, NEW_ADDR); + dn->data_blkaddr = NEW_ADDR; + sync_inode_page(dn); + return 0; +} + +static int check_extent_cache(struct inode *inode, pgoff_t pgofs, + struct buffer_head *bh_result) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); +#ifdef CONFIG_F2FS_STAT_FS + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +#endif + pgoff_t start_fofs, end_fofs; + block_t start_blkaddr; + + read_lock(&fi->ext.ext_lock); + if (fi->ext.len == 0) { + read_unlock(&fi->ext.ext_lock); + return 0; + } + +#ifdef CONFIG_F2FS_STAT_FS + sbi->total_hit_ext++; +#endif + start_fofs = fi->ext.fofs; + end_fofs = fi->ext.fofs + fi->ext.len - 1; + start_blkaddr = fi->ext.blk_addr; + + if (pgofs >= start_fofs && pgofs <= end_fofs) { + unsigned int blkbits = inode->i_sb->s_blocksize_bits; + size_t count; + + clear_buffer_new(bh_result); + map_bh(bh_result, inode->i_sb, + start_blkaddr + pgofs - start_fofs); + count = end_fofs - pgofs + 1; + if (count < (UINT_MAX >> blkbits)) + bh_result->b_size = (count << blkbits); + else + bh_result->b_size = UINT_MAX; + +#ifdef CONFIG_F2FS_STAT_FS + sbi->read_hit_ext++; +#endif + read_unlock(&fi->ext.ext_lock); + return 1; + } + read_unlock(&fi->ext.ext_lock); + return 0; +} + +void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) +{ + struct f2fs_inode_info *fi = F2FS_I(dn->inode); + pgoff_t fofs, start_fofs, end_fofs; + block_t start_blkaddr, end_blkaddr; + + BUG_ON(blk_addr == NEW_ADDR); + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; + + /* Update the page address in the parent node */ + __set_data_blkaddr(dn, blk_addr); + + write_lock(&fi->ext.ext_lock); + + start_fofs = fi->ext.fofs; + end_fofs = fi->ext.fofs + fi->ext.len - 1; + start_blkaddr = fi->ext.blk_addr; + end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; + + /* Drop and initialize the matched extent */ + if (fi->ext.len == 1 && fofs == start_fofs) + fi->ext.len = 0; + + /* Initial extent */ + if (fi->ext.len == 0) { + if (blk_addr != NULL_ADDR) { + fi->ext.fofs = fofs; + fi->ext.blk_addr = blk_addr; + fi->ext.len = 1; + } + goto end_update; + } + + /* Front merge */ + if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { + fi->ext.fofs--; + fi->ext.blk_addr--; + fi->ext.len++; + goto end_update; + } + + /* Back merge */ + if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { + fi->ext.len++; + goto end_update; + } + + /* Split the existing extent */ + if 
(fi->ext.len > 1 && + fofs >= start_fofs && fofs <= end_fofs) { + if ((end_fofs - fofs) < (fi->ext.len >> 1)) { + fi->ext.len = fofs - start_fofs; + } else { + fi->ext.fofs = fofs + 1; + fi->ext.blk_addr = start_blkaddr + + fofs - start_fofs + 1; + fi->ext.len -= fofs - start_fofs + 1; + } + goto end_update; + } + write_unlock(&fi->ext.ext_lock); + return; + +end_update: + write_unlock(&fi->ext.ext_lock); + sync_inode_page(dn); +} + +struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct dnode_of_data dn; + struct page *page; + int err; + + page = find_get_page(mapping, index); + if (page && PageUptodate(page)) + return page; + f2fs_put_page(page, 0); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) + return ERR_PTR(err); + f2fs_put_dnode(&dn); + + if (dn.data_blkaddr == NULL_ADDR) + return ERR_PTR(-ENOENT); + + /* By fallocate(), there is no cached page, but with NEW_ADDR */ + if (dn.data_blkaddr == NEW_ADDR) + return ERR_PTR(-EINVAL); + + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + if (!page) + return ERR_PTR(-ENOMEM); + + if (PageUptodate(page)) { + unlock_page(page); + return page; + } + + err = f2fs_readpage(sbi, page, dn.data_blkaddr, + sync ? READ_SYNC : READA); + if (sync) { + wait_on_page_locked(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 0); + return ERR_PTR(-EIO); + } + } + return page; +} + +/* + * If it tries to access a hole, return an error. + * Because, the callers, functions in dir.c and GC, should be able to know + * whether this page exists or not. + */ +struct page *get_lock_data_page(struct inode *inode, pgoff_t index) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct dnode_of_data dn; + struct page *page; + int err; + +repeat: + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + if (!page) + return ERR_PTR(-ENOMEM); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) { + f2fs_put_page(page, 1); + return ERR_PTR(err); + } + f2fs_put_dnode(&dn); + + if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_page(page, 1); + return ERR_PTR(-ENOENT); + } + + if (PageUptodate(page)) + return page; + + /* + * A new dentry page is allocated but not able to be written, since its + * new inode page couldn't be allocated due to -ENOSPC. + * In such the case, its blkaddr can be remained as NEW_ADDR. + * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata. + */ + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + return page; + } + + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) + return ERR_PTR(err); + + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } + return page; +} + +/* + * Caller ensures that this data page is never allocated. + * A new zero-filled data page is allocated in the page cache. + * + * Also, caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). + * Note that, npage is set only by make_empty_dir. 
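+ * For example, make_empty_dir() passes the new inode page as npage,
+ * while __f2fs_add_link() passes NULL and lets this function look up
+ * the dnode by itself.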
+ */ +struct page *get_new_data_page(struct inode *inode, + struct page *npage, pgoff_t index, bool new_i_size) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct page *page; + struct dnode_of_data dn; + int err; + + set_new_dnode(&dn, inode, npage, npage, 0); + err = get_dnode_of_data(&dn, index, ALLOC_NODE); + if (err) + return ERR_PTR(err); + + if (dn.data_blkaddr == NULL_ADDR) { + if (reserve_new_block(&dn)) { + if (!npage) + f2fs_put_dnode(&dn); + return ERR_PTR(-ENOSPC); + } + } + if (!npage) + f2fs_put_dnode(&dn); +repeat: + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); + + if (PageUptodate(page)) + return page; + + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + } else { + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) + return ERR_PTR(err); + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } + } + + if (new_i_size && + i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { + i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); + /* Only the directory inode sets new_i_size */ + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); + mark_inode_dirty_sync(inode); + } + return page; +} + +static void read_end_io(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + SetPageUptodate(page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + unlock_page(page); + } while (bvec >= bio->bi_io_vec); + bio_put(bio); +} + +/* + * Fill the locked page with data located in the block address. + * Return unlocked page. + */ +int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, + block_t blk_addr, int type) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct bio *bio; + + trace_f2fs_readpage(page, blk_addr, type); + + down_read(&sbi->bio_sem); + + /* Allocate a new bio */ + bio = f2fs_bio_alloc(bdev, 1); + + /* Initialize the bio */ + bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + bio->bi_end_io = read_end_io; + + if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { + bio_put(bio); + up_read(&sbi->bio_sem); + f2fs_put_page(page, 1); + return -EFAULT; + } + + submit_bio(type, bio); + up_read(&sbi->bio_sem); + return 0; +} + +/* + * This function should be used by the data read flow only where it + * does not check the "create" flag that indicates block allocation. + * The reason for this special functionality is to exploit VFS readahead + * mechanism. 
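+ * For example, f2fs_read_data_pages() simply hands this callback to
+ * mpage_readpages(), so the consecutive block addresses returned here
+ * let the VFS build larger readahead bios.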
+ */ +static int get_data_block_ro(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + unsigned int blkbits = inode->i_sb->s_blocksize_bits; + unsigned maxblocks = bh_result->b_size >> blkbits; + struct dnode_of_data dn; + pgoff_t pgofs; + int err; + + /* Get the page offset from the block offset(iblock) */ + pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); + + if (check_extent_cache(inode, pgofs, bh_result)) { + trace_f2fs_get_data_block(inode, iblock, bh_result, 0); + return 0; + } + + /* When reading holes, we need its node page */ + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); + if (err) { + trace_f2fs_get_data_block(inode, iblock, bh_result, err); + return (err == -ENOENT) ? 0 : err; + } + + /* It does not support data allocation */ + BUG_ON(create); + + if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { + int i; + unsigned int end_offset; + + end_offset = IS_INODE(dn.node_page) ? + ADDRS_PER_INODE(F2FS_I(inode)) : + ADDRS_PER_BLOCK; + + clear_buffer_new(bh_result); + + /* Give more consecutive addresses for the read ahead */ + for (i = 0; i < end_offset - dn.ofs_in_node; i++) + if (((datablock_addr(dn.node_page, + dn.ofs_in_node + i)) + != (dn.data_blkaddr + i)) || maxblocks == i) + break; + map_bh(bh_result, inode->i_sb, dn.data_blkaddr); + bh_result->b_size = (i << blkbits); + } + f2fs_put_dnode(&dn); + trace_f2fs_get_data_block(inode, iblock, bh_result, 0); + return 0; +} + +static int f2fs_read_data_page(struct file *file, struct page *page) +{ + return mpage_readpage(page, get_data_block_ro); +} + +static int f2fs_read_data_pages(struct file *file, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); +} + +int do_write_data_page(struct page *page) +{ + struct inode *inode = page->mapping->host; + block_t old_blk_addr, new_blk_addr; + struct dnode_of_data dn; + int err = 0; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + if (err) + return err; + + old_blk_addr = dn.data_blkaddr; + + /* This page is already truncated */ + if (old_blk_addr == NULL_ADDR) + goto out_writepage; + + set_page_writeback(page); + + /* + * If current allocation needs SSR, + * it had better in-place writes for updated data. + */ + if (unlikely(old_blk_addr != NEW_ADDR && + !is_cold_data(page) && + need_inplace_update(inode))) { + rewrite_data_page(F2FS_SB(inode->i_sb), page, + old_blk_addr); + } else { + write_data_page(inode, page, &dn, + old_blk_addr, &new_blk_addr); + update_extent_cache(new_blk_addr, &dn); + } +out_writepage: + f2fs_put_dnode(&dn); + return err; +} + +static int f2fs_write_data_page(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = ((unsigned long long) i_size) + >> PAGE_CACHE_SHIFT; + unsigned offset; + bool need_balance_fs = false; + int err = 0; + + if (page->index < end_index) + goto write; + + /* + * If the offset is out-of-range of file size, + * this page does not have to be written to disk. 
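+ * e.g. with 4KB pages and i_size == 5000, end_index is 1: pages past
+ * index 1 are skipped, and page 1 itself is zeroed from offset 904 up
+ * to PAGE_CACHE_SIZE before being written back.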
+ */ + offset = i_size & (PAGE_CACHE_SIZE - 1); + if ((page->index >= end_index + 1) || !offset) { + if (S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + goto out; + } + + zero_user_segment(page, offset, PAGE_CACHE_SIZE); +write: + if (sbi->por_doing) { + err = AOP_WRITEPAGE_ACTIVATE; + goto redirty_out; + } + + /* Dentry blocks are controlled by checkpoint */ + if (S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + err = do_write_data_page(page); + } else { + int ilock = mutex_lock_op(sbi); + err = do_write_data_page(page); + mutex_unlock_op(sbi, ilock); + need_balance_fs = true; + } + if (err == -ENOENT) + goto out; + else if (err) + goto redirty_out; + + if (wbc->for_reclaim) + f2fs_submit_bio(sbi, DATA, true); + + clear_cold_data(page); +out: + unlock_page(page); + if (need_balance_fs) + f2fs_balance_fs(sbi); + return 0; + +redirty_out: + wbc->pages_skipped++; + set_page_dirty(page); + return err; +} + +#define MAX_DESIRED_PAGES_WP 4096 + +static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, + void *data) +{ + struct address_space *mapping = data; + int ret = mapping->a_ops->writepage(page, wbc); + mapping_set_error(mapping, ret); + return ret; +} + +static int f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + bool locked = false; + int ret; + long excess_nrtw = 0, desired_nrtw; + + /* deal with chardevs and other special file */ + if (!mapping->a_ops->writepage) + return 0; + + if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { + desired_nrtw = MAX_DESIRED_PAGES_WP; + excess_nrtw = desired_nrtw - wbc->nr_to_write; + wbc->nr_to_write = desired_nrtw; + } + + if (!S_ISDIR(inode->i_mode)) { + mutex_lock(&sbi->writepages); + locked = true; + } + ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); + if (locked) + mutex_unlock(&sbi->writepages); + f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); + + remove_dirty_dir_inode(inode); + + wbc->nr_to_write -= excess_nrtw; + return ret; +} + +static int f2fs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; + struct dnode_of_data dn; + int err = 0; + int ilock; + + f2fs_balance_fs(sbi); +repeat: + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + return -ENOMEM; + *pagep = page; + + ilock = mutex_lock_op(sbi); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, ALLOC_NODE); + if (err) + goto err; + + if (dn.data_blkaddr == NULL_ADDR) + err = reserve_new_block(&dn); + + f2fs_put_dnode(&dn); + if (err) + goto err; + + mutex_unlock_op(sbi, ilock); + + if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) + return 0; + + if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { + unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned end = start + len; + + /* Reading beyond i_size is simple: memset to zero */ + zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); + goto out; + } + + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + } else { + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) + 
return err; + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return -EIO; + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } + } +out: + SetPageUptodate(page); + clear_cold_data(page); + return 0; + +err: + mutex_unlock_op(sbi, ilock); + f2fs_put_page(page, 1); + return err; +} + +static int f2fs_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + + SetPageUptodate(page); + set_page_dirty(page); + + if (pos + copied > i_size_read(inode)) { + i_size_write(inode, pos + copied); + mark_inode_dirty(inode); + update_inode_page(inode); + } + + unlock_page(page); + page_cache_release(page); + return copied; +} + +static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + + if (rw == WRITE) + return 0; + + /* Needs synchronization with the cleaner */ + return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + get_data_block_ro); +} + +static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (S_ISDIR(inode->i_mode) && PageDirty(page)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + ClearPagePrivate(page); +} + +static int f2fs_release_data_page(struct page *page, gfp_t wait) +{ + ClearPagePrivate(page); + return 1; +} + +static int f2fs_set_data_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + set_dirty_dir_page(inode, page); + return 1; + } + return 0; +} + +static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) +{ + return generic_block_bmap(mapping, block, get_data_block_ro); +} + +const struct address_space_operations f2fs_dblock_aops = { + .readpage = f2fs_read_data_page, + .readpages = f2fs_read_data_pages, + .writepage = f2fs_write_data_page, + .writepages = f2fs_write_data_pages, + .write_begin = f2fs_write_begin, + .write_end = f2fs_write_end, + .set_page_dirty = f2fs_set_data_page_dirty, + .invalidatepage = f2fs_invalidate_data_page, + .releasepage = f2fs_release_data_page, + .direct_IO = f2fs_direct_IO, + .bmap = f2fs_bmap, +}; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c new file mode 100644 index 00000000000..a84b0a8e685 --- /dev/null +++ b/fs/f2fs/debug.c @@ -0,0 +1,353 @@ +/* + * f2fs debugging statistics + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * Copyright (c) 2012 Linux Foundation + * Copyright (c) 2012 Greg Kroah-Hartman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "gc.h" + +static LIST_HEAD(f2fs_stat_list); +static struct dentry *debugfs_root; +static DEFINE_MUTEX(f2fs_stat_mutex); + +static void update_general_status(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + int i; + + /* valid check of the segment numbers */ + si->hit_ext = sbi->read_hit_ext; + si->total_ext = sbi->total_hit_ext; + si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); + si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); + si->ndirty_dirs = sbi->n_dirty_dirs; + si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; + si->rsvd_segs = reserved_segments(sbi); + si->overp_segs = overprovision_segments(sbi); + si->valid_count = valid_user_blocks(sbi); + si->valid_node_count = valid_node_count(sbi); + si->valid_inode_count = valid_inode_count(sbi); + si->utilization = utilization(sbi); + + si->free_segs = free_segments(sbi); + si->free_secs = free_sections(sbi); + si->prefree_count = prefree_segments(sbi); + si->dirty_count = dirty_segments(sbi); + si->node_pages = sbi->node_inode->i_mapping->nrpages; + si->meta_pages = sbi->meta_inode->i_mapping->nrpages; + si->nats = NM_I(sbi)->nat_cnt; + si->sits = SIT_I(sbi)->dirty_sentries; + si->fnids = NM_I(sbi)->fcnt; + si->bg_gc = sbi->bg_gc; + si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) + * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) + / 2; + si->util_valid = (int)(written_block_count(sbi) >> + sbi->log_blocks_per_seg) + * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) + / 2; + si->util_invalid = 50 - si->util_free - si->util_valid; + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + si->curseg[i] = curseg->segno; + si->cursec[i] = curseg->segno / sbi->segs_per_sec; + si->curzone[i] = si->cursec[i] / sbi->secs_per_zone; + } + + for (i = 0; i < 2; i++) { + si->segment_count[i] = sbi->segment_count[i]; + si->block_count[i] = sbi->block_count[i]; + } +} + +/* + * This function calculates BDF of every segments + */ +static void update_sit_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; + struct sit_info *sit_i = SIT_I(sbi); + unsigned int segno, vblocks; + int ndirty = 0; + + bimodal = 0; + total_vblocks = 0; + blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); + hblks_per_sec = blks_per_sec / 2; + mutex_lock(&sit_i->sentry_lock); + for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { + vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + dist = abs(vblocks - hblks_per_sec); + bimodal += dist * dist; + + if (vblocks > 0 && vblocks < blks_per_sec) { + total_vblocks += vblocks; + ndirty++; + } + } + mutex_unlock(&sit_i->sentry_lock); + dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; + si->bimodal = bimodal / dist; + if (si->dirty_count) + si->avg_vblocks = total_vblocks / ndirty; + else + si->avg_vblocks = 0; +} + +/* + * This function calculates memory footprint. 
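+ * Roughly, base_mem accounts for the static structures (sb_info,
+ * SIT/NAT bitmaps, curseg and segment tables), while cache_mem below
+ * counts free nids, cached NAT entries, the node/meta page caches and
+ * the orphan/dirty-dir entry lists.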
+ */ +static void update_mem_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned npages; + + if (si->base_mem) + goto get_cache; + + si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; + si->base_mem += 2 * sizeof(struct f2fs_inode_info); + si->base_mem += sizeof(*sbi->ckpt); + + /* build sm */ + si->base_mem += sizeof(struct f2fs_sm_info); + + /* build sit */ + si->base_mem += sizeof(struct sit_info); + si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry); + si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); + if (sbi->segs_per_sec > 1) + si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); + si->base_mem += __bitmap_size(sbi, SIT_BITMAP); + + /* build free segmap */ + si->base_mem += sizeof(struct free_segmap_info); + si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); + + /* build curseg */ + si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; + si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE; + + /* build dirty segmap */ + si->base_mem += sizeof(struct dirty_seglist_info); + si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); + + /* buld nm */ + si->base_mem += sizeof(struct f2fs_nm_info); + si->base_mem += __bitmap_size(sbi, NAT_BITMAP); + + /* build gc */ + si->base_mem += sizeof(struct f2fs_gc_kthread); + +get_cache: + /* free nids */ + si->cache_mem = NM_I(sbi)->fcnt; + si->cache_mem += NM_I(sbi)->nat_cnt; + npages = sbi->node_inode->i_mapping->nrpages; + si->cache_mem += npages << PAGE_CACHE_SHIFT; + npages = sbi->meta_inode->i_mapping->nrpages; + si->cache_mem += npages << PAGE_CACHE_SHIFT; + si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); + si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); +} + +static int stat_show(struct seq_file *s, void *v) +{ + struct f2fs_stat_info *si; + int i = 0; + int j; + + mutex_lock(&f2fs_stat_mutex); + list_for_each_entry(si, &f2fs_stat_list, stat_list) { + char devname[BDEVNAME_SIZE]; + + update_general_status(si->sbi); + + seq_printf(s, "\n=====[ partition info(%s). 
#%d ]=====\n", + bdevname(si->sbi->sb->s_bdev, devname), i++); + seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", + si->sit_area_segs, si->nat_area_segs); + seq_printf(s, "[SSA: %d] [MAIN: %d", + si->ssa_area_segs, si->main_area_segs); + seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", + si->overp_segs, si->rsvd_segs); + seq_printf(s, "Utilization: %d%% (%d valid blocks)\n", + si->utilization, si->valid_count); + seq_printf(s, " - Node: %u (Inode: %u, ", + si->valid_node_count, si->valid_inode_count); + seq_printf(s, "Other: %u)\n - Data: %u\n", + si->valid_node_count - si->valid_inode_count, + si->valid_count - si->valid_node_count); + seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", + si->main_area_segs, si->main_area_sections, + si->main_area_zones); + seq_printf(s, " - COLD data: %d, %d, %d\n", + si->curseg[CURSEG_COLD_DATA], + si->cursec[CURSEG_COLD_DATA], + si->curzone[CURSEG_COLD_DATA]); + seq_printf(s, " - WARM data: %d, %d, %d\n", + si->curseg[CURSEG_WARM_DATA], + si->cursec[CURSEG_WARM_DATA], + si->curzone[CURSEG_WARM_DATA]); + seq_printf(s, " - HOT data: %d, %d, %d\n", + si->curseg[CURSEG_HOT_DATA], + si->cursec[CURSEG_HOT_DATA], + si->curzone[CURSEG_HOT_DATA]); + seq_printf(s, " - Dir dnode: %d, %d, %d\n", + si->curseg[CURSEG_HOT_NODE], + si->cursec[CURSEG_HOT_NODE], + si->curzone[CURSEG_HOT_NODE]); + seq_printf(s, " - File dnode: %d, %d, %d\n", + si->curseg[CURSEG_WARM_NODE], + si->cursec[CURSEG_WARM_NODE], + si->curzone[CURSEG_WARM_NODE]); + seq_printf(s, " - Indir nodes: %d, %d, %d\n", + si->curseg[CURSEG_COLD_NODE], + si->cursec[CURSEG_COLD_NODE], + si->curzone[CURSEG_COLD_NODE]); + seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n", + si->main_area_segs - si->dirty_count - + si->prefree_count - si->free_segs, + si->dirty_count); + seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", + si->prefree_count, si->free_segs, si->free_secs); + seq_printf(s, "GC calls: %d (BG: %d)\n", + si->call_count, si->bg_gc); + seq_printf(s, " - data segments : %d\n", si->data_segs); + seq_printf(s, " - node segments : %d\n", si->node_segs); + seq_printf(s, "Try to move %d blocks\n", si->tot_blks); + seq_printf(s, " - data blocks : %d\n", si->data_blks); + seq_printf(s, " - node blocks : %d\n", si->node_blks); + seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", + si->hit_ext, si->total_ext); + seq_printf(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - nodes %4d in %4d\n", + si->ndirty_node, si->node_pages); + seq_printf(s, " - dents %4d in dirs:%4d\n", + si->ndirty_dent, si->ndirty_dirs); + seq_printf(s, " - meta %4d in %4d\n", + si->ndirty_meta, si->meta_pages); + seq_printf(s, " - NATs %5d > %lu\n", + si->nats, NM_WOUT_THRESHOLD); + seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", + si->sits, si->fnids); + seq_puts(s, "\nDistribution of User Blocks:"); + seq_puts(s, " [ valid | invalid | free ]\n"); + seq_puts(s, " ["); + + for (j = 0; j < si->util_valid; j++) + seq_putc(s, '-'); + seq_putc(s, '|'); + + for (j = 0; j < si->util_invalid; j++) + seq_putc(s, '-'); + seq_putc(s, '|'); + + for (j = 0; j < si->util_free; j++) + seq_putc(s, '-'); + seq_puts(s, "]\n\n"); + seq_printf(s, "SSR: %u blocks in %u segments\n", + si->block_count[SSR], si->segment_count[SSR]); + seq_printf(s, "LFS: %u blocks in %u segments\n", + si->block_count[LFS], si->segment_count[LFS]); + + /* segment usage info */ + update_sit_info(si->sbi); + seq_printf(s, "\nBDF: %u, avg. 
vblocks: %u\n", + si->bimodal, si->avg_vblocks); + + /* memory footprint */ + update_mem_info(si->sbi); + seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", + (si->base_mem + si->cache_mem) >> 10, + si->base_mem >> 10, si->cache_mem >> 10); + } + mutex_unlock(&f2fs_stat_mutex); + return 0; +} + +static int stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, stat_show, inode->i_private); +} + +static const struct file_operations stat_fops = { + .open = stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int f2fs_build_stats(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_stat_info *si; + + si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL); + if (!si) + return -ENOMEM; + + si->all_area_segs = le32_to_cpu(raw_super->segment_count); + si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); + si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); + si->ssa_area_segs = le32_to_cpu(raw_super->segment_count_ssa); + si->main_area_segs = le32_to_cpu(raw_super->segment_count_main); + si->main_area_sections = le32_to_cpu(raw_super->section_count); + si->main_area_zones = si->main_area_sections / + le32_to_cpu(raw_super->secs_per_zone); + si->sbi = sbi; + sbi->stat_info = si; + + mutex_lock(&f2fs_stat_mutex); + list_add_tail(&si->stat_list, &f2fs_stat_list); + mutex_unlock(&f2fs_stat_mutex); + + return 0; +} + +void f2fs_destroy_stats(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + mutex_lock(&f2fs_stat_mutex); + list_del(&si->stat_list); + mutex_unlock(&f2fs_stat_mutex); + + kfree(si); +} + +void __init f2fs_create_root_stats(void) +{ + debugfs_root = debugfs_create_dir("f2fs", NULL); + if (debugfs_root) + debugfs_create_file("status", S_IRUGO, debugfs_root, + NULL, &stat_fops); +} + +void f2fs_destroy_root_stats(void) +{ + debugfs_remove_recursive(debugfs_root); + debugfs_root = NULL; +} diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c new file mode 100644 index 00000000000..11cdb75aa0a --- /dev/null +++ b/fs/f2fs/dir.c @@ -0,0 +1,714 @@ +/* + * fs/f2fs/dir.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include "f2fs.h" +#include "node.h" +#include "acl.h" +#include "xattr.h" + +static unsigned long dir_blocks(struct inode *inode) +{ + return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1)) + >> PAGE_CACHE_SHIFT; +} + +static unsigned int dir_buckets(unsigned int level) +{ + if (level < MAX_DIR_HASH_DEPTH / 2) + return 1 << level; + else + return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); +} + +static unsigned int bucket_blocks(unsigned int level) +{ + if (level < MAX_DIR_HASH_DEPTH / 2) + return 2; + else + return 4; +} + +static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { + [F2FS_FT_UNKNOWN] = DT_UNKNOWN, + [F2FS_FT_REG_FILE] = DT_REG, + [F2FS_FT_DIR] = DT_DIR, + [F2FS_FT_CHRDEV] = DT_CHR, + [F2FS_FT_BLKDEV] = DT_BLK, + [F2FS_FT_FIFO] = DT_FIFO, + [F2FS_FT_SOCK] = DT_SOCK, + [F2FS_FT_SYMLINK] = DT_LNK, +}; + +#define S_SHIFT 12 +static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, + [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK, + [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, +}; + +static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +{ + mode_t mode = inode->i_mode; + de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; +} + +static unsigned long dir_block_index(unsigned int level, unsigned int idx) +{ + unsigned long i; + unsigned long bidx = 0; + + for (i = 0; i < level; i++) + bidx += dir_buckets(i) * bucket_blocks(i); + bidx += idx * bucket_blocks(level); + return bidx; +} + +static bool early_match_name(const char *name, size_t namelen, + f2fs_hash_t namehash, struct f2fs_dir_entry *de) +{ + if (le16_to_cpu(de->name_len) != namelen) + return false; + + if (de->hash_code != namehash) + return false; + + return true; +} + +static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, + const char *name, size_t namelen, int *max_slots, + f2fs_hash_t namehash, struct page **res_page, + bool nocase) +{ + struct f2fs_dir_entry *de; + unsigned long bit_pos, end_pos, next_pos; + struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); + int slots; + + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, 0); + while (bit_pos < NR_DENTRY_IN_BLOCK) { + de = &dentry_blk->dentry[bit_pos]; + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + + if (nocase) { + if ((le16_to_cpu(de->name_len) == namelen) && + !strncasecmp(dentry_blk->filename[bit_pos], + name, namelen)) { + *res_page = dentry_page; + goto found; + } + } else if (early_match_name(name, namelen, namehash, de)) { + if (!memcmp(dentry_blk->filename[bit_pos], + name, namelen)) { + *res_page = dentry_page; + goto found; + } + } + next_pos = bit_pos + slots; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, next_pos); + if (bit_pos >= NR_DENTRY_IN_BLOCK) + end_pos = NR_DENTRY_IN_BLOCK; + else + end_pos = bit_pos; + if (*max_slots < end_pos - next_pos) + *max_slots = end_pos - next_pos; + } + + de = NULL; + kunmap(dentry_page); +found: + return de; +} + +static struct f2fs_dir_entry *find_in_level(struct inode *dir, + unsigned int level, const char *name, size_t namelen, + f2fs_hash_t namehash, struct page **res_page) +{ + int s = GET_DENTRY_SLOTS(namelen); + unsigned int nbucket, nblock; + unsigned int bidx, end_block; + struct page *dentry_page; + struct f2fs_dir_entry *de = NULL; + struct f2fs_sb_info 
*sbi = F2FS_SB(dir->i_sb); + bool room = false; + int max_slots = 0; + + BUG_ON(level > MAX_DIR_HASH_DEPTH); + + nbucket = dir_buckets(level); + nblock = bucket_blocks(level); + + bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); + end_block = bidx + nblock; + + for (; bidx < end_block; bidx++) { + bool nocase = false; + + /* no need to allocate new dentry pages to all the indices */ + dentry_page = find_data_page(dir, bidx, true); + if (IS_ERR(dentry_page)) { + room = true; + continue; + } + + if (test_opt(sbi, ANDROID_EMU) && + (sbi->android_emu_flags & F2FS_ANDROID_EMU_NOCASE) && + F2FS_I(dir)->i_advise & FADVISE_ANDROID_EMU) + nocase = true; + + de = find_in_block(dentry_page, name, namelen, + &max_slots, namehash, res_page, + nocase); + if (de) + break; + + if (max_slots >= s) + room = true; + f2fs_put_page(dentry_page, 0); + } + + if (!de && room && F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; + } + + return de; +} + +/* + * Find an entry in the specified directory with the wanted name. + * It returns the page where the entry was found (as a parameter - res_page), + * and the entry itself. Page is returned mapped and unlocked. + * Entry is guaranteed to be valid. + */ +struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, + struct qstr *child, struct page **res_page) +{ + const char *name = child->name; + size_t namelen = child->len; + unsigned long npages = dir_blocks(dir); + struct f2fs_dir_entry *de = NULL; + f2fs_hash_t name_hash; + unsigned int max_depth; + unsigned int level; + + if (namelen > F2FS_NAME_LEN) + return NULL; + + if (npages == 0) + return NULL; + + *res_page = NULL; + + name_hash = f2fs_dentry_hash(name, namelen); + max_depth = F2FS_I(dir)->i_current_depth; + + for (level = 0; level < max_depth; level++) { + de = find_in_level(dir, level, name, + namelen, name_hash, res_page); + if (de) + break; + } + if (!de && F2FS_I(dir)->chash != name_hash) { + F2FS_I(dir)->chash = name_hash; + F2FS_I(dir)->clevel = level - 1; + } + return de; +} + +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) +{ + struct page *page; + struct f2fs_dir_entry *de; + struct f2fs_dentry_block *dentry_blk; + + page = get_lock_data_page(dir, 0); + if (IS_ERR(page)) + return NULL; + + dentry_blk = kmap(page); + de = &dentry_blk->dentry[1]; + *p = page; + unlock_page(page); + return de; +} + +ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) +{ + ino_t res = 0; + struct f2fs_dir_entry *de; + struct page *page; + + de = f2fs_find_entry(dir, qstr, &page); + if (de) { + res = le32_to_cpu(de->ino); + kunmap(page); + f2fs_put_page(page, 0); + } + + return res; +} + +void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, + struct page *page, struct inode *inode) +{ + lock_page(page); + wait_on_page_writeback(page); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + kunmap(page); + set_page_dirty(page); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + mark_inode_dirty(dir); + + /* update parent inode number before releasing dentry page */ + F2FS_I(inode)->i_pino = dir->i_ino; + + f2fs_put_page(page, 1); +} + +static void init_dent_inode(const struct qstr *name, struct page *ipage) +{ + struct f2fs_node *rn; + + /* copy name info. 
to this inode page */ + rn = F2FS_NODE(ipage); + rn->i.i_namelen = cpu_to_le32(name->len); + memcpy(rn->i.i_name, name->name, name->len); + set_page_dirty(ipage); +} + +int update_dent_inode(struct inode *inode, const struct qstr *name) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + init_dent_inode(name, page); + f2fs_put_page(page, 1); + + return 0; +} + +static int make_empty_dir(struct inode *inode, + struct inode *parent, struct page *page) +{ + struct page *dentry_page; + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dir_entry *de; + void *kaddr; + + dentry_page = get_new_data_page(inode, page, 0, true); + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); + + kaddr = kmap_atomic(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)kaddr; + + de = &dentry_blk->dentry[0]; + de->name_len = cpu_to_le16(1); + de->hash_code = 0; + de->ino = cpu_to_le32(inode->i_ino); + memcpy(dentry_blk->filename[0], ".", 1); + set_de_type(de, inode); + + de = &dentry_blk->dentry[1]; + de->hash_code = 0; + de->name_len = cpu_to_le16(2); + de->ino = cpu_to_le32(parent->i_ino); + memcpy(dentry_blk->filename[1], "..", 2); + set_de_type(de, inode); + + test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); + test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + kunmap_atomic(kaddr); + + set_page_dirty(dentry_page); + f2fs_put_page(dentry_page, 1); + return 0; +} + +static struct page *init_inode_metadata(struct inode *inode, + struct inode *dir, const struct qstr *name) +{ + struct page *page; + int err; + + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + page = new_inode_page(inode, name); + if (IS_ERR(page)) + return page; + + if (S_ISDIR(inode->i_mode)) { + err = make_empty_dir(inode, dir, page); + if (err) + goto error; + } + + err = f2fs_init_acl(inode, dir); + if (err) + goto error; + + err = f2fs_init_security(inode, dir, name, page); + if (err) + goto error; + + wait_on_page_writeback(page); + } else { + page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); + if (IS_ERR(page)) + return page; + + wait_on_page_writeback(page); + set_cold_node(inode, page); + } + + init_dent_inode(name, page); + + /* + * This file should be checkpointed during fsync. + * We lost i_pino from now on. 
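+ * (file_lost_pino() below marks the inode as having a stale parent
+ * ino, so a later fsync can tell that recovery via i_pino is no
+ * longer safe.)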
+ */ + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + file_lost_pino(inode); + inc_nlink(inode); + } + return page; + +error: + f2fs_put_page(page, 1); + remove_inode_page(inode); + return ERR_PTR(err); +} + +static void update_parent_metadata(struct inode *dir, struct inode *inode, + unsigned int current_depth) +{ + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (S_ISDIR(inode->i_mode)) { + inc_nlink(dir); + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); + } + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + if (F2FS_I(dir)->i_current_depth != current_depth) { + F2FS_I(dir)->i_current_depth = current_depth; + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) + update_inode_page(dir); + else + mark_inode_dirty(dir); + + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); +} + +static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) +{ + int bit_start = 0; + int zero_start, zero_end; +next: + zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_start); + if (zero_start >= NR_DENTRY_IN_BLOCK) + return NR_DENTRY_IN_BLOCK; + + zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + zero_start); + if (zero_end - zero_start >= slots) + return zero_start; + + bit_start = zero_end + 1; + + if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) + return NR_DENTRY_IN_BLOCK; + goto next; +} + +/* + * Caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). + */ +int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) +{ + unsigned int bit_pos; + unsigned int level; + unsigned int current_depth; + unsigned long bidx, block; + f2fs_hash_t dentry_hash; + struct f2fs_dir_entry *de; + unsigned int nbucket, nblock; + size_t namelen = name->len; + struct page *dentry_page = NULL; + struct f2fs_dentry_block *dentry_blk = NULL; + int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; + int err = 0; + int i; + + dentry_hash = f2fs_dentry_hash(name->name, name->len); + level = 0; + current_depth = F2FS_I(dir)->i_current_depth; + if (F2FS_I(dir)->chash == dentry_hash) { + level = F2FS_I(dir)->clevel; + F2FS_I(dir)->chash = 0; + } + +start: + if (current_depth == MAX_DIR_HASH_DEPTH) + return -ENOSPC; + + /* Increase the depth, if required */ + if (level == current_depth) + ++current_depth; + + nbucket = dir_buckets(level); + nblock = bucket_blocks(level); + + bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); + + for (block = bidx; block <= (bidx + nblock - 1); block++) { + dentry_page = get_new_data_page(dir, NULL, block, true); + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); + + dentry_blk = kmap(dentry_page); + bit_pos = room_for_filename(dentry_blk, slots); + if (bit_pos < NR_DENTRY_IN_BLOCK) + goto add_dentry; + + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + } + + /* Move to next level to find the empty slot for new dentry */ + ++level; + goto start; +add_dentry: + wait_on_page_writeback(dentry_page); + + page = init_inode_metadata(inode, dir, name); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } + de = &dentry_blk->dentry[bit_pos]; + de->hash_code = dentry_hash; + de->name_len = cpu_to_le16(namelen); + memcpy(dentry_blk->filename[bit_pos], name->name, name->len); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + for (i = 0; i < slots; 
i++) + test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + set_page_dirty(dentry_page); + + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, current_depth); +fail: + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + return err; +} + +/* + * It only removes the dentry from the dentry page,corresponding name + * entry in name page does not need to be touched during deletion. + */ +void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *inode) +{ + struct f2fs_dentry_block *dentry_blk; + unsigned int bit_pos; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + void *kaddr = page_address(page); + int i; + + lock_page(page); + wait_on_page_writeback(page); + + dentry_blk = (struct f2fs_dentry_block *)kaddr; + bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; + for (i = 0; i < slots; i++) + test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + + /* Let's check and deallocate this dentry page */ + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + 0); + kunmap(page); /* kunmap - pair of f2fs_find_entry */ + set_page_dirty(page); + + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + if (inode && S_ISDIR(inode->i_mode)) { + drop_nlink(dir); + update_inode_page(dir); + } else { + mark_inode_dirty(dir); + } + + if (inode) { + inode->i_ctime = CURRENT_TIME; + drop_nlink(inode); + if (S_ISDIR(inode->i_mode)) { + drop_nlink(inode); + i_size_write(inode, 0); + } + update_inode_page(inode); + + if (inode->i_nlink == 0) + add_orphan_inode(sbi, inode->i_ino); + else + release_orphan_inode(sbi); + } + + if (bit_pos == NR_DENTRY_IN_BLOCK) { + truncate_hole(dir, page->index, page->index + 1); + clear_page_dirty_for_io(page); + ClearPageUptodate(page); + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(dir); + } + f2fs_put_page(page, 1); +} + +bool f2fs_empty_dir(struct inode *dir) +{ + unsigned long bidx; + struct page *dentry_page; + unsigned int bit_pos; + struct f2fs_dentry_block *dentry_blk; + unsigned long nblock = dir_blocks(dir); + + for (bidx = 0; bidx < nblock; bidx++) { + void *kaddr; + dentry_page = get_lock_data_page(dir, bidx); + if (IS_ERR(dentry_page)) { + if (PTR_ERR(dentry_page) == -ENOENT) + continue; + else + return false; + } + + kaddr = kmap_atomic(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)kaddr; + if (bidx == 0) + bit_pos = 2; + else + bit_pos = 0; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_pos); + kunmap_atomic(kaddr); + + f2fs_put_page(dentry_page, 1); + + if (bit_pos < NR_DENTRY_IN_BLOCK) + return false; + } + return true; +} + +static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + unsigned long pos = file->f_pos; + struct inode *inode = file->f_dentry->d_inode; + unsigned long npages = dir_blocks(inode); + unsigned char *types = NULL; + unsigned int bit_pos = 0, start_bit_pos = 0; + int over = 0; + struct f2fs_dentry_block *dentry_blk = NULL; + struct f2fs_dir_entry *de = NULL; + struct page *dentry_page = NULL; + unsigned int n = 0; + unsigned char d_type = DT_UNKNOWN; + int slots; + + types = f2fs_filetype_table; + bit_pos = (pos % NR_DENTRY_IN_BLOCK); + n = (pos / 
NR_DENTRY_IN_BLOCK); + + for ( ; n < npages; n++) { + dentry_page = get_lock_data_page(inode, n); + if (IS_ERR(dentry_page)) + continue; + + start_bit_pos = bit_pos; + dentry_blk = kmap(dentry_page); + while (bit_pos < NR_DENTRY_IN_BLOCK) { + d_type = DT_UNKNOWN; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_pos); + if (bit_pos >= NR_DENTRY_IN_BLOCK) + break; + + de = &dentry_blk->dentry[bit_pos]; + if (types && de->file_type < F2FS_FT_MAX) + d_type = types[de->file_type]; + + over = filldir(dirent, + dentry_blk->filename[bit_pos], + le16_to_cpu(de->name_len), + (n * NR_DENTRY_IN_BLOCK) + bit_pos, + le32_to_cpu(de->ino), d_type); + if (over) { + file->f_pos += bit_pos - start_bit_pos; + goto success; + } + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + bit_pos += slots; + } + bit_pos = 0; + file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK; + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + dentry_page = NULL; + } +success: + if (dentry_page && !IS_ERR(dentry_page)) { + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + } + + return 0; +} + +const struct file_operations f2fs_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = f2fs_readdir, + .fsync = f2fs_sync_file, + .unlocked_ioctl = f2fs_ioctl, +}; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h new file mode 100644 index 00000000000..c6c24756047 --- /dev/null +++ b/fs/f2fs/f2fs.h @@ -0,0 +1,1290 @@ +/* + * fs/f2fs/f2fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_F2FS_H +#define _LINUX_F2FS_H + +#include +#include +#include +#include +#include +#include +#include + +/* + * For mount options + */ +#define F2FS_MOUNT_BG_GC 0x00000001 +#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 +#define F2FS_MOUNT_DISCARD 0x00000004 +#define F2FS_MOUNT_NOHEAP 0x00000008 +#define F2FS_MOUNT_XATTR_USER 0x00000010 +#define F2FS_MOUNT_POSIX_ACL 0x00000020 +#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 +#define F2FS_MOUNT_INLINE_XATTR 0x00000080 +#define F2FS_MOUNT_ANDROID_EMU 0x00001000 +#define F2FS_MOUNT_ERRORS_PANIC 0x00002000 +#define F2FS_MOUNT_ERRORS_RECOVER 0x00004000 + +#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option) + +#define ver_after(a, b) (typecheck(unsigned long long, a) && \ + typecheck(unsigned long long, b) && \ + ((long long)((a) - (b)) > 0)) + +typedef u32 block_t; /* + * should not change u32, since it is the on-disk block + * address format, __le32. + */ +typedef u32 nid_t; + +struct f2fs_mount_info { + unsigned int opt; +}; + +#define CRCPOLY_LE 0xedb88320 + +static inline __u32 f2fs_crc32(void *buf, size_t len) +{ + unsigned char *p = (unsigned char *)buf; + __u32 crc = F2FS_SUPER_MAGIC; + int i; + + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 
CRCPOLY_LE : 0); + } + return crc; +} + +static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size) +{ + return f2fs_crc32(buf, buf_size) == blk_crc; +} + +/* + * For checkpoint manager + */ +enum { + NAT_BITMAP, + SIT_BITMAP +}; + +/* for the list of orphan inodes */ +struct orphan_inode_entry { + struct list_head list; /* list head */ + nid_t ino; /* inode number */ +}; + +/* for the list of directory inodes */ +struct dir_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ +}; + +/* for the list of fsync inodes, used only during recovery */ +struct fsync_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ + block_t blkaddr; /* block address locating the last inode */ +}; + +#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) +#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits)) + +#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne) +#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid) +#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) +#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) + +static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = nats_in_cursum(rs); + rs->n_nats = cpu_to_le16(before + i); + return before; +} + +static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = sits_in_cursum(rs); + rs->n_sits = cpu_to_le16(before + i); + return before; +} + +/* + * ioctl commands + */ +#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS +#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS + +#if defined(__KERNEL__) && defined(CONFIG_COMPAT) +/* + * ioctl commands in 32 bit emulation + */ +#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#endif + +/* + * For INODE and NODE manager + */ +/* + * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 + * as its node offset to distinguish from index node blocks. + * But some bits are used to mark the node block. + */ +#define XATTR_NODE_OFFSET ((((unsigned int)-1) << OFFSET_BIT_SHIFT) \ + >> OFFSET_BIT_SHIFT) +enum { + ALLOC_NODE, /* allocate a new node page if needed */ + LOOKUP_NODE, /* look up a node without readahead */ + LOOKUP_NODE_RA, /* + * look up a node with readahead called + * by get_datablock_ro. + */ +}; + +#define F2FS_LINK_MAX 32000 /* maximum link count per file */ + +/* for in-memory extent cache entry */ +struct extent_info { + rwlock_t ext_lock; /* rwlock for consistency */ + unsigned int fofs; /* start offset in a file */ + u32 blk_addr; /* start block address of the extent */ + unsigned int len; /* length of the extent */ +}; + +/* + * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. 
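+ *
+ * Illustrative sketch, not part of the original patch: a hint bit is
+ * normally tested and set on the in-memory inode, along the lines of
+ *
+ *	if (F2FS_I(inode)->i_advise & FADVISE_COLD_BIT)
+ *		treat_as_cold_data();
+ *	F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT;
+ *
+ * where treat_as_cold_data() is a hypothetical placeholder, not a real
+ * helper in this series.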
+ */ +#define FADVISE_COLD_BIT 0x01 +#define FADVISE_LOST_PINO_BIT 0x02 +#define FADVISE_ANDROID_EMU 0x10 +#define FADVISE_ANDROID_EMU_ROOT 0x20 + +struct f2fs_inode_info { + struct inode vfs_inode; /* serve a vfs inode */ + unsigned long i_flags; /* keep an inode flags for ioctl */ + unsigned char i_advise; /* use to give file attribute hints */ + unsigned int i_current_depth; /* use only in directory structure */ + unsigned int i_pino; /* parent inode number */ + umode_t i_acl_mode; /* keep file acl mode temporarily */ + + /* Use below internally in f2fs*/ + unsigned long flags; /* use to pass per-file flags */ + atomic_t dirty_dents; /* # of dirty dentry pages */ + f2fs_hash_t chash; /* hash value of given file name */ + unsigned int clevel; /* maximum level of given file name */ + nid_t i_xattr_nid; /* node id that contains xattrs */ + unsigned long long xattr_ver; /* cp version of xattr modification */ + struct extent_info ext; /* in-memory extent cache entry */ +}; + +static inline void get_extent_info(struct extent_info *ext, + struct f2fs_extent i_ext) +{ + write_lock(&ext->ext_lock); + ext->fofs = le32_to_cpu(i_ext.fofs); + ext->blk_addr = le32_to_cpu(i_ext.blk_addr); + ext->len = le32_to_cpu(i_ext.len); + write_unlock(&ext->ext_lock); +} + +static inline void set_raw_extent(struct extent_info *ext, + struct f2fs_extent *i_ext) +{ + read_lock(&ext->ext_lock); + i_ext->fofs = cpu_to_le32(ext->fofs); + i_ext->blk_addr = cpu_to_le32(ext->blk_addr); + i_ext->len = cpu_to_le32(ext->len); + read_unlock(&ext->ext_lock); +} + +struct f2fs_nm_info { + block_t nat_blkaddr; /* base disk address of NAT */ + nid_t max_nid; /* maximum possible node ids */ + nid_t next_scan_nid; /* the next nid to be scanned */ + + /* NAT cache management */ + struct radix_tree_root nat_root;/* root of the nat entry cache */ + rwlock_t nat_tree_lock; /* protect nat_tree_lock */ + unsigned int nat_cnt; /* the # of cached nat entries */ + struct list_head nat_entries; /* cached nat entry list (clean) */ + struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ + + /* free node ids management */ + struct list_head free_nid_list; /* a list for free nids */ + spinlock_t free_nid_list_lock; /* protect free nid list */ + unsigned int fcnt; /* the number of free node id */ + struct mutex build_lock; /* lock for build free nids */ + + /* for checkpoint */ + char *nat_bitmap; /* NAT bitmap pointer */ + int bitmap_size; /* bitmap size */ +}; + +/* + * this structure is used as one of function parameters. + * all the information are dedicated to a given direct node block determined + * by the data offset in a file. + */ +struct dnode_of_data { + struct inode *inode; /* vfs inode pointer */ + struct page *inode_page; /* its inode page, NULL is possible */ + struct page *node_page; /* cached direct node page */ + nid_t nid; /* node id of the direct node block */ + unsigned int ofs_in_node; /* data offset in the node page */ + bool inode_page_locked; /* inode page is locked or not */ + block_t data_blkaddr; /* block address of the node block */ +}; + +static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, + struct page *ipage, struct page *npage, nid_t nid) +{ + memset(dn, 0, sizeof(*dn)); + dn->inode = inode; + dn->inode_page = ipage; + dn->node_page = npage; + dn->nid = nid; +} + +/* + * For SIT manager + * + * By default, there are 6 active log areas across the whole main area. 
+ * When considering hot and cold data separation to reduce cleaning overhead, + * we split 3 for data logs and 3 for node logs as hot, warm, and cold types, + * respectively. + * In the current design, you should not change the numbers intentionally. + * Instead, as a mount option such as active_logs=x, you can use 2, 4, and 6 + * logs individually according to the underlying devices. (default: 6) + * Just in case, on-disk layout covers maximum 16 logs that consist of 8 for + * data and 8 for node logs. + */ +#define NR_CURSEG_DATA_TYPE (3) +#define NR_CURSEG_NODE_TYPE (3) +#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) + +enum { + CURSEG_HOT_DATA = 0, /* directory entry blocks */ + CURSEG_WARM_DATA, /* data blocks */ + CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ + CURSEG_HOT_NODE, /* direct node blocks of directory files */ + CURSEG_WARM_NODE, /* direct node blocks of normal files */ + CURSEG_COLD_NODE, /* indirect node blocks */ + NO_CHECK_TYPE +}; + +struct f2fs_sm_info { + struct sit_info *sit_info; /* whole segment information */ + struct free_segmap_info *free_info; /* free segment information */ + struct dirty_seglist_info *dirty_info; /* dirty segment information */ + struct curseg_info *curseg_array; /* active segment information */ + + struct list_head wblist_head; /* list of under-writeback pages */ + spinlock_t wblist_lock; /* lock for checkpoint */ + + block_t seg0_blkaddr; /* block address of 0'th segment */ + block_t main_blkaddr; /* start block address of main area */ + block_t ssa_blkaddr; /* start block address of SSA area */ + + unsigned int segment_count; /* total # of segments */ + unsigned int main_segments; /* # of segments in main area */ + unsigned int reserved_segments; /* # of reserved segments */ + unsigned int ovp_segments; /* # of overprovision segments */ +}; + +/* + * For superblock + */ +/* + * COUNT_TYPE for monitoring + * + * f2fs monitors the number of several block types such as on-writeback, + * dirty dentry blocks, dirty node blocks, and dirty meta blocks. + */ +enum count_type { + F2FS_WRITEBACK, + F2FS_DIRTY_DENTS, + F2FS_DIRTY_NODES, + F2FS_DIRTY_META, + NR_COUNT_TYPE, +}; + +/* + * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS]. + * The checkpoint procedure blocks all the locks in this fs_lock array. + * Some FS operations grab free locks, and if there is no free lock, + * then wait to grab a lock in a round-robin manner. + */ +#define NR_GLOBAL_LOCKS 8 + +/* + * The below are the page types of bios used in submti_bio(). + * The available types are: + * DATA User data pages. It operates as async mode. + * NODE Node pages. It operates as async mode. + * META FS metadata pages such as SIT, NAT, CP. + * NR_PAGE_TYPE The number of page types. + * META_FLUSH Make sure the previous pages are written + * with waiting the bio's completion + * ... Only can be used with META. 
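+ *
+ * Illustrative only, not part of the original patch: a checkpoint-style
+ * flush would typically pair the two META variants roughly as
+ *
+ *	sync_meta_pages(sbi, META, LONG_MAX);
+ *	f2fs_submit_bio(sbi, META_FLUSH, true);
+ *
+ * where the second call waits for the bio's completion, using the
+ * helpers declared later in this header.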
+ */ +enum page_type { + DATA, + NODE, + META, + NR_PAGE_TYPE, + META_FLUSH, +}; + +/* + * Android sdcard emulation flags + */ +#define F2FS_ANDROID_EMU_NOCASE 0x00000001 + +struct f2fs_sb_info { + struct super_block *sb; /* pointer to VFS super block */ + struct proc_dir_entry *s_proc; /* proc entry */ + struct buffer_head *raw_super_buf; /* buffer head of raw sb */ + struct f2fs_super_block *raw_super; /* raw super block pointer */ + int s_dirty; /* dirty flag for checkpoint */ + + /* for node-related operations */ + struct f2fs_nm_info *nm_info; /* node manager */ + struct inode *node_inode; /* cache node blocks */ + + /* for segment-related operations */ + struct f2fs_sm_info *sm_info; /* segment manager */ + struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ + sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ + struct rw_semaphore bio_sem; /* IO semaphore */ + + /* for checkpoint */ + struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ + struct inode *meta_inode; /* cache meta blocks */ + struct mutex cp_mutex; /* checkpoint procedure lock */ + struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */ + struct mutex node_write; /* locking node writes */ + struct mutex writepages; /* mutex for writepages() */ + unsigned char next_lock_num; /* round-robin global locks */ + int por_doing; /* recovery is doing or not */ + int on_build_free_nids; /* build_free_nids is doing */ + + /* for orphan inode management */ + struct list_head orphan_inode_list; /* orphan inode list */ + struct mutex orphan_inode_mutex; /* for orphan inode list */ + unsigned int n_orphans; /* # of orphan inodes */ + + /* for directory inode management */ + struct list_head dir_inode_list; /* dir inode list */ + spinlock_t dir_inode_lock; /* for dir inode list lock */ + + /* basic file system units */ + unsigned int log_sectors_per_block; /* log2 sectors per block */ + unsigned int log_blocksize; /* log2 block size */ + unsigned int blocksize; /* block size */ + unsigned int root_ino_num; /* root inode number*/ + unsigned int node_ino_num; /* node inode number*/ + unsigned int meta_ino_num; /* meta inode number*/ + unsigned int log_blocks_per_seg; /* log2 blocks per segment */ + unsigned int blocks_per_seg; /* blocks per segment */ + unsigned int segs_per_sec; /* segments per section */ + unsigned int secs_per_zone; /* sections per zone */ + unsigned int total_sections; /* total section count */ + unsigned int total_node_count; /* total node block count */ + unsigned int total_valid_node_count; /* valid node block count */ + unsigned int total_valid_inode_count; /* valid inode count */ + int active_logs; /* # of active logs */ + + block_t user_block_count; /* # of user blocks */ + block_t total_valid_block_count; /* # of valid blocks */ + block_t alloc_valid_block_count; /* # of allocated blocks */ + block_t last_valid_block_count; /* for recovery */ + u32 s_next_generation; /* for NFS support */ + atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ + + struct f2fs_mount_info mount_opt; /* mount options */ + + /* for cleaning operations */ + struct mutex gc_mutex; /* mutex for GC */ + struct f2fs_gc_kthread *gc_thread; /* GC thread */ + unsigned int cur_victim_sec; /* current victim section num */ + + /* + * for stat information. + * one is for the LFS mode, and the other is for the SSR mode. 
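+ *
+ * Illustrative note, not part of the original patch: the two-element
+ * arrays below are indexed by allocation mode, e.g.
+ *
+ *	sbi->segment_count[LFS]++;	and	sbi->segment_count[SSR]++;
+ *
+ * assuming the LFS/SSR allocation-mode constants defined in segment.h.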
+ */ +#ifdef CONFIG_F2FS_STAT_FS + struct f2fs_stat_info *stat_info; /* FS status information */ + unsigned int segment_count[2]; /* # of allocated segments */ + unsigned int block_count[2]; /* # of allocated blocks */ + int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ + int bg_gc; /* background gc calls */ + unsigned int n_dirty_dirs; /* # of dir inodes */ +#endif + unsigned int last_victim[2]; /* last victim segment # */ + spinlock_t stat_lock; /* lock for stat operations */ + + /* For sysfs suppport */ + struct kobject s_kobj; + struct completion s_kobj_unregister; + + /* For Android sdcard emulation */ + u32 android_emu_uid; + u32 android_emu_gid; + umode_t android_emu_mode; + int android_emu_flags; +}; + +/* + * Inline functions + */ +static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) +{ + return container_of(inode, struct f2fs_inode_info, vfs_inode); +} + +static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_super_block *)(sbi->raw_super); +} + +static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_checkpoint *)(sbi->ckpt); +} + +static inline struct f2fs_node *F2FS_NODE(struct page *page) +{ + return (struct f2fs_node *)page_address(page); +} + +static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_nm_info *)(sbi->nm_info); +} + +static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_sm_info *)(sbi->sm_info); +} + +static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi) +{ + return (struct sit_info *)(SM_I(sbi)->sit_info); +} + +static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi) +{ + return (struct free_segmap_info *)(SM_I(sbi)->free_info); +} + +static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi) +{ + return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); +} + +static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 1; +} + +static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 0; +} + +static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) +{ + return le64_to_cpu(cp->checkpoint_ver); +} + +static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + return ckpt_flags & f; +} + +static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + ckpt_flags |= f; + cp->ckpt_flags = cpu_to_le32(ckpt_flags); +} + +static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + ckpt_flags &= (~f); + cp->ckpt_flags = cpu_to_le32(ckpt_flags); +} + +static inline void mutex_lock_all(struct f2fs_sb_info *sbi) +{ + int i; + + for (i = 0; i < NR_GLOBAL_LOCKS; i++) { + /* + * This is the only time we take multiple fs_lock[] + * instances; the order is immaterial since we + * always hold cp_mutex, which serializes multiple + * such operations. 
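+ *
+ * Illustrative only, not part of the original patch: the checkpoint
+ * path pairs this with mutex_unlock_all() while holding cp_mutex,
+ * whereas a normal FS operation grabs a single slot, e.g.
+ *
+ *	ilock = mutex_lock_op(sbi);
+ *	... update metadata ...
+ *	mutex_unlock_op(sbi, ilock);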
+ */ + mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); + } +} + +static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) +{ + int i = 0; + for (; i < NR_GLOBAL_LOCKS; i++) + mutex_unlock(&sbi->fs_lock[i]); +} + +static inline int mutex_lock_op(struct f2fs_sb_info *sbi) +{ + unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS; + int i = 0; + + for (; i < NR_GLOBAL_LOCKS; i++) + if (mutex_trylock(&sbi->fs_lock[i])) + return i; + + mutex_lock(&sbi->fs_lock[next_lock]); + sbi->next_lock_num++; + return next_lock; +} + +static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock) +{ + if (ilock < 0) + return; + BUG_ON(ilock >= NR_GLOBAL_LOCKS); + mutex_unlock(&sbi->fs_lock[ilock]); +} + +/* + * Check whether the given nid is within node id range. + */ +static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) +{ + WARN_ON((nid >= NM_I(sbi)->max_nid)); + if (nid >= NM_I(sbi)->max_nid) + return -EINVAL; + return 0; +} + +#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 + +/* + * Check whether the inode has blocks or not + */ +static inline int F2FS_HAS_BLOCKS(struct inode *inode) +{ + if (F2FS_I(inode)->i_xattr_nid) + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); + else + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); +} + +static inline int f2fs_handle_error(struct f2fs_sb_info *sbi) +{ + if (test_opt(sbi, ERRORS_PANIC)) + BUG(); + if (test_opt(sbi, ERRORS_RECOVER)) + return 1; + return 0; +} + +static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, blkcnt_t count) +{ + block_t valid_block_count; + + spin_lock(&sbi->stat_lock); + valid_block_count = + sbi->total_valid_block_count + (block_t)count; + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + inode->i_blocks += count; + sbi->total_valid_block_count = valid_block_count; + sbi->alloc_valid_block_count += (block_t)count; + spin_unlock(&sbi->stat_lock); + return true; +} + +static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, + blkcnt_t count) +{ + spin_lock(&sbi->stat_lock); + + if (sbi->total_valid_block_count < (block_t)count) { + pr_crit("F2FS-fs (%s): block accounting error: %u < %llu\n", + sbi->sb->s_id, sbi->total_valid_block_count, count); + f2fs_handle_error(sbi); + sbi->total_valid_block_count = count; + } + if (inode->i_blocks < count) { + pr_crit("F2FS-fs (%s): inode accounting error: %llu < %llu\n", + sbi->sb->s_id, inode->i_blocks, count); + f2fs_handle_error(sbi); + inode->i_blocks = count; + } + + inode->i_blocks -= count; + sbi->total_valid_block_count -= (block_t)count; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_inc(&sbi->nr_pages[count_type]); + F2FS_SET_SB_DIRT(sbi); +} + +static inline void inode_inc_dirty_dents(struct inode *inode) +{ + atomic_inc(&F2FS_I(inode)->dirty_dents); +} + +static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_dec(&sbi->nr_pages[count_type]); +} + +static inline void inode_dec_dirty_dents(struct inode *inode) +{ + atomic_dec(&F2FS_I(inode)->dirty_dents); +} + +static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) +{ + return atomic_read(&sbi->nr_pages[count_type]); +} + +static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) +{ + unsigned int pages_per_sec = sbi->segs_per_sec * + (1 << sbi->log_blocks_per_seg); + return ((get_pages(sbi, 
block_type) + pages_per_sec - 1) + >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; +} + +static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) +{ + block_t ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_block_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + + /* return NAT or SIT bitmap */ + if (flag == NAT_BITMAP) + return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); + else if (flag == SIT_BITMAP) + return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); + + return 0; +} + +static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + int offset = (flag == NAT_BITMAP) ? + le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; + return &ckpt->sit_nat_version_bitmap + offset; +} + +static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) +{ + block_t start_addr; + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long long ckpt_version = cur_cp_version(ckpt); + + start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); + + /* + * odd numbered checkpoint should at cp segment 0 + * and even segent must be at cp segment 1 + */ + if (!(ckpt_version & 1)) + start_addr += sbi->blocks_per_seg; + + return start_addr; +} + +static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) +{ + return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + block_t valid_block_count; + unsigned int valid_node_count; + + spin_lock(&sbi->stat_lock); + + valid_block_count = sbi->total_valid_block_count + (block_t)count; + valid_node_count = sbi->total_valid_node_count + count; + + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if (valid_node_count > sbi->total_node_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if (inode) + inode->i_blocks += count; + sbi->alloc_valid_block_count += (block_t)count; + sbi->total_valid_node_count = valid_node_count; + sbi->total_valid_block_count = valid_block_count; + spin_unlock(&sbi->stat_lock); + + return true; +} + +static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + spin_lock(&sbi->stat_lock); + + if (sbi->total_valid_block_count < count) { + pr_crit("F2FS-fs (%s): block accounting error: %u < %u\n", + sbi->sb->s_id, sbi->total_valid_block_count, count); + f2fs_handle_error(sbi); + sbi->total_valid_block_count = count; + } + if (sbi->total_valid_node_count < count) { + pr_crit("F2FS-fs (%s): node accounting error: %u < %u\n", + sbi->sb->s_id, sbi->total_valid_node_count, count); + f2fs_handle_error(sbi); + sbi->total_valid_node_count = count; + } + if (inode->i_blocks < count) { + pr_crit("F2FS-fs (%s): inode accounting error: %llu < %u\n", + sbi->sb->s_id, inode->i_blocks, count); + f2fs_handle_error(sbi); + inode->i_blocks = count; + } + + inode->i_blocks -= count; + sbi->total_valid_node_count -= count; + sbi->total_valid_block_count -= (block_t)count; + + spin_unlock(&sbi->stat_lock); +} + +static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_node_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) +{ + 
spin_lock(&sbi->stat_lock); + BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); + sbi->total_valid_inode_count++; + spin_unlock(&sbi->stat_lock); +} + +static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(!sbi->total_valid_inode_count); + sbi->total_valid_inode_count--; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_inode_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void f2fs_put_page(struct page *page, int unlock) +{ + if (!page || IS_ERR(page)) + return; + + if (unlock) { + BUG_ON(!PageLocked(page)); + unlock_page(page); + } + page_cache_release(page); +} + +static inline void f2fs_put_dnode(struct dnode_of_data *dn) +{ + if (dn->node_page) + f2fs_put_page(dn->node_page, 1); + if (dn->inode_page && dn->node_page != dn->inode_page) + f2fs_put_page(dn->inode_page, 0); + dn->node_page = NULL; + dn->inode_page = NULL; +} + +static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, + size_t size, void (*ctor)(void *)) +{ + return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); +} + +#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) + +static inline bool IS_INODE(struct page *page) +{ + struct f2fs_node *p = F2FS_NODE(page); + return RAW_IS_INODE(p); +} + +static inline __le32 *blkaddr_in_node(struct f2fs_node *node) +{ + return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr; +} + +static inline block_t datablock_addr(struct page *node_page, + unsigned int offset) +{ + struct f2fs_node *raw_node; + __le32 *addr_array; + raw_node = F2FS_NODE(node_page); + addr_array = blkaddr_in_node(raw_node); + return le32_to_cpu(addr_array[offset]); +} + +static inline int f2fs_test_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + return mask & *addr; +} + +static inline int f2fs_set_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr |= mask; + return ret; +} + +static inline int f2fs_clear_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr &= ~mask; + return ret; +} + +/* used for f2fs_inode_info->flags */ +enum { + FI_NEW_INODE, /* indicate newly allocated inode */ + FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_INC_LINK, /* need to increment i_nlink */ + FI_ACL_MODE, /* indicate acl mode */ + FI_NO_ALLOC, /* should not allocate any blocks */ + FI_UPDATE_DIR, /* should update inode block for consistency */ + FI_DELAY_IPUT, /* used for the recovery */ + FI_INLINE_XATTR, /* used for inline xattr */ +}; + +static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + set_bit(flag, &fi->flags); +} + +static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) +{ + return test_bit(flag, &fi->flags); +} + +static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + clear_bit(flag, &fi->flags); +} + +static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) +{ + fi->i_acl_mode = mode; + set_inode_flag(fi, FI_ACL_MODE); +} + +static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + if (is_inode_flag_set(fi, FI_ACL_MODE)) { + clear_inode_flag(fi, FI_ACL_MODE); + return 1; + } + return 
0; +} + +int f2fs_android_emu(struct f2fs_sb_info *, struct inode *, u32 *, u32 *, + umode_t *); + +#define IS_ANDROID_EMU(sbi, fi, pfi) \ + (test_opt((sbi), ANDROID_EMU) && \ + (((fi)->i_advise & FADVISE_ANDROID_EMU) || \ + ((pfi)->i_advise & FADVISE_ANDROID_EMU))) + +static inline void get_inline_info(struct f2fs_inode_info *fi, + struct f2fs_inode *ri) +{ + if (ri->i_inline & F2FS_INLINE_XATTR) + set_inode_flag(fi, FI_INLINE_XATTR); +} + +static inline void set_raw_inline(struct f2fs_inode_info *fi, + struct f2fs_inode *ri) +{ + ri->i_inline = 0; + + if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + ri->i_inline |= F2FS_INLINE_XATTR; +} + +static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) +{ + if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; + return DEF_ADDRS_PER_INODE; +} + +static inline void *inline_xattr_addr(struct page *page) +{ + struct f2fs_inode *ri; + ri = (struct f2fs_inode *)page_address(page); + return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - + F2FS_INLINE_XATTR_ADDRS]); +} + +static inline int inline_xattr_size(struct inode *inode) +{ + if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) + return F2FS_INLINE_XATTR_ADDRS << 2; + else + return 0; +} + +static inline int f2fs_readonly(struct super_block *sb) +{ + return sb->s_flags & MS_RDONLY; +} + +/* + * file.c + */ +int f2fs_sync_file(struct file *, loff_t, loff_t, int); +void truncate_data_blocks(struct dnode_of_data *); +void f2fs_truncate(struct inode *); +int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +int f2fs_setattr(struct dentry *, struct iattr *); +int truncate_hole(struct inode *, pgoff_t, pgoff_t); +int truncate_data_blocks_range(struct dnode_of_data *, int); +long f2fs_ioctl(struct file *, unsigned int, unsigned long); +long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); + +/* + * inode.c + */ +void f2fs_set_inode_flags(struct inode *); +struct inode *f2fs_iget(struct super_block *, unsigned long); +void update_inode(struct inode *, struct page *); +int update_inode_page(struct inode *); +int f2fs_write_inode(struct inode *, struct writeback_control *); +void f2fs_evict_inode(struct inode *); + +/* + * namei.c + */ +struct dentry *f2fs_get_parent(struct dentry *child); + +/* + * dir.c + */ +struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, + struct page **); +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); +ino_t f2fs_inode_by_name(struct inode *, struct qstr *); +void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, + struct page *, struct inode *); +int update_dent_inode(struct inode *, const struct qstr *); +int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +int f2fs_make_empty(struct inode *, struct inode *); +bool f2fs_empty_dir(struct inode *); + +static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) +{ + return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, + inode); +} + +/* + * super.c + */ +int f2fs_sync_fs(struct super_block *, int); +extern __printf(3, 4) +void f2fs_msg(struct super_block *, const char *, const char *, ...); + +/* + * hash.c + */ +f2fs_hash_t f2fs_dentry_hash(const char *, size_t); + +/* + * node.c + */ +struct dnode_of_data; +struct node_info; + +int is_checkpointed_node(struct f2fs_sb_info *, nid_t); +void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); +int 
get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); +int truncate_inode_blocks(struct inode *, pgoff_t); +int truncate_xattr_node(struct inode *, struct page *); +int remove_inode_page(struct inode *); +struct page *new_inode_page(struct inode *, const struct qstr *); +struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); +void ra_node_page(struct f2fs_sb_info *, nid_t); +struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_node_page_ra(struct page *, int); +void sync_inode_page(struct dnode_of_data *); +int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); +bool alloc_nid(struct f2fs_sb_info *, nid_t *); +void alloc_nid_done(struct f2fs_sb_info *, nid_t); +void alloc_nid_failed(struct f2fs_sb_info *, nid_t); +void recover_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, struct node_info *, block_t); +int recover_inode_page(struct f2fs_sb_info *, struct page *); +int restore_node_summary(struct f2fs_sb_info *, unsigned int, + struct f2fs_summary_block *); +void flush_nat_entries(struct f2fs_sb_info *); +int build_node_manager(struct f2fs_sb_info *); +void destroy_node_manager(struct f2fs_sb_info *); +int __init create_node_manager_caches(void); +void destroy_node_manager_caches(void); + +/* + * segment.c + */ +void f2fs_balance_fs(struct f2fs_sb_info *); +void invalidate_blocks(struct f2fs_sb_info *, block_t); +void clear_prefree_segments(struct f2fs_sb_info *); +int npages_for_summary_flush(struct f2fs_sb_info *); +void allocate_new_segments(struct f2fs_sb_info *); +struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); +struct bio *f2fs_bio_alloc(struct block_device *, int); +void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool); +void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); +void write_meta_page(struct f2fs_sb_info *, struct page *); +void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, + block_t, block_t *); +void write_data_page(struct inode *, struct page *, struct dnode_of_data*, + block_t, block_t *); +void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); +void recover_data_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void rewrite_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void write_data_summaries(struct f2fs_sb_info *, block_t); +void write_node_summaries(struct f2fs_sb_info *, block_t); +int lookup_journal_in_cursum(struct f2fs_summary_block *, + int, unsigned int, int); +void flush_sit_entries(struct f2fs_sb_info *); +int build_segment_manager(struct f2fs_sb_info *); +void destroy_segment_manager(struct f2fs_sb_info *); + +/* + * checkpoint.c + */ +struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); +long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); +int acquire_orphan_inode(struct f2fs_sb_info *); +void release_orphan_inode(struct f2fs_sb_info *); +void add_orphan_inode(struct f2fs_sb_info *, nid_t); +void remove_orphan_inode(struct f2fs_sb_info *, nid_t); +int recover_orphan_inodes(struct f2fs_sb_info *); +int get_valid_checkpoint(struct f2fs_sb_info *); +void set_dirty_dir_page(struct inode *, struct page *); +void add_dirty_dir_inode(struct inode *); +void remove_dirty_dir_inode(struct inode *); +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); +void sync_dirty_dir_inodes(struct f2fs_sb_info 
*); +void write_checkpoint(struct f2fs_sb_info *, bool); +void init_orphan_info(struct f2fs_sb_info *); +int __init create_checkpoint_caches(void); +void destroy_checkpoint_caches(void); + +/* + * data.c + */ +int reserve_new_block(struct dnode_of_data *); +void update_extent_cache(block_t, struct dnode_of_data *); +struct page *find_data_page(struct inode *, pgoff_t, bool); +struct page *get_lock_data_page(struct inode *, pgoff_t); +struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); +int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); +int do_write_data_page(struct page *); + +/* + * gc.c + */ +int start_gc_thread(struct f2fs_sb_info *); +void stop_gc_thread(struct f2fs_sb_info *); +block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); +int f2fs_gc(struct f2fs_sb_info *); +void build_gc_manager(struct f2fs_sb_info *); +int __init create_gc_caches(void); +void destroy_gc_caches(void); + +/* + * recovery.c + */ +int recover_fsync_data(struct f2fs_sb_info *); +bool space_for_roll_forward(struct f2fs_sb_info *); + +/* + * debug.c + */ +#ifdef CONFIG_F2FS_STAT_FS +struct f2fs_stat_info { + struct list_head stat_list; + struct f2fs_sb_info *sbi; + struct mutex stat_lock; + int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; + int main_area_segs, main_area_sections, main_area_zones; + int hit_ext, total_ext; + int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; + int nats, sits, fnids; + int total_count, utilization; + int bg_gc; + unsigned int valid_count, valid_node_count, valid_inode_count; + unsigned int bimodal, avg_vblocks; + int util_free, util_valid, util_invalid; + int rsvd_segs, overp_segs; + int dirty_count, node_pages, meta_pages; + int prefree_count, call_count; + int tot_segs, node_segs, data_segs, free_segs, free_secs; + int tot_blks, data_blks, node_blks; + int curseg[NR_CURSEG_TYPE]; + int cursec[NR_CURSEG_TYPE]; + int curzone[NR_CURSEG_TYPE]; + + unsigned int segment_count[2]; + unsigned int block_count[2]; + unsigned base_mem, cache_mem; +}; + +static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_stat_info*)sbi->stat_info; +} + +#define stat_inc_call_count(si) ((si)->call_count++) + +#define stat_inc_seg_count(sbi, type) \ + do { \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ + (si)->tot_segs++; \ + if (type == SUM_TYPE_DATA) \ + si->data_segs++; \ + else \ + si->node_segs++; \ + } while (0) + +#define stat_inc_tot_blk_count(si, blks) \ + (si->tot_blks += (blks)) + +#define stat_inc_data_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ + stat_inc_tot_blk_count(si, blks); \ + si->data_blks += (blks); \ + } while (0) + +#define stat_inc_node_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ + stat_inc_tot_blk_count(si, blks); \ + si->node_blks += (blks); \ + } while (0) + +int f2fs_build_stats(struct f2fs_sb_info *); +void f2fs_destroy_stats(struct f2fs_sb_info *); +void __init f2fs_create_root_stats(void); +void f2fs_destroy_root_stats(void); +#else +#define stat_inc_call_count(si) +#define stat_inc_seg_count(si, type) +#define stat_inc_tot_blk_count(si, blks) +#define stat_inc_data_blk_count(si, blks) +#define stat_inc_node_blk_count(sbi, blks) + +static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } +static inline void __init f2fs_create_root_stats(void) { } +static inline void 
f2fs_destroy_root_stats(void) { } +#endif + +extern const struct file_operations f2fs_dir_operations; +extern const struct file_operations f2fs_file_operations; +extern const struct inode_operations f2fs_file_inode_operations; +extern const struct address_space_operations f2fs_dblock_aops; +extern const struct address_space_operations f2fs_node_aops; +extern const struct address_space_operations f2fs_meta_aops; +extern const struct inode_operations f2fs_dir_inode_operations; +extern const struct inode_operations f2fs_symlink_inode_operations; +extern const struct inode_operations f2fs_special_inode_operations; +#endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c new file mode 100644 index 00000000000..cf4b51c628d --- /dev/null +++ b/fs/f2fs/file.c @@ -0,0 +1,725 @@ +/* + * fs/f2fs/file.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "xattr.h" +#include "acl.h" +#include + +static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct page *page = vmf->page; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + block_t old_blk_addr; + struct dnode_of_data dn; + int err, ilock; + + f2fs_balance_fs(sbi); + + /* Wait if fs is frozen. This is racy so we check again later on + * and retry if the fs has been frozen after the page lock has + * been acquired + */ + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + + /* block allocation */ + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); + if (err) { + mutex_unlock_op(sbi, ilock); + goto out; + } + + old_blk_addr = dn.data_blkaddr; + + if (old_blk_addr == NULL_ADDR) { + err = reserve_new_block(&dn); + if (err) { + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + goto out; + } + } + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + + file_update_time(vma->vm_file); + lock_page(page); + if (page->mapping != inode->i_mapping || + page_offset(page) > i_size_read(inode) || + !PageUptodate(page)) { + unlock_page(page); + err = -EFAULT; + goto out; + } + + /* + * check to see if the page is mapped already (no holes) + */ + if (PageMappedToDisk(page)) + goto mapped; + + /* page is wholly or partially inside EOF */ + if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { + unsigned offset; + offset = i_size_read(inode) & ~PAGE_CACHE_MASK; + zero_user_segment(page, offset, PAGE_CACHE_SIZE); + } + set_page_dirty(page); + SetPageUptodate(page); + +mapped: + /* fill the page */ + wait_on_page_writeback(page); +out: + return block_page_mkwrite_return(err); +} + +static const struct vm_operations_struct f2fs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = f2fs_vm_page_mkwrite, +}; + +static int get_parent_ino(struct inode *inode, nid_t *pino) +{ + struct dentry *dentry; + + inode = igrab(inode); + + /* Alex - the following is equivalent to: dentry = d_find_any_alias(inode); */ + dentry = NULL; + spin_lock(&inode->i_lock); + if (!list_empty(&inode->i_dentry)) { + dentry = list_first_entry(&inode->i_dentry, + struct dentry, d_alias); + 
dget(dentry); + } + spin_unlock(&inode->i_lock); + + iput(inode); + if (!dentry) + return 0; + + if (update_dent_inode(inode, &dentry->d_name)) { + dput(dentry); + return 0; + } + + *pino = parent_ino(dentry); + dput(dentry); + return 1; +} + +int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct inode *inode = file->f_mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ret = 0; + bool need_cp = false; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + + if (f2fs_readonly(inode->i_sb)) + return 0; + + trace_f2fs_sync_file_enter(inode); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) { + trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + return ret; + } + + /* guarantee free sections for fsync */ + f2fs_balance_fs(sbi); + + mutex_lock(&inode->i_mutex); + + /* + * Both of fdatasync() and fsync() are able to be recovered from + * sudden-power-off. + */ + if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) + need_cp = true; + else if (file_wrong_pino(inode)) + need_cp = true; + else if (!space_for_roll_forward(sbi)) + need_cp = true; + else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) + need_cp = true; + else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) + need_cp = true; + + if (need_cp) { + nid_t pino; + + F2FS_I(inode)->xattr_ver = 0; + + /* all the dirty node pages should be flushed for POR */ + ret = f2fs_sync_fs(inode->i_sb, 1); + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + F2FS_I(inode)->i_pino = pino; + file_got_pino(inode); + mark_inode_dirty_sync(inode); + ret = f2fs_write_inode(inode, NULL); + if (ret) + goto out; + } + } else { + /* if there is no written node page, write its inode page */ + while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { + mark_inode_dirty_sync(inode); + ret = f2fs_write_inode(inode, NULL); + if (ret) + goto out; + } + filemap_fdatawait_range(sbi->node_inode->i_mapping, + 0, LONG_MAX); + ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + } +out: + mutex_unlock(&inode->i_mutex); + trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + return ret; +} + +static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + file_accessed(file); + vma->vm_ops = &f2fs_file_vm_ops; + return 0; +} + +int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +{ + int nr_free = 0, ofs = dn->ofs_in_node; + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_node *raw_node; + __le32 *addr; + + raw_node = F2FS_NODE(dn->node_page); + addr = blkaddr_in_node(raw_node) + ofs; + + for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { + block_t blkaddr = le32_to_cpu(*addr); + if (blkaddr == NULL_ADDR) + continue; + + update_extent_cache(NULL_ADDR, dn); + invalidate_blocks(sbi, blkaddr); + nr_free++; + } + if (nr_free) { + dec_valid_block_count(sbi, dn->inode, nr_free); + set_page_dirty(dn->node_page); + sync_inode_page(dn); + } + dn->ofs_in_node = ofs; + + trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, + dn->ofs_in_node, nr_free); + return nr_free; +} + +void truncate_data_blocks(struct dnode_of_data *dn) +{ + truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); +} + +static void truncate_partial_data_page(struct inode *inode, u64 from) +{ + unsigned offset = from & (PAGE_CACHE_SIZE - 1); + struct page *page; + + if (!offset) + return; + + page = find_data_page(inode, from >> 
PAGE_CACHE_SHIFT, false); + if (IS_ERR(page)) + return; + + lock_page(page); + if (page->mapping != inode->i_mapping) { + f2fs_put_page(page, 1); + return; + } + wait_on_page_writeback(page); + zero_user(page, offset, PAGE_CACHE_SIZE - offset); + set_page_dirty(page); + f2fs_put_page(page, 1); +} + +static int truncate_blocks(struct inode *inode, u64 from) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + unsigned int blocksize = inode->i_sb->s_blocksize; + struct dnode_of_data dn; + pgoff_t free_from; + int count = 0, ilock = -1; + int err; + + trace_f2fs_truncate_blocks_enter(inode, from); + + free_from = (pgoff_t) + ((from + blocksize - 1) >> (sbi->log_blocksize)); + + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); + if (err) { + if (err == -ENOENT) + goto free_next; + mutex_unlock_op(sbi, ilock); + trace_f2fs_truncate_blocks_exit(inode, err); + return err; + } + + if (IS_INODE(dn.node_page)) + count = ADDRS_PER_INODE(F2FS_I(inode)); + else + count = ADDRS_PER_BLOCK; + + count -= dn.ofs_in_node; + BUG_ON(count < 0); + + if (dn.ofs_in_node || IS_INODE(dn.node_page)) { + truncate_data_blocks_range(&dn, count); + free_from += count; + } + + f2fs_put_dnode(&dn); +free_next: + err = truncate_inode_blocks(inode, free_from); + mutex_unlock_op(sbi, ilock); + + /* lastly zero out the first data page */ + truncate_partial_data_page(inode, from); + + trace_f2fs_truncate_blocks_exit(inode, err); + return err; +} + +void f2fs_truncate(struct inode *inode) +{ + int err; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + + trace_f2fs_truncate(inode); + + err = truncate_blocks(inode, i_size_read(inode)); + if (err) { + f2fs_msg(inode->i_sb, KERN_ERR, "truncate failed with %d", + err); + f2fs_handle_error(F2FS_SB(inode->i_sb)); + } else { + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } +} + +int f2fs_getattr(struct vfsmount *mnt, + struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + generic_fillattr(inode, stat); + stat->blocks <<= 3; + return 0; +} + +#ifdef CONFIG_F2FS_FS_POSIX_ACL +static void __setattr_copy(struct inode *inode, const struct iattr *attr) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int ia_valid = attr->ia_valid; + + if (ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + if (ia_valid & ATTR_ATIME) + inode->i_atime = timespec_trunc(attr->ia_atime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MTIME) + inode->i_mtime = timespec_trunc(attr->ia_mtime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_CTIME) + inode->i_ctime = timespec_trunc(attr->ia_ctime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + set_acl_inode(fi, mode); + } +} +#else +#define __setattr_copy setattr_copy +#endif + +int f2fs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_inode_info *pfi = F2FS_I(dentry->d_parent->d_inode); + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int err; + + err = inode_change_ok(inode, attr); + if (err) + return err; + + if (IS_ANDROID_EMU(sbi, fi, pfi)) + f2fs_android_emu(sbi, inode, &attr->ia_uid, &attr->ia_gid, + &attr->ia_mode); + + if ((attr->ia_valid & ATTR_SIZE) && + 
attr->ia_size != i_size_read(inode)) { + truncate_setsize(inode, attr->ia_size); + f2fs_truncate(inode); + f2fs_balance_fs(F2FS_SB(inode->i_sb)); + } + + __setattr_copy(inode, attr); + + if (attr->ia_valid & ATTR_MODE) { + err = f2fs_acl_chmod(inode); + if (err || is_inode_flag_set(fi, FI_ACL_MODE)) { + inode->i_mode = fi->i_acl_mode; + clear_inode_flag(fi, FI_ACL_MODE); + } + } + + mark_inode_dirty(inode); + return err; +} + +const struct inode_operations f2fs_file_inode_operations = { + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +static void fill_zero(struct inode *inode, pgoff_t index, + loff_t start, loff_t len) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + int ilock; + + if (!len) + return; + + f2fs_balance_fs(sbi); + + ilock = mutex_lock_op(sbi); + page = get_new_data_page(inode, NULL, index, false); + mutex_unlock_op(sbi, ilock); + + if (!IS_ERR(page)) { + wait_on_page_writeback(page); + zero_user(page, start, len); + set_page_dirty(page); + f2fs_put_page(page, 1); + } +} + +int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) +{ + pgoff_t index; + int err; + + for (index = pg_start; index < pg_end; index++) { + struct dnode_of_data dn; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) { + if (err == -ENOENT) + continue; + return err; + } + + if (dn.data_blkaddr != NULL_ADDR) + truncate_data_blocks_range(&dn, 1); + f2fs_put_dnode(&dn); + } + return 0; +} + +static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) +{ + pgoff_t pg_start, pg_end; + loff_t off_start, off_end; + int ret = 0; + + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + + off_start = offset & (PAGE_CACHE_SIZE - 1); + off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + + if (pg_start == pg_end) { + fill_zero(inode, pg_start, off_start, + off_end - off_start); + } else { + if (off_start) + fill_zero(inode, pg_start++, off_start, + PAGE_CACHE_SIZE - off_start); + if (off_end) + fill_zero(inode, pg_end, 0, off_end); + + if (pg_start < pg_end) { + struct address_space *mapping = inode->i_mapping; + loff_t blk_start, blk_end; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; + + f2fs_balance_fs(sbi); + + blk_start = pg_start << PAGE_CACHE_SHIFT; + blk_end = pg_end << PAGE_CACHE_SHIFT; + truncate_inode_pages_range(mapping, blk_start, + blk_end - 1); + + ilock = mutex_lock_op(sbi); + ret = truncate_hole(inode, pg_start, pg_end); + mutex_unlock_op(sbi, ilock); + } + } + + if (!(mode & FALLOC_FL_KEEP_SIZE) && + i_size_read(inode) <= (offset + len)) { + i_size_write(inode, offset); + mark_inode_dirty(inode); + } + + return ret; +} + +static int expand_inode_data(struct inode *inode, loff_t offset, + loff_t len, int mode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + pgoff_t index, pg_start, pg_end; + loff_t new_size = i_size_read(inode); + loff_t off_start, off_end; + int ret = 0; + + ret = inode_newsize_ok(inode, (len + offset)); + if (ret) + return ret; + + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + + off_start = offset & (PAGE_CACHE_SIZE - 1); + off_end = (offset + len) & (PAGE_CACHE_SIZE - 
1); + + for (index = pg_start; index <= pg_end; index++) { + struct dnode_of_data dn; + int ilock; + + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, index, ALLOC_NODE); + if (ret) { + mutex_unlock_op(sbi, ilock); + break; + } + + if (dn.data_blkaddr == NULL_ADDR) { + ret = reserve_new_block(&dn); + if (ret) { + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + break; + } + } + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + + if (pg_start == pg_end) + new_size = offset + len; + else if (index == pg_start && off_start) + new_size = (index + 1) << PAGE_CACHE_SHIFT; + else if (index == pg_end) + new_size = (index << PAGE_CACHE_SHIFT) + off_end; + else + new_size += PAGE_CACHE_SIZE; + } + + if (!(mode & FALLOC_FL_KEEP_SIZE) && + i_size_read(inode) < new_size) { + i_size_write(inode, new_size); + mark_inode_dirty(inode); + } + + return ret; +} + +static long f2fs_fallocate(struct file *file, int mode, + loff_t offset, loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + long ret; + + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + + if (mode & FALLOC_FL_PUNCH_HOLE) + ret = punch_hole(inode, offset, len, mode); + else + ret = expand_inode_data(inode, offset, len, mode); + + if (!ret) { + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } + trace_f2fs_fallocate(inode, mode, offset, len, ret); + return ret; +} + +#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) +#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) + +static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & F2FS_REG_FLMASK; + else + return flags & F2FS_OTHER_FLMASK; +} + +long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int flags; + int ret; + + switch (cmd) { + case F2FS_IOC_GETFLAGS: + flags = fi->i_flags & FS_FL_USER_VISIBLE; + return put_user(flags, (int __user *) arg); + case F2FS_IOC_SETFLAGS: + { + unsigned int oldflags; + + ret = mnt_want_write(filp->f_path.mnt); + if (ret) + return ret; + + if (!inode_owner_or_capable(inode)) { + ret = -EACCES; + goto out; + } + + if (get_user(flags, (int __user *) arg)) { + ret = -EFAULT; + goto out; + } + + flags = f2fs_mask_flags(inode->i_mode, flags); + + mutex_lock(&inode->i_mutex); + + oldflags = fi->i_flags; + + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + ret = -EPERM; + goto out; + } + } + + flags = flags & FS_FL_USER_MODIFIABLE; + flags |= oldflags & ~FS_FL_USER_MODIFIABLE; + fi->i_flags = flags; + mutex_unlock(&inode->i_mutex); + + f2fs_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); +out: + mnt_drop_write(filp->f_path.mnt); + return ret; + } + default: + return -ENOTTY; + } +} + +#ifdef CONFIG_COMPAT +long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case F2FS_IOC32_GETFLAGS: + cmd = F2FS_IOC_GETFLAGS; + break; + case F2FS_IOC32_SETFLAGS: + cmd = F2FS_IOC_SETFLAGS; + break; + default: + return -ENOIOCTLCMD; + } + return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); +} +#endif + +const struct file_operations f2fs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + 
.aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .open = generic_file_open, + .mmap = f2fs_file_mmap, + .fsync = f2fs_sync_file, + .fallocate = f2fs_fallocate, + .unlocked_ioctl = f2fs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = f2fs_compat_ioctl, +#endif + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, +}; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c new file mode 100644 index 00000000000..e51c1b06b16 --- /dev/null +++ b/fs/f2fs/gc.c @@ -0,0 +1,738 @@ +/* + * fs/f2fs/gc.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "gc.h" +#include + +static struct kmem_cache *winode_slab; + +static int gc_thread_func(void *data) +{ + struct f2fs_sb_info *sbi = data; + struct f2fs_gc_kthread *gc_th = sbi->gc_thread; + wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; + long wait_ms; + + wait_ms = gc_th->min_sleep_time; + + do { + if (try_to_freeze()) + continue; + else + wait_event_interruptible_timeout(*wq, + kthread_should_stop(), + msecs_to_jiffies(wait_ms)); + if (kthread_should_stop()) + break; + + /* + * [GC triggering condition] + * 0. GC is not conducted currently. + * 1. There are enough dirty segments. + * 2. IO subsystem is idle by checking the # of writeback pages. + * 3. IO subsystem is idle by checking the # of requests in + * bdev's request list. + * + * Note) We have to avoid triggering GCs too much frequently. + * Because it is possible that some segments can be + * invalidated soon after by user update or deletion. + * So, I'd like to wait some time to collect dirty segments. 
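+ *
+ * Illustrative summary, not part of the original patch: with the
+ * defaults assigned in start_gc_thread() below, each iteration roughly
+ * adjusts the wait as
+ *
+ *	device busy          -> wait_ms = increase_sleep_time(gc_th, wait_ms);
+ *	many invalid blocks  -> wait_ms = decrease_sleep_time(gc_th, wait_ms);
+ *	no victim selected   -> wait_ms = gc_th->no_gc_sleep_time;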
+ */ + if (!mutex_trylock(&sbi->gc_mutex)) + continue; + + if (!is_idle(sbi)) { + wait_ms = increase_sleep_time(gc_th, wait_ms); + mutex_unlock(&sbi->gc_mutex); + continue; + } + + if (has_enough_invalid_blocks(sbi)) + wait_ms = decrease_sleep_time(gc_th, wait_ms); + else + wait_ms = increase_sleep_time(gc_th, wait_ms); + +#ifdef CONFIG_F2FS_STAT_FS + sbi->bg_gc++; +#endif + + /* if return value is not zero, no victim was selected */ + if (f2fs_gc(sbi)) + wait_ms = gc_th->no_gc_sleep_time; + } while (!kthread_should_stop()); + return 0; +} + +int start_gc_thread(struct f2fs_sb_info *sbi) +{ + struct f2fs_gc_kthread *gc_th; + dev_t dev = sbi->sb->s_bdev->bd_dev; + int err = 0; + + if (!test_opt(sbi, BG_GC)) + goto out; + gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); + if (!gc_th) { + err = -ENOMEM; + goto out; + } + + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + + gc_th->gc_idle = 0; + + sbi->gc_thread = gc_th; + init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); + sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, + "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(gc_th->f2fs_gc_task)) { + err = PTR_ERR(gc_th->f2fs_gc_task); + kfree(gc_th); + sbi->gc_thread = NULL; + } + +out: + return err; +} + +void stop_gc_thread(struct f2fs_sb_info *sbi) +{ + struct f2fs_gc_kthread *gc_th = sbi->gc_thread; + if (!gc_th) + return; + kthread_stop(gc_th->f2fs_gc_task); + kfree(gc_th); + sbi->gc_thread = NULL; +} + +static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) +{ + int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; + + if (gc_th && gc_th->gc_idle) { + if (gc_th->gc_idle == 1) + gc_mode = GC_CB; + else if (gc_th->gc_idle == 2) + gc_mode = GC_GREEDY; + } + return gc_mode; +} + +static void select_policy(struct f2fs_sb_info *sbi, int gc_type, + int type, struct victim_sel_policy *p) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + if (p->alloc_mode == SSR) { + p->gc_mode = GC_GREEDY; + p->dirty_segmap = dirty_i->dirty_segmap[type]; + p->ofs_unit = 1; + } else { + p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); + p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; + p->ofs_unit = sbi->segs_per_sec; + } + p->offset = sbi->last_victim[p->gc_mode]; +} + +static unsigned int get_max_cost(struct f2fs_sb_info *sbi, + struct victim_sel_policy *p) +{ + /* SSR allocates in a segment unit */ + if (p->alloc_mode == SSR) + return 1 << sbi->log_blocks_per_seg; + if (p->gc_mode == GC_GREEDY) + return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; + else if (p->gc_mode == GC_CB) + return UINT_MAX; + else /* No other gc_mode */ + return 0; +} + +static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int hint = 0; + unsigned int secno; + + /* + * If the gc_type is FG_GC, we can select victim segments + * selected by background GC before. + * Those segments guarantee they have small valid blocks. 
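+ *
+ * Illustrative note, not part of the original patch: a hit is returned
+ * as the first segment of that section, i.e.
+ *
+ *	return secno * sbi->segs_per_sec;
+ *
+ * and NULL_SEGNO means no background-GC victim was found.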
+ */ +next: + secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++); + if (secno < TOTAL_SECS(sbi)) { + if (sec_usage_check(sbi, secno)) + goto next; + clear_bit(secno, dirty_i->victim_secmap); + return secno * sbi->segs_per_sec; + } + return NULL_SEGNO; +} + +static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int secno = GET_SECNO(sbi, segno); + unsigned int start = secno * sbi->segs_per_sec; + unsigned long long mtime = 0; + unsigned int vblocks; + unsigned char age = 0; + unsigned char u; + unsigned int i; + + for (i = 0; i < sbi->segs_per_sec; i++) + mtime += get_seg_entry(sbi, start + i)->mtime; + vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + + mtime = div_u64(mtime, sbi->segs_per_sec); + vblocks = div_u64(vblocks, sbi->segs_per_sec); + + u = (vblocks * 100) >> sbi->log_blocks_per_seg; + + /* Handle if the system time is changed by user */ + if (mtime < sit_i->min_mtime) + sit_i->min_mtime = mtime; + if (mtime > sit_i->max_mtime) + sit_i->max_mtime = mtime; + if (sit_i->max_mtime != sit_i->min_mtime) + age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime), + sit_i->max_mtime - sit_i->min_mtime); + + return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); +} + +static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, + struct victim_sel_policy *p) +{ + if (p->alloc_mode == SSR) + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + + /* alloc_mode == LFS */ + if (p->gc_mode == GC_GREEDY) + return get_valid_blocks(sbi, segno, sbi->segs_per_sec); + else + return get_cb_cost(sbi, segno); +} + +/* + * This function is called from two paths. + * One is garbage collection and the other is SSR segment selection. + * When it is called during GC, it just gets a victim segment + * and it does not remove it from dirty seglist. + * When it is called from SSR segment selection, it finds a segment + * which has minimum valid blocks and removes it from dirty seglist. 
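
The victim search below boils down to a bounded scan for the dirty segment with the lowest cost, where the cost is either the raw valid-block count (greedy) or the cost-benefit value computed in get_cb_cost() above. A toy version with made-up segment data (blocks_per_seg and the sample segments are arbitrary):

#include <stdio.h>
#include <limits.h>

struct seg { unsigned valid_blocks; unsigned age; };   /* age: 0..100 */

/* Cost-benefit cost as in get_cb_cost(): u is utilization in percent.
 * (Greedy mode would simply use valid_blocks as the cost.) */
static unsigned cost_cb(const struct seg *s, unsigned blocks_per_seg)
{
	unsigned u = s->valid_blocks * 100 / blocks_per_seg;

	return UINT_MAX - (100 * (100 - u) * s->age) / (100 + u);
}

int main(void)
{
	struct seg segs[] = { { 100, 10 }, { 400, 90 }, { 50, 50 }, { 300, 5 } };
	const unsigned blocks_per_seg = 512;     /* made up */
	const int max_search = 3;                /* like MAX_VICTIM_SEARCH */
	unsigned min_cost = UINT_MAX;
	int victim = -1;

	for (int i = 0; i < 4 && i < max_search; i++) {
		unsigned cost = cost_cb(&segs[i], blocks_per_seg);

		if (cost < min_cost) {
			min_cost = cost;
			victim = i;
		}
	}
	/* Picks segment 2 here: lowest utilization, so cleaning is cheap. */
	printf("cost-benefit victim: segment %d\n", victim);
	return 0;
}
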
+ */ +static int get_victim_by_default(struct f2fs_sb_info *sbi, + unsigned int *result, int gc_type, int type, char alloc_mode) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct victim_sel_policy p; + unsigned int secno, max_cost; + int nsearched = 0; + + p.alloc_mode = alloc_mode; + select_policy(sbi, gc_type, type, &p); + + p.min_segno = NULL_SEGNO; + p.min_cost = max_cost = get_max_cost(sbi, &p); + + mutex_lock(&dirty_i->seglist_lock); + + if (p.alloc_mode == LFS && gc_type == FG_GC) { + p.min_segno = check_bg_victims(sbi); + if (p.min_segno != NULL_SEGNO) + goto got_it; + } + + while (1) { + unsigned long cost; + unsigned int segno; + + segno = find_next_bit(p.dirty_segmap, + TOTAL_SEGS(sbi), p.offset); + if (segno >= TOTAL_SEGS(sbi)) { + if (sbi->last_victim[p.gc_mode]) { + sbi->last_victim[p.gc_mode] = 0; + p.offset = 0; + continue; + } + break; + } + p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; + secno = GET_SECNO(sbi, segno); + + if (sec_usage_check(sbi, secno)) + continue; + if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) + continue; + + cost = get_gc_cost(sbi, segno, &p); + + if (p.min_cost > cost) { + p.min_segno = segno; + p.min_cost = cost; + } + + if (cost == max_cost) + continue; + + if (nsearched++ >= MAX_VICTIM_SEARCH) { + sbi->last_victim[p.gc_mode] = segno; + break; + } + } + if (p.min_segno != NULL_SEGNO) { +got_it: + if (p.alloc_mode == LFS) { + secno = GET_SECNO(sbi, p.min_segno); + if (gc_type == FG_GC) + sbi->cur_victim_sec = secno; + else + set_bit(secno, dirty_i->victim_secmap); + } + *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; + + trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, + sbi->cur_victim_sec, + prefree_segments(sbi), free_segments(sbi)); + } + mutex_unlock(&dirty_i->seglist_lock); + + return (p.min_segno == NULL_SEGNO) ? 0 : 1; +} + +static const struct victim_selection default_v_ops = { + .get_victim = get_victim_by_default, +}; + +static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) +{ + struct inode_entry *ie; + + list_for_each_entry(ie, ilist, list) + if (ie->inode->i_ino == ino) + return ie->inode; + return NULL; +} + +static void add_gc_inode(struct inode *inode, struct list_head *ilist) +{ + struct inode_entry *new_ie; + + if (inode == find_gc_inode(inode->i_ino, ilist)) { + iput(inode); + return; + } +repeat: + new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); + if (!new_ie) { + cond_resched(); + goto repeat; + } + new_ie->inode = inode; + list_add_tail(&new_ie->list, ilist); +} + +static void put_gc_inode(struct list_head *ilist) +{ + struct inode_entry *ie, *next_ie; + list_for_each_entry_safe(ie, next_ie, ilist, list) { + iput(ie->inode); + list_del(&ie->list); + kmem_cache_free(winode_slab, ie); + } +} + +static int check_valid_map(struct f2fs_sb_info *sbi, + unsigned int segno, int offset) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct seg_entry *sentry; + int ret; + + mutex_lock(&sit_i->sentry_lock); + sentry = get_seg_entry(sbi, segno); + ret = f2fs_test_bit(offset, sentry->cur_valid_map); + mutex_unlock(&sit_i->sentry_lock); + return ret; +} + +/* + * This function compares node address got in summary with that in NAT. + * On validity, copy that node with cold status, otherwise (invalid node) + * ignore that. 
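
gc_node_segment() below walks the victim's summary entries twice: a first pass that only issues readahead for every still-valid node, and a second pass that fetches the (now hopefully cached) pages and dirties them. The shape of that loop, with the I/O replaced by prints:

#include <stdio.h>
#include <stdbool.h>

#define NR_ENTRIES 6

static bool valid[NR_ENTRIES] = { true, false, true, true, false, true };

static void readahead(int nid) { printf("  readahead nid %d\n", nid); }
static void process(int nid)   { printf("  dirty + write nid %d\n", nid); }

int main(void)
{
	bool initial = true;

next_step:
	printf(initial ? "pass 1 (prefetch):\n" : "pass 2 (process):\n");
	for (int off = 0; off < NR_ENTRIES; off++) {
		if (!valid[off])          /* skip blocks no longer valid */
			continue;
		if (initial)
			readahead(off);   /* cheap async hint, keep going */
		else
			process(off);     /* page should be cached by now */
	}
	if (initial) {
		initial = false;
		goto next_step;           /* same loop, second phase */
	}
	return 0;
}
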
+ */ +static void gc_node_segment(struct f2fs_sb_info *sbi, + struct f2fs_summary *sum, unsigned int segno, int gc_type) +{ + bool initial = true; + struct f2fs_summary *entry; + int off; + +next_step: + entry = sum; + + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { + nid_t nid = le32_to_cpu(entry->nid); + struct page *node_page; + + /* stop BG_GC if there is not enough free sections. */ + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) + return; + + if (check_valid_map(sbi, segno, off) == 0) + continue; + + if (initial) { + ra_node_page(sbi, nid); + continue; + } + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + continue; + + /* set page dirty and write it */ + if (gc_type == FG_GC) { + f2fs_wait_on_page_writeback(node_page, NODE, true); + set_page_dirty(node_page); + } else { + if (!PageWriteback(node_page)) + set_page_dirty(node_page); + } + f2fs_put_page(node_page, 1); + stat_inc_node_blk_count(sbi, 1); + } + + if (initial) { + initial = false; + goto next_step; + } + + if (gc_type == FG_GC) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + sync_node_pages(sbi, 0, &wbc); + + /* + * In the case of FG_GC, it'd be better to reclaim this victim + * completely. + */ + if (get_valid_blocks(sbi, segno, 1) != 0) + goto next_step; + } +} + +/* + * Calculate start block index indicating the given node offset. + * Be careful, caller should give this node offset only indicating direct node + * blocks. If any node offsets, which point the other types of node blocks such + * as indirect or double indirect node blocks, are given, it must be a caller's + * bug. + */ +block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi) +{ + unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; + unsigned int bidx; + + if (node_ofs == 0) + return 0; + + if (node_ofs <= 2) { + bidx = node_ofs - 1; + } else if (node_ofs <= indirect_blks) { + int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); + bidx = node_ofs - 2 - dec; + } else { + int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); + bidx = node_ofs - 5 - dec; + } + return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); +} + +static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + struct node_info *dni, block_t blkaddr, unsigned int *nofs) +{ + struct page *node_page; + nid_t nid; + unsigned int ofs_in_node; + block_t source_blkaddr; + + nid = le32_to_cpu(sum->nid); + ofs_in_node = le16_to_cpu(sum->ofs_in_node); + + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + return 0; + + get_node_info(sbi, nid, dni); + + if (sum->version != dni->version) { + f2fs_put_page(node_page, 1); + return 0; + } + + *nofs = ofs_of_node(node_page); + source_blkaddr = datablock_addr(node_page, ofs_in_node); + f2fs_put_page(node_page, 1); + + if (source_blkaddr != blkaddr) + return 0; + return 1; +} + +static void move_data_page(struct inode *inode, struct page *page, int gc_type) +{ + if (gc_type == BG_GC) { + if (PageWriteback(page)) + goto out; + set_page_dirty(page); + set_cold_data(page); + } else { + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + f2fs_wait_on_page_writeback(page, DATA, true); + + if (clear_page_dirty_for_io(page) && + S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + set_cold_data(page); + do_write_data_page(page); + clear_cold_data(page); + } +out: + f2fs_put_page(page, 1); +} + +/* + * This function tries to get parent node of victim 
data block, and identifies + * data block validity. If the block is valid, copy that with cold status and + * modify parent node. + * If the parent node is not valid or the data block address is different, + * the victim data block is ignored. + */ +static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + struct list_head *ilist, unsigned int segno, int gc_type) +{ + struct super_block *sb = sbi->sb; + struct f2fs_summary *entry; + block_t start_addr; + int off; + int phase = 0; + + start_addr = START_BLOCK(sbi, segno); + +next_step: + entry = sum; + + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { + struct page *data_page; + struct inode *inode; + struct node_info dni; /* dnode info for the data */ + unsigned int ofs_in_node, nofs; + block_t start_bidx; + + /* stop BG_GC if there is not enough free sections. */ + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) + return; + + if (check_valid_map(sbi, segno, off) == 0) + continue; + + if (phase == 0) { + ra_node_page(sbi, le32_to_cpu(entry->nid)); + continue; + } + + /* Get an inode by ino with checking validity */ + if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0) + continue; + + if (phase == 1) { + ra_node_page(sbi, dni.ino); + continue; + } + + ofs_in_node = le16_to_cpu(entry->ofs_in_node); + + if (phase == 2) { + inode = f2fs_iget(sb, dni.ino); + if (IS_ERR(inode)) + continue; + + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + + data_page = find_data_page(inode, + start_bidx + ofs_in_node, false); + if (IS_ERR(data_page)) + goto next_iput; + + f2fs_put_page(data_page, 0); + add_gc_inode(inode, ilist); + } else { + inode = find_gc_inode(dni.ino, ilist); + if (inode) { + start_bidx = start_bidx_of_node(nofs, + F2FS_I(inode)); + data_page = get_lock_data_page(inode, + start_bidx + ofs_in_node); + if (IS_ERR(data_page)) + continue; + move_data_page(inode, data_page, gc_type); + stat_inc_data_blk_count(sbi, 1); + } + } + continue; +next_iput: + iput(inode); + } + + if (++phase < 4) + goto next_step; + + if (gc_type == FG_GC) { + f2fs_submit_bio(sbi, DATA, true); + + /* + * In the case of FG_GC, it'd be better to reclaim this victim + * completely. 
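
The block-index arithmetic of start_bidx_of_node(), used above to locate a data block from its direct node, is easiest to check with concrete numbers. The geometry below (923 pointers in the inode, 1018 per node block) is the usual f2fs 4 KB layout but is hard-coded here purely for illustration:

#include <stdio.h>

/* Assumed f2fs geometry (4 KB blocks); hard-coded for illustration only. */
#define ADDRS_PER_INODE 923
#define ADDRS_PER_BLOCK 1018
#define NIDS_PER_BLOCK  1018

/* Same arithmetic as start_bidx_of_node(); node_ofs must be a direct node. */
static unsigned long start_bidx(unsigned int node_ofs)
{
	unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
	unsigned int bidx, dec;

	if (node_ofs == 0)                      /* the inode itself */
		return 0;
	if (node_ofs <= 2) {                    /* the two direct nodes */
		bidx = node_ofs - 1;
	} else if (node_ofs <= indirect_blks) { /* under an indirect node */
		dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 2 - dec;
	} else {                                /* under the double indirect */
		dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 5 - dec;
	}
	return (unsigned long)bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
}

int main(void)
{
	/* Node offset 1 is the first direct node: its data starts right
	 * after the 923 in-inode pointers. */
	printf("ofs 1 -> block %lu\n", start_bidx(1));   /* 923 */
	printf("ofs 2 -> block %lu\n", start_bidx(2));   /* 1941 */
	printf("ofs 4 -> block %lu\n", start_bidx(4));   /* 2959 */
	return 0;
}
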
+ */ + if (get_valid_blocks(sbi, segno, 1) != 0) { + phase = 2; + goto next_step; + } + } +} + +static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, + int gc_type, int type) +{ + struct sit_info *sit_i = SIT_I(sbi); + int ret; + mutex_lock(&sit_i->sentry_lock); + ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); + mutex_unlock(&sit_i->sentry_lock); + return ret; +} + +static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, + struct list_head *ilist, int gc_type) +{ + struct page *sum_page; + struct f2fs_summary_block *sum; + struct blk_plug plug; + + /* read segment summary of victim */ + sum_page = get_sum_page(sbi, segno); + if (IS_ERR(sum_page)) + return; + + blk_start_plug(&plug); + + sum = page_address(sum_page); + + switch (GET_SUM_TYPE((&sum->footer))) { + case SUM_TYPE_NODE: + gc_node_segment(sbi, sum->entries, segno, gc_type); + break; + case SUM_TYPE_DATA: + gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + break; + } + blk_finish_plug(&plug); + + stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); + stat_inc_call_count(sbi->stat_info); + + f2fs_put_page(sum_page, 1); +} + +int f2fs_gc(struct f2fs_sb_info *sbi) +{ + struct list_head ilist; + unsigned int segno, i; + int gc_type = BG_GC; + int nfree = 0; + int ret = -1; + + INIT_LIST_HEAD(&ilist); +gc_more: + if (!(sbi->sb->s_flags & MS_ACTIVE)) + goto stop; + + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { + gc_type = FG_GC; + write_checkpoint(sbi, false); + } + + if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) + goto stop; + ret = 0; + + for (i = 0; i < sbi->segs_per_sec; i++) + do_garbage_collect(sbi, segno + i, &ilist, gc_type); + + if (gc_type == FG_GC) { + sbi->cur_victim_sec = NULL_SEGNO; + nfree++; + WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec)); + } + + if (has_not_enough_free_secs(sbi, nfree)) + goto gc_more; + + if (gc_type == FG_GC) + write_checkpoint(sbi, false); +stop: + mutex_unlock(&sbi->gc_mutex); + + put_gc_inode(&ilist); + return ret; +} + +void build_gc_manager(struct f2fs_sb_info *sbi) +{ + DIRTY_I(sbi)->v_ops = &default_v_ops; +} + +int __init create_gc_caches(void) +{ + winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", + sizeof(struct inode_entry), NULL); + if (!winode_slab) + return -ENOMEM; + return 0; +} + +void destroy_gc_caches(void) +{ + kmem_cache_destroy(winode_slab); +} diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h new file mode 100644 index 00000000000..f2a50cb2487 --- /dev/null +++ b/fs/f2fs/gc.h @@ -0,0 +1,110 @@ +/* + * fs/f2fs/gc.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define GC_THREAD_MIN_WB_PAGES 1 /* + * a threshold to determine + * whether IO subsystem is idle + * or not + */ +#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ +#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 +#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ +#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ +#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ + +/* Search max. 
number of dirty segments to select a victim segment */ +#define MAX_VICTIM_SEARCH 20 + +struct f2fs_gc_kthread { + struct task_struct *f2fs_gc_task; + wait_queue_head_t gc_wait_queue_head; + + /* for gc sleep time */ + unsigned int min_sleep_time; + unsigned int max_sleep_time; + unsigned int no_gc_sleep_time; + + /* for changing gc mode */ + unsigned int gc_idle; +}; + +struct inode_entry { + struct list_head list; + struct inode *inode; +}; + +/* + * inline functions + */ +static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) +{ + if (free_segments(sbi) < overprovision_segments(sbi)) + return 0; + else + return (free_segments(sbi) - overprovision_segments(sbi)) + << sbi->log_blocks_per_seg; +} + +static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi) +{ + return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100; +} + +static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) +{ + block_t reclaimable_user_blocks = sbi->user_block_count - + written_block_count(sbi); + return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; +} + +static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +{ + if (wait == gc_th->no_gc_sleep_time) + return wait; + + wait += gc_th->min_sleep_time; + if (wait > gc_th->max_sleep_time) + wait = gc_th->max_sleep_time; + return wait; +} + +static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +{ + if (wait == gc_th->no_gc_sleep_time) + wait = gc_th->max_sleep_time; + + wait -= gc_th->min_sleep_time; + if (wait <= gc_th->min_sleep_time) + wait = gc_th->min_sleep_time; + return wait; +} + +static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) +{ + block_t invalid_user_blocks = sbi->user_block_count - + written_block_count(sbi); + /* + * Background GC is triggered with the following condition. + * 1. There are a number of invalid blocks. + * 2. There is not enough free space. + */ + if (invalid_user_blocks > limit_invalid_user_blocks(sbi) && + free_user_blocks(sbi) < limit_free_user_blocks(sbi)) + return true; + return false; +} + +static inline int is_idle(struct f2fs_sb_info *sbi) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct request_queue *q = bdev_get_queue(bdev); + struct request_list *rl = &q->rq; + return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); +} diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c new file mode 100644 index 00000000000..6eb8d269b53 --- /dev/null +++ b/fs/f2fs/hash.c @@ -0,0 +1,101 @@ +/* + * fs/f2fs/hash.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext3/hash.c + * + * Copyright (C) 2002 by Theodore Ts'o + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
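
Taken together, the gc.h helpers above trigger background GC only when invalid blocks exceed LIMIT_INVALID_BLOCK percent of the user blocks and free space has dropped below LIMIT_FREE_BLOCK percent of what is reclaimable. The same condition with explicit, made-up block counts (free_blocks here stands in for free_user_blocks(), i.e. free space minus overprovisioning):

#include <stdio.h>
#include <stdbool.h>

#define LIMIT_INVALID_BLOCK 40   /* % of user blocks */
#define LIMIT_FREE_BLOCK    40   /* % of reclaimable blocks */

/* Same condition as has_enough_invalid_blocks(), with explicit inputs. */
static bool should_bg_gc(unsigned long user_blocks,
			 unsigned long written_blocks,
			 unsigned long free_blocks)
{
	unsigned long invalid = user_blocks - written_blocks;
	unsigned long limit_invalid = user_blocks * LIMIT_INVALID_BLOCK / 100;
	unsigned long limit_free = invalid * LIMIT_FREE_BLOCK / 100;

	return invalid > limit_invalid && free_blocks < limit_free;
}

int main(void)
{
	/* 1,000,000 user blocks, 450,000 of them stale, 100,000 still free. */
	printf("trigger: %d\n", should_bg_gc(1000000, 550000, 100000)); /* 1 */
	/* Same garbage, but plenty of free space left: stay quiet. */
	printf("trigger: %d\n", should_bg_gc(1000000, 550000, 400000)); /* 0 */
	return 0;
}
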
+ */ +#include +#include +#include +#include +#include + +#include "f2fs.h" + +/* + * Hashing code copied from ext3 + */ +#define DELTA 0x9E3779B9 + +static void TEA_transform(unsigned int buf[4], unsigned int const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); + } while (--n); + + buf[0] += b0; + buf[1] += b1; +} + +static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) +{ + unsigned pad, val; + int i; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > num * 4) + len = num * 4; + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = msg[i] + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) +{ + __u32 hash; + f2fs_hash_t f2fs_hash; + const char *p; + __u32 in[8], buf[4]; + + if ((len <= 2) && (name[0] == '.') && + (name[1] == '.' || name[1] == '\0')) + return 0; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + p = name; + while (1) { + str2hashbuf(p, len, in, 4); + TEA_transform(buf, in); + p += 16; + if (len <= 16) + break; + len -= 16; + } + hash = buf[0]; + f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); + return f2fs_hash; +} diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c new file mode 100644 index 00000000000..b65e8f22f71 --- /dev/null +++ b/fs/f2fs/inode.c @@ -0,0 +1,273 @@ +/* + * fs/f2fs/inode.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
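
The dentry hash above is the ext3 TEA-based hash with f2fs' fixed seed. A user-space harness of the same steps (uint32_t in place of __u32, with the '.'/'..' special case and the on-disk little-endian conversion omitted) makes it easy to print the hash of a sample name:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define DELTA 0x9E3779B9
#define HASH_COL_BIT ((uint32_t)0x1 << 31)

static void tea_transform(uint32_t buf[4], const uint32_t in[8])
{
	uint32_t sum = 0, b0 = buf[0], b1 = buf[1];
	uint32_t a = in[0], b = in[1], c = in[2], d = in[3];
	int n = 16;

	do {
		sum += DELTA;
		b0 += ((b1 << 4) + a) ^ (b1 + sum) ^ ((b1 >> 5) + b);
		b1 += ((b0 << 4) + c) ^ (b0 + sum) ^ ((b0 >> 5) + d);
	} while (--n);
	buf[0] += b0;
	buf[1] += b1;
}

static void str2hashbuf(const char *msg, size_t len, uint32_t *buf, int num)
{
	uint32_t pad = (uint32_t)len | ((uint32_t)len << 8), val;
	size_t i;

	pad |= pad << 16;
	val = pad;
	if (len > (size_t)num * 4)
		len = (size_t)num * 4;
	for (i = 0; i < len; i++) {
		if ((i % 4) == 0)
			val = pad;
		val = msg[i] + (val << 8);
		if ((i % 4) == 3) {
			*buf++ = val;
			val = pad;
			num--;
		}
	}
	if (--num >= 0)
		*buf++ = val;
	while (--num >= 0)
		*buf++ = pad;
}

static uint32_t dentry_hash(const char *name)
{
	/* Same fixed seed as f2fs_dentry_hash(). */
	uint32_t buf[4] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 };
	uint32_t in[8];
	size_t len = strlen(name);
	const char *p = name;

	while (1) {
		str2hashbuf(p, len, in, 4);
		tea_transform(buf, in);
		p += 16;
		if (len <= 16)
			break;
		len -= 16;
	}
	return buf[0] & ~HASH_COL_BIT;
}

int main(void)
{
	printf("hash(\"example.txt\") = 0x%08x\n", dentry_hash("example.txt"));
	return 0;
}
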
+ */ +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" + +#include + +void f2fs_set_inode_flags(struct inode *inode) +{ + unsigned int flags = F2FS_I(inode)->i_flags; + + inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | + S_NOATIME | S_DIRSYNC); + + if (flags & FS_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & FS_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & FS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & FS_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & FS_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +} + +static int do_read_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct page *node_page; + struct f2fs_node *rn; + struct f2fs_inode *ri; + + /* Check if ino is within scope */ + if (check_nid_range(sbi, inode->i_ino)) { + f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", + (unsigned long) inode->i_ino); + return -EINVAL; + } + + node_page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); + + rn = F2FS_NODE(node_page); + ri = &(rn->i); + + inode->i_mode = le16_to_cpu(ri->i_mode); + inode->i_uid = le32_to_cpu(ri->i_uid); + inode->i_gid = le32_to_cpu(ri->i_gid); + set_nlink(inode, le32_to_cpu(ri->i_links)); + inode->i_size = le64_to_cpu(ri->i_size); + inode->i_blocks = le64_to_cpu(ri->i_blocks); + + inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); + inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); + inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime); + inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec); + inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); + inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); + inode->i_generation = le32_to_cpu(ri->i_generation); + if (ri->i_addr[0]) + inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); + else + inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); + + fi->i_current_depth = le32_to_cpu(ri->i_current_depth); + fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); + fi->i_flags = le32_to_cpu(ri->i_flags); + fi->flags = 0; + fi->i_advise = ri->i_advise; + fi->i_pino = le32_to_cpu(ri->i_pino); + get_extent_info(&fi->ext, ri->i_ext); + get_inline_info(fi, ri); + f2fs_put_page(node_page, 1); + return 0; +} + +struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + int ret = 0; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + + if (!(inode->i_state & I_NEW)) { + trace_f2fs_iget(inode); + return inode; + } + if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) + goto make_now; + + ret = do_read_inode(inode); + if (ret) + goto bad_inode; +make_now: + if (ino == F2FS_NODE_INO(sbi)) { + inode->i_mapping->a_ops = &f2fs_node_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + } else if (ino == F2FS_META_INO(sbi)) { + inode->i_mapping->a_ops = &f2fs_meta_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + } else if (S_ISREG(inode->i_mode)) { + inode->i_op = &f2fs_file_inode_operations; + inode->i_fop = &f2fs_file_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &f2fs_dir_inode_operations; + inode->i_fop = &f2fs_dir_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = &f2fs_symlink_inode_operations; + 
inode->i_mapping->a_ops = &f2fs_dblock_aops; + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + inode->i_op = &f2fs_special_inode_operations; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + } else { + ret = -EIO; + goto bad_inode; + } + unlock_new_inode(inode); + trace_f2fs_iget(inode); + return inode; + +bad_inode: + iget_failed(inode); + trace_f2fs_iget_exit(inode, ret); + return ERR_PTR(ret); +} + +void update_inode(struct inode *inode, struct page *node_page) +{ + struct f2fs_node *rn; + struct f2fs_inode *ri; + + f2fs_wait_on_page_writeback(node_page, NODE, false); + + rn = F2FS_NODE(node_page); + ri = &(rn->i); + + ri->i_mode = cpu_to_le16(inode->i_mode); + ri->i_advise = F2FS_I(inode)->i_advise; + ri->i_uid = cpu_to_le32(inode->i_uid); + ri->i_gid = cpu_to_le32(inode->i_gid); + ri->i_links = cpu_to_le32(inode->i_nlink); + ri->i_size = cpu_to_le64(i_size_read(inode)); + ri->i_blocks = cpu_to_le64(inode->i_blocks); + set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); + set_raw_inline(F2FS_I(inode), ri); + + ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); + ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); + ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); + ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth); + ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); + ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); + ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); + ri->i_generation = cpu_to_le32(inode->i_generation); + + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + if (old_valid_dev(inode->i_rdev)) { + ri->i_addr[0] = + cpu_to_le32(old_encode_dev(inode->i_rdev)); + ri->i_addr[1] = 0; + } else { + ri->i_addr[0] = 0; + ri->i_addr[1] = + cpu_to_le32(new_encode_dev(inode->i_rdev)); + ri->i_addr[2] = 0; + } + } + + set_cold_node(inode, node_page); + set_page_dirty(node_page); + clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); +} + +int update_inode_page(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *node_page; + + node_page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); + + update_inode(inode, node_page); + f2fs_put_page(node_page, 1); + return 0; +} + +int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ret, ilock; + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + return 0; + + if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) + return 0; + + /* + * We need to lock here to prevent from producing dirty node pages + * during the urgent cleaning time when runing out of free sections. 
+ */ + ilock = mutex_lock_op(sbi); + ret = update_inode_page(inode); + mutex_unlock_op(sbi, ilock); + + if (wbc) + f2fs_balance_fs(sbi); + + return ret; +} + +/* + * Called at the last iput() if i_nlink is zero + */ +void f2fs_evict_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; + + trace_f2fs_evict_inode(inode); + truncate_inode_pages(&inode->i_data, 0); + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + goto no_delete; + + BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents)); + remove_dirty_dir_inode(inode); + + if (inode->i_nlink || is_bad_inode(inode)) + goto no_delete; + + set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); + i_size_write(inode, 0); + + if (F2FS_HAS_BLOCKS(inode)) + f2fs_truncate(inode); + + ilock = mutex_lock_op(sbi); + remove_inode_page(inode); + mutex_unlock_op(sbi, ilock); + +no_delete: + end_writeback(inode); +} diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c new file mode 100644 index 00000000000..aa0c4539ab0 --- /dev/null +++ b/fs/f2fs/namei.c @@ -0,0 +1,557 @@ +/* + * fs/f2fs/namei.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "xattr.h" +#include "acl.h" +#include + +static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + nid_t ino; + struct inode *inode; + bool nid_free = false; + int err, ilock; + + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + ilock = mutex_lock_op(sbi); + if (!alloc_nid(sbi, &ino)) { + mutex_unlock_op(sbi, ilock); + err = -ENOSPC; + goto fail; + } + mutex_unlock_op(sbi, ilock); + + if (IS_ANDROID_EMU(sbi, F2FS_I(dir), F2FS_I(dir))) + f2fs_android_emu(sbi, inode, &inode->i_uid, + &inode->i_gid, &mode); + else { + inode->i_uid = current_fsuid(); + + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else { + inode->i_gid = current_fsgid(); + } + } + + inode->i_ino = ino; + inode->i_mode = mode; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_generation = sbi->s_next_generation++; + + err = insert_inode_locked(inode); + if (err) { + err = -EINVAL; + nid_free = true; + goto out; + } + trace_f2fs_new_inode(inode, 0); + mark_inode_dirty(inode); + return inode; + +out: + clear_nlink(inode); + unlock_new_inode(inode); +fail: + trace_f2fs_new_inode(inode, err); + make_bad_inode(inode); + iput(inode); + if (nid_free) + alloc_nid_failed(sbi, ino); + return ERR_PTR(err); +} + +static int is_multimedia_file(const unsigned char *s, const char *sub) +{ + size_t slen = strlen(s); + size_t sublen = strlen(sub); + + if (sublen > slen) + return 0; + + return !strncasecmp(s + slen - sublen, sub, sublen); +} + +/* + * Set multimedia files as cold files for hot/cold data separation + */ +static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, + const unsigned char *name) +{ + int i; + __u8 (*extlist)[8] = sbi->raw_super->extension_list; + + int count = le32_to_cpu(sbi->raw_super->extension_count); + for (i = 0; i < count; i++) { + if (is_multimedia_file(name, extlist[i])) { + file_set_cold(inode); + break; + } + } 
+} + +static int f2fs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + nid_t ino = 0; + int err, ilock; + + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) + set_cold_files(sbi, inode, dentry->d_name.name); + + inode->i_op = &f2fs_file_inode_operations; + inode->i_fop = &f2fs_file_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + ino = inode->i_ino; + + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + alloc_nid_done(sbi, ino); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + return 0; +out: + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, ino); + return err; +} + +static int f2fs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + struct super_block *sb; + struct f2fs_sb_info *sbi; + int err, ilock; + + if (inode->i_nlink >= F2FS_LINK_MAX) + return -EMLINK; + + sb = dir->i_sb; + sbi = F2FS_SB(sb); + + f2fs_balance_fs(sbi); + + inode->i_ctime = CURRENT_TIME; + ihold(inode); + + set_inode_flag(F2FS_I(inode), FI_INC_LINK); + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + d_instantiate(dentry, inode); + return 0; +out: + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + iput(inode); + return err; +} + +struct dentry *f2fs_get_parent(struct dentry *child) +{ + struct qstr dotdot = {.name = "..", .len = 2}; + unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot); + if (!ino) + return ERR_PTR(-ENOENT); + return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); +} + +static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode = NULL; + struct f2fs_dir_entry *de; + struct page *page; + + if (dentry->d_name.len > F2FS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + de = f2fs_find_entry(dir, &dentry->d_name, &page); + if (de) { + nid_t ino = le32_to_cpu(de->ino); + kunmap(page); + f2fs_put_page(page, 0); + + inode = f2fs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + } + + return d_splice_alias(inode, dentry); +} + +static int f2fs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode = dentry->d_inode; + struct f2fs_dir_entry *de; + struct page *page; + int err = -ENOENT; + int ilock; + + trace_f2fs_unlink_enter(dir, dentry); + f2fs_balance_fs(sbi); + + de = f2fs_find_entry(dir, &dentry->d_name, &page); + if (!de) + goto fail; + + err = acquire_orphan_inode(sbi); + if (err) { + kunmap(page); + f2fs_put_page(page, 0); + goto fail; + } + + ilock = mutex_lock_op(sbi); + f2fs_delete_entry(de, page, inode); + mutex_unlock_op(sbi, ilock); + + /* In order to evict this inode, we set it dirty */ + mark_inode_dirty(inode); +fail: + trace_f2fs_unlink_exit(inode, err); + return err; +} + +static int f2fs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + size_t symlen = strlen(symname) + 1; + int err, ilock; + + f2fs_balance_fs(sbi); + 
+ inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &f2fs_symlink_inode_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + err = page_symlink(inode, symname, symlen); + alloc_nid_done(sbi, inode->i_ino); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + return err; +out: + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct f2fs_sb_info *sbi; + struct inode *inode; + int err, ilock; + + if (dir->i_nlink >= F2FS_LINK_MAX) + return -EMLINK; + + sbi = F2FS_SB(dir->i_sb); + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, S_IFDIR | mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &f2fs_dir_inode_operations; + inode->i_fop = &f2fs_dir_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + + set_inode_flag(F2FS_I(inode), FI_INC_LINK); + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out_fail; + + alloc_nid_done(sbi, inode->i_ino); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + + return 0; + +out_fail: + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + if (f2fs_empty_dir(inode)) + return f2fs_unlink(dir, dentry); + return -ENOTEMPTY; +} + +static int f2fs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + int err = 0; + int ilock; + + if (!new_valid_dev(rdev)) + return -EINVAL; + + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + init_special_inode(inode, inode->i_mode, rdev); + inode->i_op = &f2fs_special_inode_operations; + + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + alloc_nid_done(sbi, inode->i_ino); + d_instantiate(dentry, inode); + unlock_new_inode(inode); + return 0; +out: + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct super_block *sb = old_dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + struct page *old_dir_page; + struct page *old_page, *new_page; + struct f2fs_dir_entry *old_dir_entry = NULL; + struct f2fs_dir_entry *old_entry; + struct f2fs_dir_entry *new_entry; + int err = -ENOENT, ilock = -1; + + f2fs_balance_fs(sbi); + + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); + if (!old_entry) + goto out; + + if (S_ISDIR(old_inode->i_mode)) { + err = -EIO; + old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); + if (!old_dir_entry) + goto out_old; + } + 
+ ilock = mutex_lock_op(sbi); + + if (new_inode) { + + err = -ENOTEMPTY; + if (old_dir_entry && !f2fs_empty_dir(new_inode)) + goto out_dir; + + err = -ENOENT; + new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, + &new_page); + if (!new_entry) + goto out_dir; + + err = acquire_orphan_inode(sbi); + if (err) + goto put_out_dir; + + if (update_dent_inode(old_inode, &new_dentry->d_name)) { + release_orphan_inode(sbi); + goto put_out_dir; + } + + f2fs_set_link(new_dir, new_entry, new_page, old_inode); + + new_inode->i_ctime = CURRENT_TIME; + if (old_dir_entry) + drop_nlink(new_inode); + drop_nlink(new_inode); + + if (!new_inode->i_nlink) + add_orphan_inode(sbi, new_inode->i_ino); + else + release_orphan_inode(sbi); + + update_inode_page(old_inode); + update_inode_page(new_inode); + } else { + if (old_dir_entry) { + err = -EMLINK; + if (new_dir->i_nlink >= F2FS_LINK_MAX) + goto out_dir; + } + + err = f2fs_add_link(new_dentry, old_inode); + if (err) + goto out_dir; + + if (old_dir_entry) { + inc_nlink(new_dir); + update_inode_page(new_dir); + } + } + + old_inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(old_inode); + + f2fs_delete_entry(old_entry, old_page, NULL); + + if (old_dir_entry) { + if (old_dir != new_dir) { + f2fs_set_link(old_inode, old_dir_entry, + old_dir_page, new_dir); + } else { + kunmap(old_dir_page); + f2fs_put_page(old_dir_page, 0); + } + drop_nlink(old_dir); + update_inode_page(old_dir); + } + + mutex_unlock_op(sbi, ilock); + return 0; + +put_out_dir: + if (PageLocked(new_page)) + f2fs_put_page(new_page, 1); + else + f2fs_put_page(new_page, 0); +out_dir: + if (old_dir_entry) { + kunmap(old_dir_page); + f2fs_put_page(old_dir_page, 0); + } + mutex_unlock_op(sbi, ilock); +out_old: + kunmap(old_page); + f2fs_put_page(old_page, 0); +out: + return err; +} + +const struct inode_operations f2fs_dir_inode_operations = { + .create = f2fs_create, + .lookup = f2fs_lookup, + .link = f2fs_link, + .unlink = f2fs_unlink, + .symlink = f2fs_symlink, + .mkdir = f2fs_mkdir, + .rmdir = f2fs_rmdir, + .mknod = f2fs_mknod, + .rename = f2fs_rename, + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +const struct inode_operations f2fs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +const struct inode_operations f2fs_special_inode_operations = { + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c new file mode 100644 index 00000000000..eac51226293 --- /dev/null +++ b/fs/f2fs/node.c @@ -0,0 +1,1859 @@ +/* + * fs/f2fs/node.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include + +static struct kmem_cache *nat_entry_slab; +static struct kmem_cache *free_nid_slab; + +static void clear_node_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + unsigned int long flags; + + if (PageDirty(page)) { + spin_lock_irqsave(&mapping->tree_lock, flags); + radix_tree_tag_clear(&mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + + clear_page_dirty_for_io(page); + dec_page_count(sbi, F2FS_DIRTY_NODES); + } + ClearPageUptodate(page); +} + +static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + pgoff_t index = current_nat_addr(sbi, nid); + return get_meta_page(sbi, index); +} + +static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct page *src_page; + struct page *dst_page; + pgoff_t src_off; + pgoff_t dst_off; + void *src_addr; + void *dst_addr; + struct f2fs_nm_info *nm_i = NM_I(sbi); + + src_off = current_nat_addr(sbi, nid); + dst_off = next_nat_addr(sbi, src_off); + + /* get current nat block page with lock */ + src_page = get_meta_page(sbi, src_off); + + /* Dirty src_page means that it is already the new target NAT page. */ + if (PageDirty(src_page)) + return src_page; + + dst_page = grab_meta_page(sbi, dst_off); + + src_addr = page_address(src_page); + dst_addr = page_address(dst_page); + memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); + set_page_dirty(dst_page); + f2fs_put_page(src_page, 1); + + set_to_next_nat(nm_i, nid); + + return dst_page; +} + +/* + * Readahead NAT pages + */ +static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct blk_plug plug; + struct page *page; + pgoff_t index; + int i; + + blk_start_plug(&plug); + + for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { + if (nid >= nm_i->max_nid) + nid = 0; + index = current_nat_addr(sbi, nid); + + page = grab_cache_page(mapping, index); + if (!page) + continue; + if (PageUptodate(page)) { + f2fs_put_page(page, 1); + continue; + } + if (f2fs_readpage(sbi, page, index, READ)) + continue; + + f2fs_put_page(page, 0); + } + blk_finish_plug(&plug); +} + +static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) +{ + return radix_tree_lookup(&nm_i->nat_root, n); +} + +static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, + nid_t start, unsigned int nr, struct nat_entry **ep) +{ + return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr); +} + +static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) +{ + list_del(&e->list); + radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); + nm_i->nat_cnt--; + kmem_cache_free(nat_entry_slab, e); +} + +int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; + int is_cp = 1; + + read_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e && !e->checkpointed) + is_cp = 0; + read_unlock(&nm_i->nat_tree_lock); + return is_cp; +} + +static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) +{ + struct nat_entry *new; + + new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); + if (!new) + return NULL; + if 
(radix_tree_insert(&nm_i->nat_root, nid, new)) { + kmem_cache_free(nat_entry_slab, new); + return NULL; + } + memset(new, 0, sizeof(struct nat_entry)); + nat_set_nid(new, nid); + list_add_tail(&new->list, &nm_i->nat_entries); + nm_i->nat_cnt++; + return new; +} + +static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, + struct f2fs_nat_entry *ne) +{ + struct nat_entry *e; +retry: + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (!e) { + e = grab_nat_entry(nm_i, nid); + if (!e) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); + nat_set_ino(e, le32_to_cpu(ne->ino)); + nat_set_version(e, ne->version); + e->checkpointed = true; + } + write_unlock(&nm_i->nat_tree_lock); +} + +static int set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, + block_t new_blkaddr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; +retry: + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, ni->nid); + if (!e) { + e = grab_nat_entry(nm_i, ni->nid); + if (!e) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + e->ni = *ni; + e->checkpointed = true; + BUG_ON(ni->blk_addr == NEW_ADDR); + } else if (new_blkaddr == NEW_ADDR) { + /* + * when nid is reallocated, + * previous nat entry can be remained in nat cache. + * So, reinitialize it with new information. + */ + e->ni = *ni; + if (ni->blk_addr != NULL_ADDR) { + f2fs_msg(sbi->sb, KERN_ERR, "node block address is " + "already set: %u", ni->blk_addr); + f2fs_handle_error(sbi); + /* just give up on this node */ + write_unlock(&nm_i->nat_tree_lock); + return -EIO; + } + } + + if (new_blkaddr == NEW_ADDR) + e->checkpointed = false; + + /* sanity check */ + BUG_ON(nat_get_blkaddr(e) != ni->blk_addr); + BUG_ON(nat_get_blkaddr(e) == NULL_ADDR && + new_blkaddr == NULL_ADDR); + BUG_ON(nat_get_blkaddr(e) == NEW_ADDR && + new_blkaddr == NEW_ADDR); + BUG_ON(nat_get_blkaddr(e) != NEW_ADDR && + nat_get_blkaddr(e) != NULL_ADDR && + new_blkaddr == NEW_ADDR); + + /* increament version no as node is removed */ + if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { + unsigned char version = nat_get_version(e); + nat_set_version(e, inc_node_version(version)); + } + + /* change address */ + nat_set_blkaddr(e, new_blkaddr); + __set_nat_cache_dirty(nm_i, e); + write_unlock(&nm_i->nat_tree_lock); + return 0; +} + +static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) + return 0; + + write_lock(&nm_i->nat_tree_lock); + while (nr_shrink && !list_empty(&nm_i->nat_entries)) { + struct nat_entry *ne; + ne = list_first_entry(&nm_i->nat_entries, + struct nat_entry, list); + __del_from_nat_cache(nm_i, ne); + nr_shrink--; + } + write_unlock(&nm_i->nat_tree_lock); + return nr_shrink; +} + +/* + * This function returns always success + */ +void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + nid_t start_nid = START_NID(nid); + struct f2fs_nat_block *nat_blk; + struct page *page = NULL; + struct f2fs_nat_entry ne; + struct nat_entry *e; + int i; + + memset(&ne, 0, sizeof(struct f2fs_nat_entry)); + ni->nid = nid; + + /* Check nat cache */ + read_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e) { + ni->ino = nat_get_ino(e); + ni->blk_addr = 
nat_get_blkaddr(e); + ni->version = nat_get_version(e); + } + read_unlock(&nm_i->nat_tree_lock); + if (e) + return; + + /* Check current segment summary */ + mutex_lock(&curseg->curseg_mutex); + i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); + if (i >= 0) { + ne = nat_in_journal(sum, i); + node_info_from_raw_nat(ni, &ne); + } + mutex_unlock(&curseg->curseg_mutex); + if (i >= 0) + goto cache; + + /* Fill node_info from nat page */ + page = get_current_nat_page(sbi, start_nid); + nat_blk = (struct f2fs_nat_block *)page_address(page); + ne = nat_blk->entries[nid - start_nid]; + node_info_from_raw_nat(ni, &ne); + f2fs_put_page(page, 1); +cache: + /* cache nat entry */ + cache_nat_entry(NM_I(sbi), nid, &ne); +} + +/* + * The maximum depth is four. + * Offset[0] will have raw inode offset. + */ +static int get_node_path(struct f2fs_inode_info *fi, long block, + int offset[4], unsigned int noffset[4]) +{ + const long direct_index = ADDRS_PER_INODE(fi); + const long direct_blks = ADDRS_PER_BLOCK; + const long dptrs_per_blk = NIDS_PER_BLOCK; + const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; + const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK; + int n = 0; + int level = 0; + + noffset[0] = 0; + + if (block < direct_index) { + offset[n] = block; + goto got; + } + block -= direct_index; + if (block < direct_blks) { + offset[n++] = NODE_DIR1_BLOCK; + noffset[n] = 1; + offset[n] = block; + level = 1; + goto got; + } + block -= direct_blks; + if (block < direct_blks) { + offset[n++] = NODE_DIR2_BLOCK; + noffset[n] = 2; + offset[n] = block; + level = 1; + goto got; + } + block -= direct_blks; + if (block < indirect_blks) { + offset[n++] = NODE_IND1_BLOCK; + noffset[n] = 3; + offset[n++] = block / direct_blks; + noffset[n] = 4 + offset[n - 1]; + offset[n] = block % direct_blks; + level = 2; + goto got; + } + block -= indirect_blks; + if (block < indirect_blks) { + offset[n++] = NODE_IND2_BLOCK; + noffset[n] = 4 + dptrs_per_blk; + offset[n++] = block / direct_blks; + noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; + offset[n] = block % direct_blks; + level = 2; + goto got; + } + block -= indirect_blks; + if (block < dindirect_blks) { + offset[n++] = NODE_DIND_BLOCK; + noffset[n] = 5 + (dptrs_per_blk * 2); + offset[n++] = block / indirect_blks; + noffset[n] = 6 + (dptrs_per_blk * 2) + + offset[n - 1] * (dptrs_per_blk + 1); + offset[n++] = (block / direct_blks) % dptrs_per_blk; + noffset[n] = 7 + (dptrs_per_blk * 2) + + offset[n - 2] * (dptrs_per_blk + 1) + + offset[n - 1]; + offset[n] = block % direct_blks; + level = 3; + goto got; + } else { + BUG(); + } +got: + return level; +} + +/* + * Caller should call f2fs_put_dnode(dn). + * Also, it should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op() only if ro is not set RDONLY_NODE. + * In the case of RDONLY_NODE, we don't need to care about mutex. 
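
get_node_path() above maps a file block index to at most four offsets down the inode's node tree: in-inode pointers, then two direct nodes, two indirect nodes, and one double-indirect node. With the usual geometry (hard-coded below only for illustration) the level boundaries fall as follows:

#include <stdio.h>

#define ADDRS_PER_INODE 923    /* assumed geometry, for illustration only */
#define ADDRS_PER_BLOCK 1018
#define NIDS_PER_BLOCK  1018

/* Which tree level holds the pointer for file block 'blk'?
 * 0: inode, 1: direct node, 2: indirect node, 3: double indirect. */
static int node_level(long blk)
{
	long direct = ADDRS_PER_BLOCK;
	long indirect = (long)ADDRS_PER_BLOCK * NIDS_PER_BLOCK;

	if (blk < ADDRS_PER_INODE)
		return 0;
	blk -= ADDRS_PER_INODE;
	if (blk < 2 * direct)                 /* NODE_DIR1 + NODE_DIR2 */
		return 1;
	blk -= 2 * direct;
	if (blk < 2 * indirect)               /* NODE_IND1 + NODE_IND2 */
		return 2;
	return 3;                             /* NODE_DIND */
}

int main(void)
{
	long samples[] = { 0, 922, 923, 2958, 2959, 2075606, 2075607 };

	for (int i = 0; i < 7; i++)
		printf("block %8ld -> level %d\n",
		       samples[i], node_level(samples[i]));
	return 0;
}
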
+ */ +int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *npage[4]; + struct page *parent; + int offset[4]; + unsigned int noffset[4]; + nid_t nids[4]; + int level, i; + int err = 0; + + level = get_node_path(F2FS_I(dn->inode), index, offset, noffset); + + nids[0] = dn->inode->i_ino; + npage[0] = dn->inode_page; + + if (!npage[0]) { + npage[0] = get_node_page(sbi, nids[0]); + if (IS_ERR(npage[0])) + return PTR_ERR(npage[0]); + } + parent = npage[0]; + if (level != 0) + nids[1] = get_nid(parent, offset[0], true); + dn->inode_page = npage[0]; + dn->inode_page_locked = true; + + /* get indirect or direct nodes */ + for (i = 1; i <= level; i++) { + bool done = false; + + if (!nids[i] && mode == ALLOC_NODE) { + /* alloc new node */ + if (!alloc_nid(sbi, &(nids[i]))) { + err = -ENOSPC; + goto release_pages; + } + + dn->nid = nids[i]; + npage[i] = new_node_page(dn, noffset[i], NULL); + if (IS_ERR(npage[i])) { + alloc_nid_failed(sbi, nids[i]); + err = PTR_ERR(npage[i]); + goto release_pages; + } + + set_nid(parent, offset[i - 1], nids[i], i == 1); + alloc_nid_done(sbi, nids[i]); + done = true; + } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { + npage[i] = get_node_page_ra(parent, offset[i - 1]); + if (IS_ERR(npage[i])) { + err = PTR_ERR(npage[i]); + goto release_pages; + } + done = true; + } + if (i == 1) { + dn->inode_page_locked = false; + unlock_page(parent); + } else { + f2fs_put_page(parent, 1); + } + + if (!done) { + npage[i] = get_node_page(sbi, nids[i]); + if (IS_ERR(npage[i])) { + err = PTR_ERR(npage[i]); + f2fs_put_page(npage[0], 0); + goto release_out; + } + } + if (i < level) { + parent = npage[i]; + nids[i + 1] = get_nid(parent, offset[i], false); + } + } + dn->nid = nids[level]; + dn->ofs_in_node = offset[level]; + dn->node_page = npage[level]; + dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); + return 0; + +release_pages: + f2fs_put_page(parent, 1); + if (i > 1) + f2fs_put_page(npage[0], 0); +release_out: + dn->inode_page = NULL; + dn->node_page = NULL; + return err; +} + +static void truncate_node(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct node_info ni; + + get_node_info(sbi, dn->nid, &ni); + if (dn->inode->i_blocks == 0) { + if (ni.blk_addr != NULL_ADDR) { + f2fs_msg(sbi->sb, KERN_ERR, + "empty node still has block address %u ", + ni.blk_addr); + f2fs_handle_error(sbi); + } + goto invalidate; + } + BUG_ON(ni.blk_addr == NULL_ADDR); + + /* Deallocate node address */ + invalidate_blocks(sbi, ni.blk_addr); + dec_valid_node_count(sbi, dn->inode, 1); + set_node_addr(sbi, &ni, NULL_ADDR); + + if (dn->nid == dn->inode->i_ino) { + remove_orphan_inode(sbi, dn->nid); + dec_valid_inode_count(sbi); + } else { + sync_inode_page(dn); + } +invalidate: + clear_node_page_dirty(dn->node_page); + F2FS_SET_SB_DIRT(sbi); + + f2fs_put_page(dn->node_page, 1); + dn->node_page = NULL; + trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); +} + +static int truncate_dnode(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *page; + + if (dn->nid == 0) + return 1; + + /* get direct node */ + page = get_node_page(sbi, dn->nid); + if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) + return 1; + else if (IS_ERR(page)) + return PTR_ERR(page); + + /* Make dnode_of_data for parameter */ + dn->node_page = page; + dn->ofs_in_node = 0; + truncate_data_blocks(dn); + truncate_node(dn); + return 1; 
+} + +static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, + int ofs, int depth) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct dnode_of_data rdn = *dn; + struct page *page; + struct f2fs_node *rn; + nid_t child_nid; + unsigned int child_nofs; + int freed = 0; + int i, ret; + + if (dn->nid == 0) + return NIDS_PER_BLOCK + 1; + + trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); + + page = get_node_page(sbi, dn->nid); + if (IS_ERR(page)) { + trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); + return PTR_ERR(page); + } + + rn = F2FS_NODE(page); + if (depth < 3) { + for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { + child_nid = le32_to_cpu(rn->in.nid[i]); + if (child_nid == 0) + continue; + rdn.nid = child_nid; + ret = truncate_dnode(&rdn); + if (ret < 0) + goto out_err; + set_nid(page, i, 0, false); + } + } else { + child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; + for (i = ofs; i < NIDS_PER_BLOCK; i++) { + child_nid = le32_to_cpu(rn->in.nid[i]); + if (child_nid == 0) { + child_nofs += NIDS_PER_BLOCK + 1; + continue; + } + rdn.nid = child_nid; + ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); + if (ret == (NIDS_PER_BLOCK + 1)) { + set_nid(page, i, 0, false); + child_nofs += ret; + } else if (ret < 0 && ret != -ENOENT) { + goto out_err; + } + } + freed = child_nofs; + } + + if (!ofs) { + /* remove current indirect node */ + dn->node_page = page; + truncate_node(dn); + freed++; + } else { + f2fs_put_page(page, 1); + } + trace_f2fs_truncate_nodes_exit(dn->inode, freed); + return freed; + +out_err: + f2fs_put_page(page, 1); + trace_f2fs_truncate_nodes_exit(dn->inode, ret); + return ret; +} + +static int truncate_partial_nodes(struct dnode_of_data *dn, + struct f2fs_inode *ri, int *offset, int depth) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *pages[2]; + nid_t nid[3]; + nid_t child_nid; + int err = 0; + int i; + int idx = depth - 2; + + nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); + if (!nid[0]) + return 0; + + /* get indirect nodes in the path */ + for (i = 0; i < depth - 1; i++) { + /* refernece count'll be increased */ + pages[i] = get_node_page(sbi, nid[i]); + if (IS_ERR(pages[i])) { + depth = i + 1; + err = PTR_ERR(pages[i]); + goto fail; + } + nid[i + 1] = get_nid(pages[i], offset[i + 1], false); + } + + /* free direct nodes linked to a partial indirect node */ + for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) { + child_nid = get_nid(pages[idx], i, false); + if (!child_nid) + continue; + dn->nid = child_nid; + err = truncate_dnode(dn); + if (err < 0) + goto fail; + set_nid(pages[idx], i, 0, false); + } + + if (offset[depth - 1] == 0) { + dn->node_page = pages[idx]; + dn->nid = nid[idx]; + truncate_node(dn); + } else { + f2fs_put_page(pages[idx], 1); + } + offset[idx]++; + offset[depth - 1] = 0; +fail: + for (i = depth - 3; i >= 0; i--) + f2fs_put_page(pages[i], 1); + + trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); + + return err; +} + +/* + * All the block addresses of data and nodes should be nullified. 
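
truncate_inode_blocks() below drives truncate_dnode()/truncate_nodes() above, which free the tree bottom-up: data blocks first, then each direct node, then its parents, with each call reporting how much of its subtree it released. The recursion pattern, reduced to an ordinary in-memory tree with a tiny fanout:

#include <stdio.h>
#include <stdlib.h>

#define FANOUT 3   /* stands in for NIDS_PER_BLOCK, kept tiny on purpose */

struct node {
	struct node *child[FANOUT];   /* NULL means "no such subtree" */
};

/* Free a subtree bottom-up and return how many nodes were released,
 * in the spirit of truncate_nodes() freeing children before the parent. */
static int truncate_tree(struct node *n)
{
	int freed = 0;

	if (!n)
		return 0;
	for (int i = 0; i < FANOUT; i++)
		freed += truncate_tree(n->child[i]);
	free(n);                      /* the parent goes last */
	return freed + 1;
}

static struct node *mknode(void)
{
	return calloc(1, sizeof(struct node));
}

int main(void)
{
	struct node *root = mknode();

	root->child[0] = mknode();
	root->child[2] = mknode();
	root->child[2]->child[1] = mknode();

	printf("freed %d nodes\n", truncate_tree(root));   /* 4 */
	return 0;
}
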
+ */ +int truncate_inode_blocks(struct inode *inode, pgoff_t from) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *node_mapping = sbi->node_inode->i_mapping; + int err = 0, cont = 1; + int level, offset[4], noffset[4]; + unsigned int nofs = 0; + struct f2fs_node *rn; + struct dnode_of_data dn; + struct page *page; + + trace_f2fs_truncate_inode_blocks_enter(inode, from); + + level = get_node_path(F2FS_I(inode), from, offset, noffset); +restart: + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) { + trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); + return PTR_ERR(page); + } + + set_new_dnode(&dn, inode, page, NULL, 0); + unlock_page(page); + + rn = F2FS_NODE(page); + switch (level) { + case 0: + case 1: + nofs = noffset[1]; + break; + case 2: + nofs = noffset[1]; + if (!offset[level - 1]) + goto skip_partial; + err = truncate_partial_nodes(&dn, &rn->i, offset, level); + if (err < 0 && err != -ENOENT) + goto fail; + nofs += 1 + NIDS_PER_BLOCK; + break; + case 3: + nofs = 5 + 2 * NIDS_PER_BLOCK; + if (!offset[level - 1]) + goto skip_partial; + err = truncate_partial_nodes(&dn, &rn->i, offset, level); + if (err < 0 && err != -ENOENT) + goto fail; + break; + default: + BUG(); + } + +skip_partial: + while (cont) { + dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]); + switch (offset[0]) { + case NODE_DIR1_BLOCK: + case NODE_DIR2_BLOCK: + err = truncate_dnode(&dn); + break; + + case NODE_IND1_BLOCK: + case NODE_IND2_BLOCK: + err = truncate_nodes(&dn, nofs, offset[1], 2); + break; + + case NODE_DIND_BLOCK: + err = truncate_nodes(&dn, nofs, offset[1], 3); + cont = 0; + break; + + default: + BUG(); + } + if (err < 0 && err != -ENOENT) + goto fail; + if (offset[1] == 0 && + rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { + lock_page(page); + if (page->mapping != node_mapping) { + f2fs_put_page(page, 1); + goto restart; + } + wait_on_page_writeback(page); + rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; + set_page_dirty(page); + unlock_page(page); + } + offset[1] = 0; + offset[0]++; + nofs += err; + } +fail: + f2fs_put_page(page, 0); + trace_f2fs_truncate_inode_blocks_exit(inode, err); + return err > 0 ? 0 : err; +} + +int truncate_xattr_node(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + nid_t nid = F2FS_I(inode)->i_xattr_nid; + struct dnode_of_data dn; + struct page *npage; + + if (!nid) + return 0; + + npage = get_node_page(sbi, nid); + if (IS_ERR(npage)) + return PTR_ERR(npage); + + F2FS_I(inode)->i_xattr_nid = 0; + + /* need to do checkpoint during fsync */ + F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); + + set_new_dnode(&dn, inode, page, npage, nid); + + if (page) + dn.inode_page_locked = 1; + truncate_node(&dn); + return 0; +} + +/* + * Caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). 
+ */ +int remove_inode_page(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + nid_t ino = inode->i_ino; + struct dnode_of_data dn; + int err; + + page = get_node_page(sbi, ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + err = truncate_xattr_node(inode, page); + if (err) { + f2fs_put_page(page, 1); + return err; + } + + /* 0 is possible, after f2fs_new_inode() is failed */ + if (inode->i_blocks != 0 && inode->i_blocks != 1) { + f2fs_msg(sbi->sb, KERN_ERR, "inode %u still has %llu blocks", + ino, inode->i_blocks); + f2fs_handle_error(sbi); + } + set_new_dnode(&dn, inode, page, page, ino); + truncate_node(&dn); + return 0; +} + +struct page *new_inode_page(struct inode *inode, const struct qstr *name) +{ + struct dnode_of_data dn; + + /* allocate inode page for new inode */ + set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); + + /* caller should f2fs_put_page(page, 1); */ + return new_node_page(&dn, 0, NULL); +} + +struct page *new_node_page(struct dnode_of_data *dn, + unsigned int ofs, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct address_space *mapping = sbi->node_inode->i_mapping; + struct node_info old_ni, new_ni; + struct page *page; + int err; + + if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) + return ERR_PTR(-EPERM); + + page = grab_cache_page(mapping, dn->nid); + if (!page) + return ERR_PTR(-ENOMEM); + + if (!inc_valid_node_count(sbi, dn->inode, 1)) { + err = -ENOSPC; + goto fail; + } + + get_node_info(sbi, dn->nid, &old_ni); + + /* Reinitialize old_ni with new node page */ + BUG_ON(old_ni.blk_addr != NULL_ADDR); + new_ni = old_ni; + new_ni.ino = dn->inode->i_ino; + set_node_addr(sbi, &new_ni, NEW_ADDR); + + fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); + set_cold_node(dn->inode, page); + SetPageUptodate(page); + set_page_dirty(page); + + if (ofs == XATTR_NODE_OFFSET) + F2FS_I(dn->inode)->i_xattr_nid = dn->nid; + + dn->node_page = page; + if (ipage) + update_inode(dn->inode, ipage); + else + sync_inode_page(dn); + if (ofs == 0) + inc_valid_inode_count(sbi); + + return page; + +fail: + clear_node_page_dirty(page); + f2fs_put_page(page, 1); + return ERR_PTR(err); +} + +/* + * Caller should do after getting the following values. 
+ * 0: f2fs_put_page(page, 0) + * LOCKED_PAGE: f2fs_put_page(page, 1) + * error: nothing + */ +static int read_node_page(struct page *page, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct node_info ni; + + get_node_info(sbi, page->index, &ni); + + if (ni.blk_addr == NULL_ADDR) { + f2fs_put_page(page, 1); + return -ENOENT; + } + + if (PageUptodate(page)) + return LOCKED_PAGE; + + return f2fs_readpage(sbi, page, ni.blk_addr, type); +} + +/* + * Readahead a node page + */ +void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + struct page *apage; + int err; + + apage = find_get_page(mapping, nid); + if (apage && PageUptodate(apage)) { + f2fs_put_page(apage, 0); + return; + } + f2fs_put_page(apage, 0); + + apage = grab_cache_page(mapping, nid); + if (!apage) + return; + + err = read_node_page(apage, READA); + if (err == 0) + f2fs_put_page(apage, 0); + else if (err == LOCKED_PAGE) + f2fs_put_page(apage, 1); +} + +struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + struct page *page; + int err; +repeat: + page = grab_cache_page(mapping, nid); + if (!page) + return ERR_PTR(-ENOMEM); + + err = read_node_page(page, READ_SYNC); + if (err < 0) + return ERR_PTR(err); + else if (err == LOCKED_PAGE) + goto got_it; + + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } +got_it: + if (nid != nid_of_node(page)) { + f2fs_msg(sbi->sb, KERN_ERR, "page node id does not match " + "request: %lu", nid); + f2fs_handle_error(sbi); + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + mark_page_accessed(page); + return page; +} + +/* + * Return a locked page for the desired node page. + * And, readahead MAX_RA_NODE number of node pages. + */ +struct page *get_node_page_ra(struct page *parent, int start) +{ + struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); + struct address_space *mapping = sbi->node_inode->i_mapping; + struct blk_plug plug; + struct page *page; + int err, i, end; + nid_t nid; + + /* First, try getting the desired direct node. 
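+	 * Readahead of the sibling nids is only issued further down when this
+	 * page actually needs I/O; a LOCKED_PAGE hit jumps straight to page_hit.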
*/ + nid = get_nid(parent, start, false); + if (!nid) + return ERR_PTR(-ENOENT); +repeat: + page = grab_cache_page(mapping, nid); + if (!page) + return ERR_PTR(-ENOMEM); + + err = read_node_page(page, READ_SYNC); + if (err < 0) + return ERR_PTR(err); + else if (err == LOCKED_PAGE) + goto page_hit; + + blk_start_plug(&plug); + + /* Then, try readahead for siblings of the desired node */ + end = start + MAX_RA_NODE; + end = min(end, NIDS_PER_BLOCK); + for (i = start + 1; i < end; i++) { + nid = get_nid(parent, i, false); + if (!nid) + continue; + ra_node_page(sbi, nid); + } + + blk_finish_plug(&plug); + + lock_page(page); + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } +page_hit: + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + mark_page_accessed(page); + return page; +} + +void sync_inode_page(struct dnode_of_data *dn) +{ + if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { + update_inode(dn->inode, dn->node_page); + } else if (dn->inode_page) { + if (!dn->inode_page_locked) + lock_page(dn->inode_page); + update_inode(dn->inode, dn->inode_page); + if (!dn->inode_page_locked) + unlock_page(dn->inode_page); + } else { + update_inode_page(dn->inode); + } +} + +int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, + struct writeback_control *wbc) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + pgoff_t index, end; + struct pagevec pvec; + int step = ino ? 2 : 0; + int nwritten = 0, wrote = 0; + + pagevec_init(&pvec, 0); + +next_step: + index = 0; + end = LONG_MAX; + + while (index <= end) { + int i, nr_pages; + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * flushing sequence with step: + * 0. indirect nodes + * 1. dentry dnodes + * 2. file dnodes + */ + if (step == 0 && IS_DNODE(page)) + continue; + if (step == 1 && (!IS_DNODE(page) || + is_cold_node(page))) + continue; + if (step == 2 && (!IS_DNODE(page) || + !is_cold_node(page))) + continue; + + /* + * If an fsync mode, + * we should not skip writing node pages. 
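+			 * A non-zero ino marks the fsync path, so pages of that
+			 * inode are waited for with lock_page() below instead of
+			 * being skipped by trylock_page().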
+ */ + if (ino && ino_of_node(page) == ino) + lock_page(page); + else if (!trylock_page(page)) + continue; + + if (unlikely(page->mapping != mapping)) { +continue_unlock: + unlock_page(page); + continue; + } + if (ino && ino_of_node(page) != ino) + goto continue_unlock; + + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; + } + + if (!clear_page_dirty_for_io(page)) + goto continue_unlock; + + /* called by fsync() */ + if (ino && IS_DNODE(page)) { + int mark = !is_checkpointed_node(sbi, ino); + set_fsync_mark(page, 1); + if (IS_INODE(page)) + set_dentry_mark(page, mark); + nwritten++; + } else { + set_fsync_mark(page, 0); + set_dentry_mark(page, 0); + } + mapping->a_ops->writepage(page, wbc); + wrote++; + + if (--wbc->nr_to_write == 0) + break; + } + pagevec_release(&pvec); + cond_resched(); + + if (wbc->nr_to_write == 0) { + step = 2; + break; + } + } + + if (step < 2) { + step++; + goto next_step; + } + + if (wrote) + f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL); + + return nwritten; +} + +static int f2fs_write_node_page(struct page *page, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + nid_t nid; + block_t new_addr; + struct node_info ni; + + if (sbi->por_doing) + goto redirty_out; + + wait_on_page_writeback(page); + + /* get old block addr of this node page */ + nid = nid_of_node(page); + BUG_ON(page->index != nid); + + get_node_info(sbi, nid, &ni); + + /* This page is already truncated */ + if (ni.blk_addr == NULL_ADDR) { + dec_page_count(sbi, F2FS_DIRTY_NODES); + unlock_page(page); + return 0; + } + + if (wbc->for_reclaim) + goto redirty_out; + + mutex_lock(&sbi->node_write); + set_page_writeback(page); + write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); + set_node_addr(sbi, &ni, new_addr); + dec_page_count(sbi, F2FS_DIRTY_NODES); + mutex_unlock(&sbi->node_write); + unlock_page(page); + return 0; + +redirty_out: + dec_page_count(sbi, F2FS_DIRTY_NODES); + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; +} + +/* + * It is very important to gather dirty pages and write at once, so that we can + * submit a big bio without interfering other data writes. + * Be default, 512 pages (2MB), a segment size, is quite reasonable. 
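+ * For example, with 4KB blocks 512 dirty node pages amount to 2MB,
+ * exactly one segment's worth of I/O.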
+ */ +#define COLLECT_DIRTY_NODES 512 +static int f2fs_write_node_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + long nr_to_write = wbc->nr_to_write; + + /* First check balancing cached NAT entries */ + if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { + f2fs_sync_fs(sbi->sb, true); + return 0; + } + + /* collect a number of dirty node pages and write together */ + if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) + return 0; + + /* if mounting is failed, skip writing node pages */ + wbc->nr_to_write = max_hw_blocks(sbi); + sync_node_pages(sbi, 0, wbc); + wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write); + return 0; +} + +static int f2fs_set_node_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + inc_page_count(sbi, F2FS_DIRTY_NODES); + SetPagePrivate(page); + return 1; + } + return 0; +} + +static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (PageDirty(page)) + dec_page_count(sbi, F2FS_DIRTY_NODES); + ClearPagePrivate(page); +} + +static int f2fs_release_node_page(struct page *page, gfp_t wait) +{ + ClearPagePrivate(page); + return 1; +} + +/* + * Structure of the f2fs node operations + */ +const struct address_space_operations f2fs_node_aops = { + .writepage = f2fs_write_node_page, + .writepages = f2fs_write_node_pages, + .set_page_dirty = f2fs_set_node_page_dirty, + .invalidatepage = f2fs_invalidate_node_page, + .releasepage = f2fs_release_node_page, +}; + +static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) +{ + struct list_head *this; + struct free_nid *i; + list_for_each(this, head) { + i = list_entry(this, struct free_nid, list); + if (i->nid == n) + return i; + } + return NULL; +} + +static void __del_from_free_nid_list(struct free_nid *i) +{ + list_del(&i->list); + kmem_cache_free(free_nid_slab, i); +} + +static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) +{ + struct free_nid *i; + struct nat_entry *ne; + bool allocated = false; + + if (nm_i->fcnt > 2 * MAX_FREE_NIDS) + return -1; + + /* 0 nid should not be used */ + if (nid == 0) + return 0; + + if (!build) + goto retry; + + /* do not add allocated nids */ + read_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); + if (ne && nat_get_blkaddr(ne) != NULL_ADDR) + allocated = true; + read_unlock(&nm_i->nat_tree_lock); + if (allocated) + return 0; +retry: + i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); + if (!i) { + cond_resched(); + goto retry; + } + i->nid = nid; + i->state = NID_NEW; + + spin_lock(&nm_i->free_nid_list_lock); + if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { + spin_unlock(&nm_i->free_nid_list_lock); + kmem_cache_free(free_nid_slab, i); + return 0; + } + list_add_tail(&i->list, &nm_i->free_nid_list); + nm_i->fcnt++; + spin_unlock(&nm_i->free_nid_list_lock); + return 1; +} + +static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) +{ + struct free_nid *i; + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + if (i && i->state == NID_NEW) { + __del_from_free_nid_list(i); + nm_i->fcnt--; + } + spin_unlock(&nm_i->free_nid_list_lock); +} + +static void 
scan_nat_page(struct f2fs_nm_info *nm_i, + struct page *nat_page, nid_t start_nid) +{ + struct f2fs_nat_block *nat_blk = page_address(nat_page); + block_t blk_addr; + int i; + + i = start_nid % NAT_ENTRY_PER_BLOCK; + + for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { + + if (start_nid >= nm_i->max_nid) + break; + + blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); + BUG_ON(blk_addr == NEW_ADDR); + if (blk_addr == NULL_ADDR) { + if (add_free_nid(nm_i, start_nid, true) < 0) + break; + } + } +} + +static void build_free_nids(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + int i = 0; + nid_t nid = nm_i->next_scan_nid; + + /* Enough entries */ + if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK) + return; + + /* readahead nat pages to be scanned */ + ra_nat_pages(sbi, nid); + + while (1) { + struct page *page = get_current_nat_page(sbi, nid); + + scan_nat_page(nm_i, page, nid); + f2fs_put_page(page, 1); + + nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); + if (nid >= nm_i->max_nid) + nid = 0; + + if (i++ == FREE_NID_PAGES) + break; + } + + /* go to the next free nat pages to find free nids abundantly */ + nm_i->next_scan_nid = nid; + + /* find free nids from current sum_pages */ + mutex_lock(&curseg->curseg_mutex); + for (i = 0; i < nats_in_cursum(sum); i++) { + block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); + nid = le32_to_cpu(nid_in_journal(sum, i)); + if (addr == NULL_ADDR) + add_free_nid(nm_i, nid, true); + else + remove_free_nid(nm_i, nid); + } + mutex_unlock(&curseg->curseg_mutex); +} + +/* + * If this function returns success, caller can obtain a new nid + * from second parameter of this function. + * The returned nid could be used ino as well as nid when inode is created. + */ +bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i = NULL; + struct list_head *this; +retry: + if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) + return false; + + spin_lock(&nm_i->free_nid_list_lock); + + /* We should not use stale free nids created by build_free_nids */ + if (nm_i->fcnt && !sbi->on_build_free_nids) { + BUG_ON(list_empty(&nm_i->free_nid_list)); + list_for_each(this, &nm_i->free_nid_list) { + i = list_entry(this, struct free_nid, list); + if (i->state == NID_NEW) + break; + } + + BUG_ON(i->state != NID_NEW); + *nid = i->nid; + i->state = NID_ALLOC; + nm_i->fcnt--; + spin_unlock(&nm_i->free_nid_list_lock); + return true; + } + spin_unlock(&nm_i->free_nid_list_lock); + + /* Let's scan nat pages and its caches to get free nids */ + mutex_lock(&nm_i->build_lock); + sbi->on_build_free_nids = 1; + build_free_nids(sbi); + sbi->on_build_free_nids = 0; + mutex_unlock(&nm_i->build_lock); + goto retry; +} + +/* + * alloc_nid() should be called prior to this function. + */ +void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i; + + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + BUG_ON(!i || i->state != NID_ALLOC); + __del_from_free_nid_list(i); + spin_unlock(&nm_i->free_nid_list_lock); +} + +/* + * alloc_nid() should be called prior to this function. 
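+ * alloc_nid_failed() returns the nid to the free list as NID_NEW, or frees
+ * the entry outright once the list already holds more than
+ * 2 * MAX_FREE_NIDS entries.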
+ */ +void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i; + + if (!nid) + return; + + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + BUG_ON(!i || i->state != NID_ALLOC); + if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { + __del_from_free_nid_list(i); + } else { + i->state = NID_NEW; + nm_i->fcnt++; + } + spin_unlock(&nm_i->free_nid_list_lock); +} + +void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, + struct f2fs_summary *sum, struct node_info *ni, + block_t new_blkaddr) +{ + rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); + set_node_addr(sbi, ni, new_blkaddr); + clear_node_page_dirty(page); +} + +int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + struct f2fs_node *src, *dst; + nid_t ino = ino_of_node(page); + struct node_info old_ni, new_ni; + struct page *ipage; + int err; + + ipage = grab_cache_page(mapping, ino); + if (!ipage) + return -ENOMEM; + + /* Should not use this inode from free nid list */ + remove_free_nid(NM_I(sbi), ino); + + get_node_info(sbi, ino, &old_ni); + SetPageUptodate(ipage); + fill_node_footer(ipage, ino, ino, 0, true); + + src = F2FS_NODE(page); + dst = F2FS_NODE(ipage); + + memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); + dst->i.i_size = 0; + dst->i.i_blocks = cpu_to_le64(1); + dst->i.i_links = cpu_to_le32(1); + dst->i.i_xattr_nid = 0; + + new_ni = old_ni; + new_ni.ino = ino; + + err = set_node_addr(sbi, &new_ni, NEW_ADDR); + if (!err) + if (!inc_valid_node_count(sbi, NULL, 1)) + err = -ENOSPC; + if (!err) + inc_valid_inode_count(sbi); + f2fs_put_page(ipage, 1); + return err; +} + +int restore_node_summary(struct f2fs_sb_info *sbi, + unsigned int segno, struct f2fs_summary_block *sum) +{ + struct f2fs_node *rn; + struct f2fs_summary *sum_entry; + struct page *page; + block_t addr; + int i, last_offset; + + /* alloc temporal page for read node */ + page = alloc_page(GFP_NOFS | __GFP_ZERO); + if (!page) + return -ENOMEM; + lock_page(page); + + /* scan the node segment */ + last_offset = sbi->blocks_per_seg; + addr = START_BLOCK(sbi, segno); + sum_entry = &sum->entries[0]; + + for (i = 0; i < last_offset; i++, sum_entry++) { + /* + * In order to read next node page, + * we must clear PageUptodate flag. 
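+		 * The same temporary page is reused for every block in this
+		 * segment, so the uptodate state left by the previous read must
+		 * not be carried over to the next one.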
+ */ + ClearPageUptodate(page); + + if (f2fs_readpage(sbi, page, addr, READ_SYNC)) + goto out; + + lock_page(page); + rn = F2FS_NODE(page); + sum_entry->nid = rn->footer.nid; + sum_entry->version = 0; + sum_entry->ofs_in_node = 0; + addr++; + } + unlock_page(page); +out: + __free_pages(page, 0); + return 0; +} + +static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + int i; + + mutex_lock(&curseg->curseg_mutex); + + if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) { + mutex_unlock(&curseg->curseg_mutex); + return false; + } + + for (i = 0; i < nats_in_cursum(sum); i++) { + struct nat_entry *ne; + struct f2fs_nat_entry raw_ne; + nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); + + raw_ne = nat_in_journal(sum, i); +retry: + write_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); + if (ne) { + __set_nat_cache_dirty(nm_i, ne); + write_unlock(&nm_i->nat_tree_lock); + continue; + } + ne = grab_nat_entry(nm_i, nid); + if (!ne) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); + nat_set_ino(ne, le32_to_cpu(raw_ne.ino)); + nat_set_version(ne, raw_ne.version); + __set_nat_cache_dirty(nm_i, ne); + write_unlock(&nm_i->nat_tree_lock); + } + update_nats_in_cursum(sum, -i); + mutex_unlock(&curseg->curseg_mutex); + return true; +} + +/* + * This function is called during the checkpointing process. + */ +void flush_nat_entries(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + struct list_head *cur, *n; + struct page *page = NULL; + struct f2fs_nat_block *nat_blk = NULL; + nid_t start_nid = 0, end_nid = 0; + bool flushed; + + flushed = flush_nats_in_journal(sbi); + + if (!flushed) + mutex_lock(&curseg->curseg_mutex); + + /* 1) flush dirty nat caches */ + list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { + struct nat_entry *ne; + nid_t nid; + struct f2fs_nat_entry raw_ne; + int offset = -1; + block_t new_blkaddr; + + ne = list_entry(cur, struct nat_entry, list); + nid = nat_get_nid(ne); + + if (nat_get_blkaddr(ne) == NEW_ADDR) + continue; + if (flushed) + goto to_nat_page; + + /* if there is room for nat enries in curseg->sumpage */ + offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); + if (offset >= 0) { + raw_ne = nat_in_journal(sum, offset); + goto flush_now; + } +to_nat_page: + if (!page || (start_nid > nid || nid > end_nid)) { + if (page) { + f2fs_put_page(page, 1); + page = NULL; + } + start_nid = START_NID(nid); + end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1; + + /* + * get nat block with dirty flag, increased reference + * count, mapped and lock + */ + page = get_next_nat_page(sbi, start_nid); + nat_blk = page_address(page); + } + + BUG_ON(!nat_blk); + raw_ne = nat_blk->entries[nid - start_nid]; +flush_now: + new_blkaddr = nat_get_blkaddr(ne); + + raw_ne.ino = cpu_to_le32(nat_get_ino(ne)); + raw_ne.block_addr = cpu_to_le32(new_blkaddr); + raw_ne.version = nat_get_version(ne); + + if (offset < 0) { + nat_blk->entries[nid - start_nid] = raw_ne; + } else { + nat_in_journal(sum, offset) = raw_ne; + nid_in_journal(sum, offset) = cpu_to_le32(nid); + } + + if (nat_get_blkaddr(ne) == NULL_ADDR && + add_free_nid(NM_I(sbi), nid, false) <= 0) { + write_lock(&nm_i->nat_tree_lock); + __del_from_nat_cache(nm_i, ne); + 
write_unlock(&nm_i->nat_tree_lock); + } else { + write_lock(&nm_i->nat_tree_lock); + __clear_nat_cache_dirty(nm_i, ne); + ne->checkpointed = true; + write_unlock(&nm_i->nat_tree_lock); + } + } + if (!flushed) + mutex_unlock(&curseg->curseg_mutex); + f2fs_put_page(page, 1); + + /* 2) shrink nat caches if necessary */ + try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD); +} + +static int init_node_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned char *version_bitmap; + unsigned int nat_segs, nat_blocks; + + nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); + + /* segment_count_nat includes pair segment so divide to 2. */ + nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; + nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; + nm_i->fcnt = 0; + nm_i->nat_cnt = 0; + + INIT_LIST_HEAD(&nm_i->free_nid_list); + INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); + INIT_LIST_HEAD(&nm_i->nat_entries); + INIT_LIST_HEAD(&nm_i->dirty_nat_entries); + + mutex_init(&nm_i->build_lock); + spin_lock_init(&nm_i->free_nid_list_lock); + rwlock_init(&nm_i->nat_tree_lock); + + nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); + nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); + version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); + if (!version_bitmap) + return -EFAULT; + + nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, + GFP_KERNEL); + if (!nm_i->nat_bitmap) + return -ENOMEM; + return 0; +} + +int build_node_manager(struct f2fs_sb_info *sbi) +{ + int err; + + sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL); + if (!sbi->nm_info) + return -ENOMEM; + + err = init_node_manager(sbi); + if (err) + return err; + + build_free_nids(sbi); + return 0; +} + +void destroy_node_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i, *next_i; + struct nat_entry *natvec[NATVEC_SIZE]; + nid_t nid = 0; + unsigned int found; + + if (!nm_i) + return; + + /* destroy free nid list */ + spin_lock(&nm_i->free_nid_list_lock); + list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { + BUG_ON(i->state == NID_ALLOC); + __del_from_free_nid_list(i); + nm_i->fcnt--; + } + BUG_ON(nm_i->fcnt); + spin_unlock(&nm_i->free_nid_list_lock); + + /* destroy nat cache */ + write_lock(&nm_i->nat_tree_lock); + while ((found = __gang_lookup_nat_cache(nm_i, + nid, NATVEC_SIZE, natvec))) { + unsigned idx; + for (idx = 0; idx < found; idx++) { + struct nat_entry *e = natvec[idx]; + nid = nat_get_nid(e) + 1; + __del_from_nat_cache(nm_i, e); + } + } + BUG_ON(nm_i->nat_cnt); + write_unlock(&nm_i->nat_tree_lock); + + kfree(nm_i->nat_bitmap); + sbi->nm_info = NULL; + kfree(nm_i); +} + +int __init create_node_manager_caches(void) +{ + nat_entry_slab = f2fs_kmem_cache_create("nat_entry", + sizeof(struct nat_entry), NULL); + if (!nat_entry_slab) + return -ENOMEM; + + free_nid_slab = f2fs_kmem_cache_create("free_nid", + sizeof(struct free_nid), NULL); + if (!free_nid_slab) { + kmem_cache_destroy(nat_entry_slab); + return -ENOMEM; + } + return 0; +} + +void destroy_node_manager_caches(void) +{ + kmem_cache_destroy(free_nid_slab); + kmem_cache_destroy(nat_entry_slab); +} diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h new file mode 100644 index 00000000000..3496bb3e15d --- /dev/null +++ b/fs/f2fs/node.h @@ -0,0 +1,345 @@ +/* + * fs/f2fs/node.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. 
+ * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/* start node id of a node block dedicated to the given node id */ +#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) + +/* node block offset on the NAT area dedicated to the given start node id */ +#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) + +/* # of pages to perform readahead before building free nids */ +#define FREE_NID_PAGES 4 + +/* maximum # of free node ids to produce during build_free_nids */ +#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) + +/* maximum readahead size for node during getting data blocks */ +#define MAX_RA_NODE 128 + +/* maximum cached nat entries to manage memory footprint */ +#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) + +/* vector size for gang look-up from nat cache that consists of radix tree */ +#define NATVEC_SIZE 64 + +/* return value for read_node_page */ +#define LOCKED_PAGE 1 + +/* + * For node information + */ +struct node_info { + nid_t nid; /* node id */ + nid_t ino; /* inode number of the node's owner */ + block_t blk_addr; /* block address of the node */ + unsigned char version; /* version of the node */ +}; + +struct nat_entry { + struct list_head list; /* for clean or dirty nat list */ + bool checkpointed; /* whether it is checkpointed or not */ + struct node_info ni; /* in-memory node information */ +}; + +#define nat_get_nid(nat) (nat->ni.nid) +#define nat_set_nid(nat, n) (nat->ni.nid = n) +#define nat_get_blkaddr(nat) (nat->ni.blk_addr) +#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) +#define nat_get_ino(nat) (nat->ni.ino) +#define nat_set_ino(nat, i) (nat->ni.ino = i) +#define nat_get_version(nat) (nat->ni.version) +#define nat_set_version(nat, v) (nat->ni.version = v) + +#define __set_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->dirty_nat_entries); +#define __clear_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->nat_entries); +#define inc_node_version(version) (++version) + +static inline void node_info_from_raw_nat(struct node_info *ni, + struct f2fs_nat_entry *raw_ne) +{ + ni->ino = le32_to_cpu(raw_ne->ino); + ni->blk_addr = le32_to_cpu(raw_ne->block_addr); + ni->version = raw_ne->version; +} + +/* + * For free nid mangement + */ +enum nid_state { + NID_NEW, /* newly added to free nid list */ + NID_ALLOC /* it is allocated */ +}; + +struct free_nid { + struct list_head list; /* for free node id list */ + nid_t nid; /* node id */ + int state; /* in use or not: NID_NEW or NID_ALLOC */ +}; + +static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *fnid; + + if (nm_i->fcnt <= 0) + return -1; + spin_lock(&nm_i->free_nid_list_lock); + fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); + *nid = fnid->nid; + spin_unlock(&nm_i->free_nid_list_lock); + return 0; +} + +/* + * inline functions + */ +static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); +} + +static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + pgoff_t block_off; + pgoff_t block_addr; + int seg_off; + + block_off = NAT_BLOCK_OFFSET(start); + seg_off = block_off >> 
sbi->log_blocks_per_seg; + + block_addr = (pgoff_t)(nm_i->nat_blkaddr + + (seg_off << sbi->log_blocks_per_seg << 1) + + (block_off & ((1 << sbi->log_blocks_per_seg) - 1))); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + block_addr += sbi->blocks_per_seg; + + return block_addr; +} + +static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + block_addr -= nm_i->nat_blkaddr; + if ((block_addr >> sbi->log_blocks_per_seg) % 2) + block_addr -= sbi->blocks_per_seg; + else + block_addr += sbi->blocks_per_seg; + + return block_addr + nm_i->nat_blkaddr; +} + +static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) +{ + unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + f2fs_clear_bit(block_off, nm_i->nat_bitmap); + else + f2fs_set_bit(block_off, nm_i->nat_bitmap); +} + +static inline void fill_node_footer(struct page *page, nid_t nid, + nid_t ino, unsigned int ofs, bool reset) +{ + struct f2fs_node *rn = F2FS_NODE(page); + if (reset) + memset(rn, 0, sizeof(*rn)); + rn->footer.nid = cpu_to_le32(nid); + rn->footer.ino = cpu_to_le32(ino); + rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); +} + +static inline void copy_node_footer(struct page *dst, struct page *src) +{ + struct f2fs_node *src_rn = F2FS_NODE(src); + struct f2fs_node *dst_rn = F2FS_NODE(dst); + memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); +} + +static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_node *rn = F2FS_NODE(page); + + rn->footer.cp_ver = ckpt->checkpoint_ver; + rn->footer.next_blkaddr = cpu_to_le32(blkaddr); +} + +static inline nid_t ino_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.ino); +} + +static inline nid_t nid_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.nid); +} + +static inline unsigned int ofs_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + unsigned flag = le32_to_cpu(rn->footer.flag); + return flag >> OFFSET_BIT_SHIFT; +} + +static inline unsigned long long cpver_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le64_to_cpu(rn->footer.cp_ver); +} + +static inline block_t next_blkaddr_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.next_blkaddr); +} + +/* + * f2fs assigns the following node offsets described as (num). 
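+ * The numbering below is what ofs_of_node() reports and what IS_DNODE()
+ * further down decodes to tell direct node blocks from indirect ones.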
+ * N = NIDS_PER_BLOCK + * + * Inode block (0) + * |- direct node (1) + * |- direct node (2) + * |- indirect node (3) + * | `- direct node (4 => 4 + N - 1) + * |- indirect node (4 + N) + * | `- direct node (5 + N => 5 + 2N - 1) + * `- double indirect node (5 + 2N) + * `- indirect node (6 + 2N) + * `- direct node (x(N + 1)) + */ +static inline bool IS_DNODE(struct page *node_page) +{ + unsigned int ofs = ofs_of_node(node_page); + + if (ofs == XATTR_NODE_OFFSET) + return false; + + if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || + ofs == 5 + 2 * NIDS_PER_BLOCK) + return false; + if (ofs >= 6 + 2 * NIDS_PER_BLOCK) { + ofs -= 6 + 2 * NIDS_PER_BLOCK; + if (!((long int)ofs % (NIDS_PER_BLOCK + 1))) + return false; + } + return true; +} + +static inline void set_nid(struct page *p, int off, nid_t nid, bool i) +{ + struct f2fs_node *rn = F2FS_NODE(p); + + wait_on_page_writeback(p); + + if (i) + rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); + else + rn->in.nid[off] = cpu_to_le32(nid); + set_page_dirty(p); +} + +static inline nid_t get_nid(struct page *p, int off, bool i) +{ + struct f2fs_node *rn = F2FS_NODE(p); + + if (i) + return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); + return le32_to_cpu(rn->in.nid[off]); +} + +/* + * Coldness identification: + * - Mark cold files in f2fs_inode_info + * - Mark cold node blocks in their node footer + * - Mark cold data pages in page cache + */ +static inline int is_file(struct inode *inode, int type) +{ + return F2FS_I(inode)->i_advise & type; +} + +static inline void set_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise |= type; +} + +static inline void clear_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise &= ~type; +} + +#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) +#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) +#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) +#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) +#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) +#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) + +static inline int is_cold_data(struct page *page) +{ + return PageChecked(page); +} + +static inline void set_cold_data(struct page *page) +{ + SetPageChecked(page); +} + +static inline void clear_cold_data(struct page *page) +{ + ClearPageChecked(page); +} + +static inline int is_node(struct page *page, int type) +{ + struct f2fs_node *rn = F2FS_NODE(page); + return le32_to_cpu(rn->footer.flag) & (1 << type); +} + +#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) +#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) +#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) + +static inline void set_cold_node(struct inode *inode, struct page *page) +{ + struct f2fs_node *rn = F2FS_NODE(page); + unsigned int flag = le32_to_cpu(rn->footer.flag); + + if (S_ISDIR(inode->i_mode)) + flag &= ~(0x1 << COLD_BIT_SHIFT); + else + flag |= (0x1 << COLD_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} + +static inline void set_mark(struct page *page, int mark, int type) +{ + struct f2fs_node *rn = F2FS_NODE(page); + unsigned int flag = le32_to_cpu(rn->footer.flag); + if (mark) + flag |= (0x1 << type); + else + flag &= ~(0x1 << type); + rn->footer.flag = cpu_to_le32(flag); +} +#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) +#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c new file mode 100644 
index 00000000000..bee00347ee2 --- /dev/null +++ b/fs/f2fs/recovery.c @@ -0,0 +1,502 @@ +/* + * fs/f2fs/recovery.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include "f2fs.h" +#include "node.h" +#include "segment.h" + +static struct kmem_cache *fsync_entry_slab; + +bool space_for_roll_forward(struct f2fs_sb_info *sbi) +{ + if (sbi->last_valid_block_count + sbi->alloc_valid_block_count + > sbi->user_block_count) + return false; + return true; +} + +static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, + nid_t ino) +{ + struct list_head *this; + struct fsync_inode_entry *entry; + + list_for_each(this, head) { + entry = list_entry(this, struct fsync_inode_entry, list); + if (entry->inode->i_ino == ino) + return entry; + } + return NULL; +} + +static int recover_dentry(struct page *ipage, struct inode *inode) +{ + struct f2fs_node *raw_node = F2FS_NODE(ipage); + struct f2fs_inode *raw_inode = &(raw_node->i); + nid_t pino = le32_to_cpu(raw_inode->i_pino); + struct f2fs_dir_entry *de; + struct qstr name; + struct page *page; + struct inode *dir, *einode; + int err = 0; + + dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); + if (!dir) { + dir = f2fs_iget(inode->i_sb, pino); + if (IS_ERR(dir)) { + f2fs_msg(inode->i_sb, KERN_INFO, + "%s: f2fs_iget failed: %ld", + __func__, PTR_ERR(dir)); + err = PTR_ERR(dir); + goto out; + } + set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); + add_dirty_dir_inode(dir); + } + + name.len = le32_to_cpu(raw_inode->i_namelen); + name.name = raw_inode->i_name; +retry: + de = f2fs_find_entry(dir, &name, &page); + if (de && inode->i_ino == le32_to_cpu(de->ino)) + goto out_unmap_put; + if (de) { + einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); + if (IS_ERR(einode)) { + WARN_ON(1); + if (PTR_ERR(einode) == -ENOENT) + err = -EEXIST; + goto out_unmap_put; + } + err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); + if (err) { + iput(einode); + goto out_unmap_put; + } + f2fs_delete_entry(de, page, einode); + iput(einode); + goto retry; + } + err = __f2fs_add_link(dir, &name, inode); + goto out; + +out_unmap_put: + kunmap(page); + f2fs_put_page(page, 0); +out: + f2fs_msg(inode->i_sb, KERN_DEBUG, "recover_inode and its dentry: " + "ino = %x, name = %s, dir = %lx, err = %d", + ino_of_node(ipage), raw_inode->i_name, + IS_ERR(dir) ? 
0 : dir->i_ino, err); + return err; +} + +static int recover_inode(struct inode *inode, struct page *node_page) +{ + struct f2fs_node *raw_node = F2FS_NODE(node_page); + struct f2fs_inode *raw_inode = &(raw_node->i); + + if (!IS_INODE(node_page)) + return 0; + + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + i_size_write(inode, le64_to_cpu(raw_inode->i_size)); + inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); + inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); + inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); + inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + + if (is_dent_dnode(node_page)) + return recover_dentry(node_page, inode); + + f2fs_msg(inode->i_sb, KERN_DEBUG, "recover_inode: ino = %x, name = %s", + ino_of_node(node_page), raw_inode->i_name); + return 0; +} + +static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) +{ + unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); + struct curseg_info *curseg; + struct page *page; + block_t blkaddr; + int err = 0; + + /* get node pages in the current segment */ + curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); + blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; + + /* read node page */ + page = alloc_page(GFP_F2FS_ZERO); + if (!page) + return -ENOMEM; + lock_page(page); + + while (1) { + struct fsync_inode_entry *entry; + + err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + if (err) + goto out; + + lock_page(page); + + if (cp_ver != cpver_of_node(page)) + break; + + if (!is_fsync_dnode(page)) + goto next; + + entry = get_fsync_inode(head, ino_of_node(page)); + if (entry) { + if (IS_INODE(page) && is_dent_dnode(page)) + set_inode_flag(F2FS_I(entry->inode), + FI_INC_LINK); + } else { + if (IS_INODE(page) && is_dent_dnode(page)) { + err = recover_inode_page(sbi, page); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: recover_inode_page failed: %d", + __func__, err); + break; + } + } + + /* add this fsync inode to the list */ + entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); + if (!entry) { + err = -ENOMEM; + break; + } + + entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); + if (IS_ERR(entry->inode)) { + err = PTR_ERR(entry->inode); + f2fs_msg(sbi->sb, KERN_INFO, + "%s: f2fs_iget failed: %d", + __func__, err); + kmem_cache_free(fsync_entry_slab, entry); + break; + } + list_add_tail(&entry->list, head); + } + entry->blkaddr = blkaddr; + + err = recover_inode(entry->inode, page); + if (err && err != -ENOENT) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: recover_inode failed: %d", + __func__, err); + break; + } +next: + /* check next segment */ + blkaddr = next_blkaddr_of_node(page); + } + unlock_page(page); +out: + __free_pages(page, 0); + return err; +} + +static void destroy_fsync_dnodes(struct list_head *head) +{ + struct fsync_inode_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, head, list) { + iput(entry->inode); + list_del(&entry->list); + kmem_cache_free(fsync_entry_slab, entry); + } +} + +static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, + block_t blkaddr, struct dnode_of_data *dn) +{ + struct seg_entry *sentry; + unsigned int segno = GET_SEGNO(sbi, blkaddr); + unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & + (sbi->blocks_per_seg - 1); + struct f2fs_summary sum; + nid_t ino, nid; + void *kaddr; + struct inode *inode; + struct page *node_page; + unsigned int offset; + block_t 
bidx; + int i; + + sentry = get_seg_entry(sbi, segno); + if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) + return 0; + + /* Get the previous summary */ + for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + if (curseg->segno == segno) { + sum = curseg->sum_blk->entries[blkoff]; + break; + } + } + if (i > CURSEG_COLD_DATA) { + struct page *sum_page = get_sum_page(sbi, segno); + struct f2fs_summary_block *sum_node; + kaddr = page_address(sum_page); + sum_node = (struct f2fs_summary_block *)kaddr; + sum = sum_node->entries[blkoff]; + f2fs_put_page(sum_page, 1); + } + + /* Use the locked dnode page and inode */ + nid = le32_to_cpu(sum.nid); + if (dn->inode->i_ino == nid) { + struct dnode_of_data tdn = *dn; + tdn.nid = nid; + tdn.node_page = dn->inode_page; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + truncate_data_blocks_range(&tdn, 1); + return 0; + } else if (dn->nid == nid) { + struct dnode_of_data tdn = *dn; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + truncate_data_blocks_range(&tdn, 1); + return 0; + } + + /* Get the node page */ + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); + + offset = ofs_of_node(node_page); + ino = ino_of_node(node_page); + f2fs_put_page(node_page, 1); + + /* Skip nodes with circular references */ + if (ino == dn->inode->i_ino) { + f2fs_msg(sbi->sb, KERN_ERR, "%s: node %x has circular inode %x", + __func__, ino, nid); + f2fs_handle_error(sbi); + return -EDEADLK; + } + + /* Deallocate previous index in the node page */ + inode = f2fs_iget(sbi->sb, ino); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + bidx = start_bidx_of_node(offset, F2FS_I(inode)) + + le16_to_cpu(sum.ofs_in_node); + + truncate_hole(inode, bidx, bidx + 1); + iput(inode); + return 0; +} + +static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, + struct page *page, block_t blkaddr) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int start, end; + struct dnode_of_data dn; + struct f2fs_summary sum; + struct node_info ni; + int err = 0, recovered = 0; + int ilock; + + start = start_bidx_of_node(ofs_of_node(page), fi); + if (IS_INODE(page)) + end = start + ADDRS_PER_INODE(fi); + else + end = start + ADDRS_PER_BLOCK; + + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + + err = get_dnode_of_data(&dn, start, ALLOC_NODE); + if (err) { + mutex_unlock_op(sbi, ilock); + f2fs_msg(sbi->sb, KERN_INFO, + "%s: get_dnode_of_data failed: %d", __func__, err); + return err; + } + + wait_on_page_writeback(dn.node_page); + + get_node_info(sbi, dn.nid, &ni); + BUG_ON(ni.ino != ino_of_node(page)); + BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page)); + + for (; start < end; start++) { + block_t src, dest; + + src = datablock_addr(dn.node_page, dn.ofs_in_node); + dest = datablock_addr(page, dn.ofs_in_node); + + if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { + if (src == NULL_ADDR) { + int err = reserve_new_block(&dn); + /* We should not get -ENOSPC */ + if (err) + f2fs_msg(sbi->sb, KERN_INFO, + "%s: reserve_new_block failed: %d", + __func__, err); + BUG_ON(err); + } + + /* Check the previous node page having this index */ + err = check_index_in_prev_nodes(sbi, dest, &dn); + if (err) + goto err; + + set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); + + /* write dummy data page */ + recover_data_page(sbi, NULL, &sum, src, dest); + update_extent_cache(dest, &dn); + recovered++; + } + dn.ofs_in_node++; + } + + /* write node page in 
place */ + set_summary(&sum, dn.nid, 0, 0); + if (IS_INODE(dn.node_page)) + sync_inode_page(&dn); + + copy_node_footer(dn.node_page, page); + fill_node_footer(dn.node_page, dn.nid, ni.ino, + ofs_of_node(page), false); + set_page_dirty(dn.node_page); + + recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); +err: + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + + f2fs_msg(sbi->sb, KERN_DEBUG, "recover_data: ino = %lx, " + "recovered_data = %d blocks, err = %d", + inode->i_ino, recovered, err); + return err; +} + +static int recover_data(struct f2fs_sb_info *sbi, + struct list_head *head, int type) +{ + unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); + struct curseg_info *curseg; + struct page *page; + int err = 0; + block_t blkaddr; + + /* get node pages in the current segment */ + curseg = CURSEG_I(sbi, type); + blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + + /* read node page */ + page = alloc_page(GFP_NOFS | __GFP_ZERO); + if (!page) + return -ENOMEM; + + lock_page(page); + + while (1) { + struct fsync_inode_entry *entry; + + err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: f2fs_readpage failed: %d", + __func__, err); + goto out; + } + + lock_page(page); + + if (cp_ver != cpver_of_node(page)) + break; + + entry = get_fsync_inode(head, ino_of_node(page)); + if (!entry) + goto next; + + err = do_recover_data(sbi, entry->inode, page, blkaddr); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: do_recover_data failed: %d", + __func__, err); + break; + } + + if (entry->blkaddr == blkaddr) { + iput(entry->inode); + list_del(&entry->list); + kmem_cache_free(fsync_entry_slab, entry); + } +next: + /* check next segment */ + blkaddr = next_blkaddr_of_node(page); + } + unlock_page(page); +out: + __free_pages(page, 0); + + if (!err) + allocate_new_segments(sbi); + return err; +} + +int recover_fsync_data(struct f2fs_sb_info *sbi) +{ + struct list_head inode_list; + int err; + + fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", + sizeof(struct fsync_inode_entry), NULL); + if (unlikely(!fsync_entry_slab)) + return -ENOMEM; + + INIT_LIST_HEAD(&inode_list); + + /* step #1: find fsynced inode numbers */ + sbi->por_doing = 1; + err = find_fsync_dnodes(sbi, &inode_list); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: find_fsync_dnodes failed: %d", __func__, err); + goto out; + } + + if (list_empty(&inode_list)) + goto out; + + /* step #2: recover data */ + err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); + if (!list_empty(&inode_list)) { + f2fs_handle_error(sbi); + err = -EIO; + } +out: + destroy_fsync_dnodes(&inode_list); + kmem_cache_destroy(fsync_entry_slab); + sbi->por_doing = 0; + if (!err) { + f2fs_msg(sbi->sb, KERN_INFO, "recovery complete"); + write_checkpoint(sbi, false); + } else + f2fs_msg(sbi->sb, KERN_ERR, "recovery did not fully complete"); + + return err; +} diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c new file mode 100644 index 00000000000..cb8e70e88ac --- /dev/null +++ b/fs/f2fs/segment.c @@ -0,0 +1,1787 @@ +/* + * fs/f2fs/segment.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "segment.h" +#include "node.h" +#include + +/* + * This function balances dirty node and dentry pages. + * In addition, it controls garbage collection. + */ +void f2fs_balance_fs(struct f2fs_sb_info *sbi) +{ + /* + * We should do GC or end up with checkpoint, if there are so many dirty + * dir/node pages without enough free segments. + */ + if (has_not_enough_free_secs(sbi, 0)) { + mutex_lock(&sbi->gc_mutex); + f2fs_gc(sbi); + } +} + +static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + /* need not be added */ + if (IS_CURSEG(sbi, segno)) + return; + + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]++; + + if (dirty_type == DIRTY) { + struct seg_entry *sentry = get_seg_entry(sbi, segno); + enum dirty_type t = DIRTY_HOT_DATA; + + dirty_type = sentry->type; + + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]++; + + /* Only one bitmap should be set */ + for (; t <= DIRTY_COLD_NODE; t++) { + if (t == dirty_type) + continue; + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; + } + } +} + +static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]--; + + if (dirty_type == DIRTY) { + enum dirty_type t = DIRTY_HOT_DATA; + + /* clear all the bitmaps */ + for (; t <= DIRTY_COLD_NODE; t++) + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; + + if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) + clear_bit(GET_SECNO(sbi, segno), + dirty_i->victim_secmap); + } +} + +/* + * Should not occur error such as -ENOMEM. + * Adding dirty entry into seglist is not critical operation. + * If a given segment is one of current working segments, it won't be added. + */ +static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned short valid_blocks; + + if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) + return; + + mutex_lock(&dirty_i->seglist_lock); + + valid_blocks = get_valid_blocks(sbi, segno, 0); + + if (valid_blocks == 0) { + __locate_dirty_segment(sbi, segno, PRE); + __remove_dirty_segment(sbi, segno, DIRTY); + } else if (valid_blocks < sbi->blocks_per_seg) { + __locate_dirty_segment(sbi, segno, DIRTY); + } else { + /* Recovery routine with SSR needs this */ + __remove_dirty_segment(sbi, segno, DIRTY); + } + + mutex_unlock(&dirty_i->seglist_lock); +} + +/* + * Should call clear_prefree_segments after checkpoint is done. 
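+ * Prefree segments become reusable only after the checkpoint that
+ * invalidated their blocks is on disk; here they are returned to the free
+ * segmap, and clear_prefree_segments() later clears the PRE state and
+ * optionally issues discard.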
+ */ +static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno = -1; + unsigned int total_segs = TOTAL_SEGS(sbi); + + mutex_lock(&dirty_i->seglist_lock); + while (1) { + segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, + segno + 1); + if (segno >= total_segs) + break; + __set_test_and_free(sbi, segno); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +void clear_prefree_segments(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno = -1; + unsigned int total_segs = TOTAL_SEGS(sbi); + + mutex_lock(&dirty_i->seglist_lock); + while (1) { + segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, + segno + 1); + if (segno >= total_segs) + break; + + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) + dirty_i->nr_dirty[PRE]--; + + /* Let's use trim */ + if (test_opt(sbi, DISCARD)) + blkdev_issue_discard(sbi->sb->s_bdev, + START_BLOCK(sbi, segno) << + sbi->log_sectors_per_block, + 1 << (sbi->log_sectors_per_block + + sbi->log_blocks_per_seg), + GFP_NOFS, 0); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) + sit_i->dirty_sentries++; +} + +static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, + unsigned int segno, int modified) +{ + struct seg_entry *se = get_seg_entry(sbi, segno); + se->type = type; + if (modified) + __mark_sit_entry_dirty(sbi, segno); +} + +static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +{ + struct seg_entry *se; + unsigned int segno, offset; + long int new_vblocks; + bool check_map = false; + + segno = GET_SEGNO(sbi, blkaddr); + + se = get_seg_entry(sbi, segno); + new_vblocks = se->valid_blocks + del; + offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); + + if (new_vblocks < 0 || new_vblocks > sbi->blocks_per_seg || + (new_vblocks >> (sizeof(unsigned short) << 3))) + if (f2fs_handle_error(sbi)) + check_map = true; + + se->mtime = get_mtime(sbi); + SIT_I(sbi)->max_mtime = se->mtime; + + /* Update valid block bitmap */ + if (del > 0) { + if (f2fs_set_bit(offset, se->cur_valid_map)) + if (f2fs_handle_error(sbi)) + check_map = true; + } else { + if (!f2fs_clear_bit(offset, se->cur_valid_map)) + if (f2fs_handle_error(sbi)) + check_map = true; + } + + if (unlikely(check_map)) { + int i; + long int vblocks = 0; + + f2fs_msg(sbi->sb, KERN_ERR, + "cannot %svalidate block %u in segment %u with %hu valid blocks", + (del < 0) ? 
"in" : "", + offset, segno, se->valid_blocks); + + /* assume the count was stale to start */ + del = 0; + for (i = 0; i < sbi->blocks_per_seg; i++) + if (f2fs_test_bit(i, se->cur_valid_map)) + vblocks++; + if (vblocks != se->valid_blocks) { + f2fs_msg(sbi->sb, KERN_INFO, "correcting valid block " + "counts %d -> %ld", se->valid_blocks, vblocks); + /* make accounting corrections */ + del = vblocks - se->valid_blocks; + } + } + se->valid_blocks += del; + + if (!f2fs_test_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks += del; + + __mark_sit_entry_dirty(sbi, segno); + + /* update total number of valid blocks to be written in ckpt area */ + SIT_I(sbi)->written_valid_blocks += del; + + if (sbi->segs_per_sec > 1) + get_sec_entry(sbi, segno)->valid_blocks += del; +} + +static void refresh_sit_entry(struct f2fs_sb_info *sbi, + block_t old_blkaddr, block_t new_blkaddr) +{ + update_sit_entry(sbi, new_blkaddr, 1); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + update_sit_entry(sbi, old_blkaddr, -1); +} + +void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +{ + unsigned int segno = GET_SEGNO(sbi, addr); + struct sit_info *sit_i = SIT_I(sbi); + + BUG_ON(addr == NULL_ADDR); + if (addr == NEW_ADDR) + return; + + if (segno >= TOTAL_SEGS(sbi)) { + f2fs_msg(sbi->sb, KERN_ERR, "invalid segment number %u", segno); + if (f2fs_handle_error(sbi)) + return; + } + + /* add it into sit main buffer */ + mutex_lock(&sit_i->sentry_lock); + + update_sit_entry(sbi, addr, -1); + + /* add it into dirty seglist */ + locate_dirty_segment(sbi, segno); + + mutex_unlock(&sit_i->sentry_lock); +} + +/* + * This function should be resided under the curseg_mutex lock + */ +static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, + struct f2fs_summary *sum) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + void *addr = curseg->sum_blk; + addr += curseg->next_blkoff * sizeof(struct f2fs_summary); + memcpy(addr, sum, sizeof(struct f2fs_summary)); +} + +/* + * Calculate the number of current summary pages for writing + */ +int npages_for_summary_flush(struct f2fs_sb_info *sbi) +{ + int total_size_bytes = 0; + int valid_sum_count = 0; + int i, sum_space; + + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + if (sbi->ckpt->alloc_type[i] == SSR) + valid_sum_count += sbi->blocks_per_seg; + else + valid_sum_count += curseg_blkoff(sbi, i); + } + + total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1) + + sizeof(struct nat_journal) + 2 + + sizeof(struct sit_journal) + 2; + sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE; + if (total_size_bytes < sum_space) + return 1; + else if (total_size_bytes < 2 * sum_space) + return 2; + return 3; +} + +/* + * Caller should put this summary page + */ +struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) +{ + return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); +} + +static void write_sum_page(struct f2fs_sb_info *sbi, + struct f2fs_summary_block *sum_blk, block_t blk_addr) +{ + struct page *page = grab_meta_page(sbi, blk_addr); + void *kaddr = page_address(page); + memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE); + set_page_dirty(page); + f2fs_put_page(page, 1); +} + +static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno + 1; + struct free_segmap_info *free_i = FREE_I(sbi); + + if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) + return !test_bit(segno, free_i->free_segmap); + return 0; +} + +/* + * Find a new segment from the free 
segments bitmap to right order + * This function should be returned with success, otherwise BUG + */ +static void get_new_segment(struct f2fs_sb_info *sbi, + unsigned int *newseg, bool new_sec, int dir) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int segno, secno, zoneno; + unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; + unsigned int hint = *newseg / sbi->segs_per_sec; + unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); + unsigned int left_start = hint; + bool init = true; + int go_left = 0; + int i; + + write_lock(&free_i->segmap_lock); + + if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { + segno = find_next_zero_bit(free_i->free_segmap, + TOTAL_SEGS(sbi), *newseg + 1); + if (segno - *newseg < sbi->segs_per_sec - + (*newseg % sbi->segs_per_sec)) + goto got_it; + } +find_other_zone: + secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); + if (secno >= TOTAL_SECS(sbi)) { + if (dir == ALLOC_RIGHT) { + secno = find_next_zero_bit(free_i->free_secmap, + TOTAL_SECS(sbi), 0); + BUG_ON(secno >= TOTAL_SECS(sbi)); + } else { + go_left = 1; + left_start = hint - 1; + } + } + if (go_left == 0) + goto skip_left; + + while (test_bit(left_start, free_i->free_secmap)) { + if (left_start > 0) { + left_start--; + continue; + } + left_start = find_next_zero_bit(free_i->free_secmap, + TOTAL_SECS(sbi), 0); + BUG_ON(left_start >= TOTAL_SECS(sbi)); + break; + } + secno = left_start; +skip_left: + hint = secno; + segno = secno * sbi->segs_per_sec; + zoneno = secno / sbi->secs_per_zone; + + /* give up on finding another zone */ + if (!init) + goto got_it; + if (sbi->secs_per_zone == 1) + goto got_it; + if (zoneno == old_zoneno) + goto got_it; + if (dir == ALLOC_LEFT) { + if (!go_left && zoneno + 1 >= total_zones) + goto got_it; + if (go_left && zoneno == 0) + goto got_it; + } + for (i = 0; i < NR_CURSEG_TYPE; i++) + if (CURSEG_I(sbi, i)->zone == zoneno) + break; + + if (i < NR_CURSEG_TYPE) { + /* zone is in user, try another */ + if (go_left) + hint = zoneno * sbi->secs_per_zone - 1; + else if (zoneno + 1 >= total_zones) + hint = 0; + else + hint = (zoneno + 1) * sbi->secs_per_zone; + init = false; + goto find_other_zone; + } +got_it: + /* set it as dirty segment in free segmap */ + BUG_ON(test_bit(segno, free_i->free_segmap)); + __set_inuse(sbi, segno); + *newseg = segno; + write_unlock(&free_i->segmap_lock); +} + +static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + struct summary_footer *sum_footer; + + curseg->segno = curseg->next_segno; + curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno); + curseg->next_blkoff = 0; + curseg->next_segno = NULL_SEGNO; + + sum_footer = &(curseg->sum_blk->footer); + memset(sum_footer, 0, sizeof(struct summary_footer)); + if (IS_DATASEG(type)) + SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA); + if (IS_NODESEG(type)) + SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE); + __set_sit_entry_type(sbi, type, curseg->segno, modified); +} + +/* + * Allocate a current working segment. + * This function always allocates a free segment in LFS manner. 
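+ * (The previous segment's summary block is written out first; the ALLOC_LEFT/
+ * ALLOC_RIGHT direction below is only a search hint for get_new_segment().)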
+ */ +static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno; + int dir = ALLOC_LEFT; + + write_sum_page(sbi, curseg->sum_blk, + GET_SUM_BLOCK(sbi, segno)); + if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) + dir = ALLOC_RIGHT; + + if (test_opt(sbi, NOHEAP)) + dir = ALLOC_RIGHT; + + get_new_segment(sbi, &segno, new_sec, dir); + curseg->next_segno = segno; + reset_curseg(sbi, type, 1); + curseg->alloc_type = LFS; +} + +static void __next_free_blkoff(struct f2fs_sb_info *sbi, + struct curseg_info *seg, block_t start) +{ + struct seg_entry *se = get_seg_entry(sbi, seg->segno); + block_t ofs; + for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) { + if (!f2fs_test_bit(ofs, se->ckpt_valid_map) + && !f2fs_test_bit(ofs, se->cur_valid_map)) + break; + } + seg->next_blkoff = ofs; +} + +/* + * If a segment is written by LFS manner, next block offset is just obtained + * by increasing the current block offset. However, if a segment is written by + * SSR manner, next block offset obtained by calling __next_free_blkoff + */ +static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, + struct curseg_info *seg) +{ + if (seg->alloc_type == SSR) + __next_free_blkoff(sbi, seg, seg->next_blkoff + 1); + else + seg->next_blkoff++; +} + +/* + * This function always allocates a used segment (from dirty seglist) by SSR + * manner, so it should recover the existing segment information of valid blocks + */ +static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int new_segno = curseg->next_segno; + struct f2fs_summary_block *sum_node; + struct page *sum_page; + + write_sum_page(sbi, curseg->sum_blk, + GET_SUM_BLOCK(sbi, curseg->segno)); + __set_test_and_inuse(sbi, new_segno); + + mutex_lock(&dirty_i->seglist_lock); + __remove_dirty_segment(sbi, new_segno, PRE); + __remove_dirty_segment(sbi, new_segno, DIRTY); + mutex_unlock(&dirty_i->seglist_lock); + + reset_curseg(sbi, type, 1); + curseg->alloc_type = SSR; + __next_free_blkoff(sbi, curseg, 0); + + if (reuse) { + sum_page = get_sum_page(sbi, new_segno); + sum_node = (struct f2fs_summary_block *)page_address(sum_page); + memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); + f2fs_put_page(sum_page, 1); + } +} + +static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; + + if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0)) + return v_ops->get_victim(sbi, + &(curseg)->next_segno, BG_GC, type, SSR); + + /* For data segments, let's do SSR more intensively */ + for (; type >= CURSEG_HOT_DATA; type--) + if (v_ops->get_victim(sbi, &(curseg)->next_segno, + BG_GC, type, SSR)) + return 1; + return 0; +} + +/* + * flush out current segment and replace it with new segment + * This function should be returned with success, otherwise BUG + */ +static void allocate_segment_by_default(struct f2fs_sb_info *sbi, + int type, bool force) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + + if (force) + new_curseg(sbi, type, true); + else if (type == CURSEG_WARM_NODE) + new_curseg(sbi, type, false); + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + new_curseg(sbi, type, false); + else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) + change_curseg(sbi, type, true); + else + 
new_curseg(sbi, type, false); +#ifdef CONFIG_F2FS_STAT_FS + sbi->segment_count[curseg->alloc_type]++; +#endif +} + +void allocate_new_segments(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg; + unsigned int old_curseg; + int i; + + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + curseg = CURSEG_I(sbi, i); + old_curseg = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); + locate_dirty_segment(sbi, old_curseg); + } +} + +static const struct segment_allocation default_salloc_ops = { + .allocate_segment = allocate_segment_by_default, +}; + +static void f2fs_end_io_write(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct bio_private *p = bio->bi_private; + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + if (!uptodate) { + SetPageError(page); + if (page->mapping) + set_bit(AS_EIO, &page->mapping->flags); + set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); + p->sbi->sb->s_flags |= MS_RDONLY; + } + end_page_writeback(page); + dec_page_count(p->sbi, F2FS_WRITEBACK); + } while (bvec >= bio->bi_io_vec); + + if (p->is_sync) + complete(p->wait); + kfree(p); + bio_put(bio); +} + +struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) +{ + struct bio *bio; + + /* No failure on bio allocation */ + bio = bio_alloc(GFP_NOIO, npages); + bio->bi_bdev = bdev; + bio->bi_private = NULL; + + return bio; +} + +static void do_submit_bio(struct f2fs_sb_info *sbi, + enum page_type type, bool sync) +{ + int rw = sync ? WRITE_SYNC : WRITE; + enum page_type btype = type > META ? META : type; + + if (type >= META_FLUSH) + rw = WRITE_FLUSH_FUA; + + if (btype == META) + rw |= REQ_META; + + if (sbi->bio[btype]) { + struct bio_private *p = sbi->bio[btype]->bi_private; + p->sbi = sbi; + sbi->bio[btype]->bi_end_io = f2fs_end_io_write; + + trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]); + + if (type == META_FLUSH) { + DECLARE_COMPLETION_ONSTACK(wait); + p->is_sync = true; + p->wait = &wait; + submit_bio(rw, sbi->bio[btype]); + wait_for_completion(&wait); + } else { + p->is_sync = false; + submit_bio(rw, sbi->bio[btype]); + } + sbi->bio[btype] = NULL; + } +} + +void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) +{ + down_write(&sbi->bio_sem); + do_submit_bio(sbi, type, sync); + up_write(&sbi->bio_sem); +} + +static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, + block_t blk_addr, enum page_type type) +{ + struct block_device *bdev = sbi->sb->s_bdev; + + verify_block_addr(sbi, blk_addr); + + down_write(&sbi->bio_sem); + + inc_page_count(sbi, F2FS_WRITEBACK); + + if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1) + do_submit_bio(sbi, type, false); +alloc_new: + if (sbi->bio[type] == NULL) { + struct bio_private *priv; +retry: + priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); + if (!priv) { + cond_resched(); + goto retry; + } + + sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); + sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + sbi->bio[type]->bi_private = priv; + /* + * The end_io will be assigned at the sumbission phase. + * Until then, let bio_add_page() merge consecutive IOs as much + * as possible. 
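+ * (do_submit_bio() installs f2fs_end_io_write as bi_end_io just before
+ * calling submit_bio().)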
+ */ + } + + if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) < + PAGE_CACHE_SIZE) { + do_submit_bio(sbi, type, false); + goto alloc_new; + } + + sbi->last_block_in_bio[type] = blk_addr; + + up_write(&sbi->bio_sem); + trace_f2fs_submit_write_page(page, blk_addr, type); +} + +void f2fs_wait_on_page_writeback(struct page *page, + enum page_type type, bool sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + if (PageWriteback(page)) { + f2fs_submit_bio(sbi, type, sync); + wait_on_page_writeback(page); + } +} + +static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + if (curseg->next_blkoff < sbi->blocks_per_seg) + return true; + return false; +} + +static int __get_segment_type_2(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) + return CURSEG_HOT_DATA; + else + return CURSEG_HOT_NODE; +} + +static int __get_segment_type_4(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) { + struct inode *inode = page->mapping->host; + + if (S_ISDIR(inode->i_mode)) + return CURSEG_HOT_DATA; + else + return CURSEG_COLD_DATA; + } else { + if (IS_DNODE(page) && !is_cold_node(page)) + return CURSEG_HOT_NODE; + else + return CURSEG_COLD_NODE; + } +} + +static int __get_segment_type_6(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) { + struct inode *inode = page->mapping->host; + + if (S_ISDIR(inode->i_mode)) + return CURSEG_HOT_DATA; + else if (is_cold_data(page) || file_is_cold(inode)) + return CURSEG_COLD_DATA; + else + return CURSEG_WARM_DATA; + } else { + if (IS_DNODE(page)) + return is_cold_node(page) ? CURSEG_WARM_NODE : + CURSEG_HOT_NODE; + else + return CURSEG_COLD_NODE; + } +} + +static int __get_segment_type(struct page *page, enum page_type p_type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + switch (sbi->active_logs) { + case 2: + return __get_segment_type_2(page, p_type); + case 4: + return __get_segment_type_4(page, p_type); + } + /* NR_CURSEG_TYPE(6) logs by default */ + BUG_ON(sbi->active_logs != NR_CURSEG_TYPE); + return __get_segment_type_6(page, p_type); +} + +static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blkaddr, block_t *new_blkaddr, + struct f2fs_summary *sum, enum page_type p_type) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg; + unsigned int old_cursegno; + int type; + + type = __get_segment_type(page, p_type); + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + + *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + old_cursegno = curseg->segno; + + /* + * __add_sum_entry should be resided under the curseg_mutex + * because, this function updates a summary entry in the + * current summary block. + */ + __add_sum_entry(sbi, type, sum); + + mutex_lock(&sit_i->sentry_lock); + __refresh_next_blkoff(sbi, curseg); +#ifdef CONFIG_F2FS_STAT_FS + sbi->block_count[curseg->alloc_type]++; +#endif + + /* + * SIT information should be updated before segment allocation, + * since SSR needs latest valid block information. 
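+ * refresh_sit_entry() validates the new block and invalidates the old one
+ * before allocate_segment() below may go looking for an SSR victim.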
+ */ + refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); + + if (!__has_curseg_space(sbi, type)) + sit_i->s_ops->allocate_segment(sbi, type, false); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + mutex_unlock(&sit_i->sentry_lock); + + if (p_type == NODE) + fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + + /* writeout dirty page into bdev */ + submit_write_page(sbi, page, *new_blkaddr, p_type); + + mutex_unlock(&curseg->curseg_mutex); +} + +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) +{ + set_page_writeback(page); + submit_write_page(sbi, page, page->index, META); +} + +void write_node_page(struct f2fs_sb_info *sbi, struct page *page, + unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) +{ + struct f2fs_summary sum; + set_summary(&sum, nid, 0, 0); + do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE); +} + +void write_data_page(struct inode *inode, struct page *page, + struct dnode_of_data *dn, block_t old_blkaddr, + block_t *new_blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_summary sum; + struct node_info ni; + + BUG_ON(old_blkaddr == NULL_ADDR); + get_node_info(sbi, dn->nid, &ni); + set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); + + do_write_page(sbi, page, old_blkaddr, + new_blkaddr, &sum, DATA); +} + +void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blk_addr) +{ + submit_write_page(sbi, page, old_blk_addr, DATA); +} + +void recover_data_page(struct f2fs_sb_info *sbi, + struct page *page, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg; + unsigned int segno, old_cursegno; + struct seg_entry *se; + int type; + + segno = GET_SEGNO(sbi, new_blkaddr); + se = get_seg_entry(sbi, segno); + type = se->type; + + if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { + if (old_blkaddr == NULL_ADDR) + type = CURSEG_COLD_DATA; + else + type = CURSEG_WARM_DATA; + } + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + old_cursegno = curseg->segno; + + /* change the current segment */ + if (segno != curseg->segno) { + curseg->next_segno = segno; + change_curseg(sbi, type, true); + } + + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & + (sbi->blocks_per_seg - 1); + __add_sum_entry(sbi, type, sum); + + refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); +} + +void rewrite_node_page(struct f2fs_sb_info *sbi, + struct page *page, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr) +{ + struct sit_info *sit_i = SIT_I(sbi); + int type = CURSEG_WARM_NODE; + struct curseg_info *curseg; + unsigned int segno, old_cursegno; + block_t next_blkaddr = next_blkaddr_of_node(page); + unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); + + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + segno = GET_SEGNO(sbi, new_blkaddr); + old_cursegno = curseg->segno; + + /* change the current segment */ + if (segno != curseg->segno) { + curseg->next_segno = segno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & + (sbi->blocks_per_seg - 1); + __add_sum_entry(sbi, type, 
sum); + + /* change the current log to the next block addr in advance */ + if (next_segno != segno) { + curseg->next_segno = next_segno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & + (sbi->blocks_per_seg - 1); + + /* rewrite node page */ + set_page_writeback(page); + submit_write_page(sbi, page, new_blkaddr, NODE); + f2fs_submit_bio(sbi, NODE, true); + refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); +} + +static int read_compacted_summaries(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct curseg_info *seg_i; + unsigned char *kaddr; + struct page *page; + block_t start; + int i, j, offset; + + start = start_sum_block(sbi); + + page = get_meta_page(sbi, start++); + kaddr = (unsigned char *)page_address(page); + + /* Step 1: restore nat cache */ + seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); + memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE); + + /* Step 2: restore sit cache */ + seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); + memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE, + SUM_JOURNAL_SIZE); + offset = 2 * SUM_JOURNAL_SIZE; + + /* Step 3: restore summary entries */ + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + unsigned short blk_off; + unsigned int segno; + + seg_i = CURSEG_I(sbi, i); + segno = le32_to_cpu(ckpt->cur_data_segno[i]); + blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); + seg_i->next_segno = segno; + reset_curseg(sbi, i, 0); + seg_i->alloc_type = ckpt->alloc_type[i]; + seg_i->next_blkoff = blk_off; + + if (seg_i->alloc_type == SSR) + blk_off = sbi->blocks_per_seg; + + for (j = 0; j < blk_off; j++) { + struct f2fs_summary *s; + s = (struct f2fs_summary *)(kaddr + offset); + seg_i->sum_blk->entries[j] = *s; + offset += SUMMARY_SIZE; + if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + SUM_FOOTER_SIZE) + continue; + + f2fs_put_page(page, 1); + page = NULL; + + page = get_meta_page(sbi, start++); + kaddr = (unsigned char *)page_address(page); + offset = 0; + } + } + f2fs_put_page(page, 1); + return 0; +} + +static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_summary_block *sum; + struct curseg_info *curseg; + struct page *new; + unsigned short blk_off; + unsigned int segno = 0; + block_t blk_addr = 0; + + /* get segment number and block addr */ + if (IS_DATASEG(type)) { + segno = le32_to_cpu(ckpt->cur_data_segno[type]); + blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - + CURSEG_HOT_DATA]); + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); + else + blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); + } else { + segno = le32_to_cpu(ckpt->cur_node_segno[type - + CURSEG_HOT_NODE]); + blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - + CURSEG_HOT_NODE]); + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, + type - CURSEG_HOT_NODE); + else + blk_addr = GET_SUM_BLOCK(sbi, segno); + } + + new = get_meta_page(sbi, blk_addr); + sum = (struct f2fs_summary_block *)page_address(new); + + if (IS_NODESEG(type)) { + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { + struct f2fs_summary *ns = &sum->entries[0]; + int i; + for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { + ns->version = 0; + ns->ofs_in_node = 0; + } + 
} else { + if (restore_node_summary(sbi, segno, sum)) { + f2fs_put_page(new, 1); + return -EINVAL; + } + } + } + + /* set uncompleted segment to curseg */ + curseg = CURSEG_I(sbi, type); + mutex_lock(&curseg->curseg_mutex); + memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE); + curseg->next_segno = segno; + reset_curseg(sbi, type, 0); + curseg->alloc_type = ckpt->alloc_type[type]; + curseg->next_blkoff = blk_off; + mutex_unlock(&curseg->curseg_mutex); + f2fs_put_page(new, 1); + return 0; +} + +static int restore_curseg_summaries(struct f2fs_sb_info *sbi) +{ + int type = CURSEG_HOT_DATA; + + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { + /* restore for compacted data summary */ + if (read_compacted_summaries(sbi)) + return -EINVAL; + type = CURSEG_HOT_NODE; + } + + for (; type <= CURSEG_COLD_NODE; type++) + if (read_normal_summaries(sbi, type)) + return -EINVAL; + return 0; +} + +static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + struct page *page; + unsigned char *kaddr; + struct f2fs_summary *summary; + struct curseg_info *seg_i; + int written_size = 0; + int i, j; + + page = grab_meta_page(sbi, blkaddr++); + kaddr = (unsigned char *)page_address(page); + + /* Step 1: write nat cache */ + seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); + memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE); + written_size += SUM_JOURNAL_SIZE; + + /* Step 2: write sit cache */ + seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); + memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits, + SUM_JOURNAL_SIZE); + written_size += SUM_JOURNAL_SIZE; + + set_page_dirty(page); + + /* Step 3: write summary entries */ + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + unsigned short blkoff; + seg_i = CURSEG_I(sbi, i); + if (sbi->ckpt->alloc_type[i] == SSR) + blkoff = sbi->blocks_per_seg; + else + blkoff = curseg_blkoff(sbi, i); + + for (j = 0; j < blkoff; j++) { + if (!page) { + page = grab_meta_page(sbi, blkaddr++); + kaddr = (unsigned char *)page_address(page); + written_size = 0; + } + summary = (struct f2fs_summary *)(kaddr + written_size); + *summary = seg_i->sum_blk->entries[j]; + written_size += SUMMARY_SIZE; + set_page_dirty(page); + + if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + SUM_FOOTER_SIZE) + continue; + + f2fs_put_page(page, 1); + page = NULL; + } + } + if (page) + f2fs_put_page(page, 1); +} + +static void write_normal_summaries(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + int i, end; + if (IS_DATASEG(type)) + end = type + NR_CURSEG_DATA_TYPE; + else + end = type + NR_CURSEG_NODE_TYPE; + + for (i = type; i < end; i++) { + struct curseg_info *sum = CURSEG_I(sbi, i); + mutex_lock(&sum->curseg_mutex); + write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type)); + mutex_unlock(&sum->curseg_mutex); + } +} + +void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +{ + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) + write_compacted_summaries(sbi, start_blk); + else + write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); +} + +void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +{ + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) + write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); +} + +int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, + unsigned int val, int alloc) +{ + int i; + + if (type == NAT_JOURNAL) { + for (i = 0; i < nats_in_cursum(sum); i++) { + if (le32_to_cpu(nid_in_journal(sum, i)) == val) + return i; + } + if (alloc && nats_in_cursum(sum) < 
NAT_JOURNAL_ENTRIES) + return update_nats_in_cursum(sum, 1); + } else if (type == SIT_JOURNAL) { + for (i = 0; i < sits_in_cursum(sum); i++) + if (le32_to_cpu(segno_in_journal(sum, i)) == val) + return i; + if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES) + return update_sits_in_cursum(sum, 1); + } + return -1; +} + +static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); + block_t blk_addr = sit_i->sit_base_addr + offset; + + check_seg_range(sbi, segno); + + /* calculate sit block address */ + if (f2fs_test_bit(offset, sit_i->sit_bitmap)) + blk_addr += sit_i->sit_blocks; + + return get_meta_page(sbi, blk_addr); +} + +static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, + unsigned int start) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct page *src_page, *dst_page; + pgoff_t src_off, dst_off; + void *src_addr, *dst_addr; + + src_off = current_sit_addr(sbi, start); + dst_off = next_sit_addr(sbi, src_off); + + /* get current sit block page without lock */ + src_page = get_meta_page(sbi, src_off); + dst_page = grab_meta_page(sbi, dst_off); + BUG_ON(PageDirty(src_page)); + + src_addr = page_address(src_page); + dst_addr = page_address(dst_page); + memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); + + set_page_dirty(dst_page); + f2fs_put_page(src_page, 1); + + set_to_next_sit(sit_i, start); + + return dst_page; +} + +static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + int i; + + /* + * If the journal area in the current summary is full of sit entries, + * all the sit entries will be flushed. Otherwise the sit entries + * are not able to replace with newly hot sit entries. + */ + if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { + for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { + unsigned int segno; + segno = le32_to_cpu(segno_in_journal(sum, i)); + __mark_sit_entry_dirty(sbi, segno); + } + update_sits_in_cursum(sum, -sits_in_cursum(sum)); + return 1; + } + return 0; +} + +/* + * CP calls this function, which flushes SIT entries including sit_journal, + * and moves prefree segs to free segs. + */ +void flush_sit_entries(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned long *bitmap = sit_i->dirty_sentries_bitmap; + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + unsigned long nsegs = TOTAL_SEGS(sbi); + struct page *page = NULL; + struct f2fs_sit_block *raw_sit = NULL; + unsigned int start = 0, end = 0; + unsigned int segno = -1; + bool flushed; + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + /* + * "flushed" indicates whether sit entries in journal are flushed + * to the SIT area or not. 
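+ * If they were flushed, every dirty entry below goes straight to its SIT
+ * block; otherwise the journal in the cold data summary is tried first.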
+ */ + flushed = flush_sits_in_journal(sbi); + + while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { + struct seg_entry *se = get_seg_entry(sbi, segno); + int sit_offset, offset; + + sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); + + if (flushed) + goto to_sit_page; + + offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); + if (offset >= 0) { + segno_in_journal(sum, offset) = cpu_to_le32(segno); + seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); + goto flush_done; + } +to_sit_page: + if (!page || (start > segno) || (segno > end)) { + if (page) { + f2fs_put_page(page, 1); + page = NULL; + } + + start = START_SEGNO(sit_i, segno); + end = start + SIT_ENTRY_PER_BLOCK - 1; + + /* read sit block that will be updated */ + page = get_next_sit_page(sbi, start); + raw_sit = page_address(page); + } + + /* udpate entry in SIT block */ + seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); +flush_done: + __clear_bit(segno, bitmap); + sit_i->dirty_sentries--; + } + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); + + /* writeout last modified SIT block */ + f2fs_put_page(page, 1); + + set_prefree_as_free_segments(sbi); +} + +static int build_sit_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct sit_info *sit_i; + unsigned int sit_segs, start; + char *src_bitmap, *dst_bitmap; + unsigned int bitmap_size; + + /* allocate memory for SIT information */ + sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL); + if (!sit_i) + return -ENOMEM; + + SM_I(sbi)->sit_info = sit_i; + + sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); + if (!sit_i->sentries) + return -ENOMEM; + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!sit_i->dirty_sentries_bitmap) + return -ENOMEM; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + sit_i->sentries[start].cur_valid_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + sit_i->sentries[start].ckpt_valid_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].cur_valid_map + || !sit_i->sentries[start].ckpt_valid_map) + return -ENOMEM; + } + + if (sbi->segs_per_sec > 1) { + sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * + sizeof(struct sec_entry)); + if (!sit_i->sec_entries) + return -ENOMEM; + } + + /* get information related with SIT */ + sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1; + + /* setup SIT bitmap from ckeckpoint pack */ + bitmap_size = __bitmap_size(sbi, SIT_BITMAP); + src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); + + dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); + if (!dst_bitmap) + return -ENOMEM; + + /* init SIT information */ + sit_i->s_ops = &default_salloc_ops; + + sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); + sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; + sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count); + sit_i->sit_bitmap = dst_bitmap; + sit_i->bitmap_size = bitmap_size; + sit_i->dirty_sentries = 0; + sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; + sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); + sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; + mutex_init(&sit_i->sentry_lock); + return 0; +} + +static int build_free_segmap(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + struct free_segmap_info *free_i; + unsigned int bitmap_size, sec_bitmap_size; + + /* allocate memory for 
free segmap information */ + free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL); + if (!free_i) + return -ENOMEM; + + SM_I(sbi)->free_info = free_i; + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); + if (!free_i->free_segmap) + return -ENOMEM; + + sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); + free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); + if (!free_i->free_secmap) + return -ENOMEM; + + /* set all segments as dirty temporarily */ + memset(free_i->free_segmap, 0xff, bitmap_size); + memset(free_i->free_secmap, 0xff, sec_bitmap_size); + + /* init free segmap information */ + free_i->start_segno = + (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr); + free_i->free_segments = 0; + free_i->free_sections = 0; + rwlock_init(&free_i->segmap_lock); + return 0; +} + +static int build_curseg(struct f2fs_sb_info *sbi) +{ + struct curseg_info *array; + int i; + + array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); + if (!array) + return -ENOMEM; + + SM_I(sbi)->curseg_array = array; + + for (i = 0; i < NR_CURSEG_TYPE; i++) { + mutex_init(&array[i].curseg_mutex); + array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + if (!array[i].sum_blk) + return -ENOMEM; + array[i].segno = NULL_SEGNO; + array[i].next_blkoff = 0; + } + return restore_curseg_summaries(sbi); +} + +static void build_sit_entries(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + unsigned int start; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + struct seg_entry *se = &sit_i->sentries[start]; + struct f2fs_sit_block *sit_blk; + struct f2fs_sit_entry sit; + struct page *page; + int i; + + mutex_lock(&curseg->curseg_mutex); + for (i = 0; i < sits_in_cursum(sum); i++) { + if (le32_to_cpu(segno_in_journal(sum, i)) == start) { + sit = sit_in_journal(sum, i); + mutex_unlock(&curseg->curseg_mutex); + goto got_it; + } + } + mutex_unlock(&curseg->curseg_mutex); + page = get_current_sit_page(sbi, start); + sit_blk = (struct f2fs_sit_block *)page_address(page); + sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; + f2fs_put_page(page, 1); +got_it: + check_block_count(sbi, start, &sit); + seg_info_from_raw_sit(se, &sit); + if (sbi->segs_per_sec > 1) { + struct sec_entry *e = get_sec_entry(sbi, start); + e->valid_blocks += se->valid_blocks; + } + } +} + +static void init_free_segmap(struct f2fs_sb_info *sbi) +{ + unsigned int start; + int type; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + struct seg_entry *sentry = get_seg_entry(sbi, start); + if (!sentry->valid_blocks) + __set_free(sbi, start); + } + + /* set use the current segments */ + for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { + struct curseg_info *curseg_t = CURSEG_I(sbi, type); + __set_test_and_inuse(sbi, curseg_t->segno); + } +} + +static void init_dirty_segmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); + unsigned short valid_blocks; + + while (1) { + /* find dirty segment based on free segmap */ + segno = find_next_inuse(free_i, total_segs, offset); + if (segno >= total_segs) + break; + offset = segno + 1; + valid_blocks = get_valid_blocks(sbi, segno, 0); + if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) + continue; + 
mutex_lock(&dirty_i->seglist_lock); + __locate_dirty_segment(sbi, segno, DIRTY); + mutex_unlock(&dirty_i->seglist_lock); + } +} + +static int init_victim_secmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); + + dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dirty_i->victim_secmap) + return -ENOMEM; + return 0; +} + +static int build_dirty_segmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i; + unsigned int bitmap_size, i; + + /* allocate memory for dirty segments list information */ + dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL); + if (!dirty_i) + return -ENOMEM; + + SM_I(sbi)->dirty_info = dirty_i; + mutex_init(&dirty_i->seglist_lock); + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + + for (i = 0; i < NR_DIRTY_TYPE; i++) { + dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); + if (!dirty_i->dirty_segmap[i]) + return -ENOMEM; + } + + init_dirty_segmap(sbi); + return init_victim_secmap(sbi); +} + +/* + * Update min, max modified time for cost-benefit GC algorithm + */ +static void init_min_max_mtime(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int segno; + + mutex_lock(&sit_i->sentry_lock); + + sit_i->min_mtime = LLONG_MAX; + + for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { + unsigned int i; + unsigned long long mtime = 0; + + for (i = 0; i < sbi->segs_per_sec; i++) + mtime += get_seg_entry(sbi, segno + i)->mtime; + + mtime = div_u64(mtime, sbi->segs_per_sec); + + if (sit_i->min_mtime > mtime) + sit_i->min_mtime = mtime; + } + sit_i->max_mtime = get_mtime(sbi); + mutex_unlock(&sit_i->sentry_lock); +} + +int build_segment_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_sm_info *sm_info; + int err; + + sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL); + if (!sm_info) + return -ENOMEM; + + /* init sm info */ + sbi->sm_info = sm_info; + INIT_LIST_HEAD(&sm_info->wblist_head); + spin_lock_init(&sm_info->wblist_lock); + sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); + sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); + sm_info->segment_count = le32_to_cpu(raw_super->segment_count); + sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); + sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); + sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); + sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); + + err = build_sit_info(sbi); + if (err) + return err; + err = build_free_segmap(sbi); + if (err) + return err; + err = build_curseg(sbi); + if (err) + return err; + + /* reinit free segmap based on SIT */ + build_sit_entries(sbi); + + init_free_segmap(sbi); + err = build_dirty_segmap(sbi); + if (err) + return err; + + init_min_max_mtime(sbi); + return 0; +} + +static void discard_dirty_segmap(struct f2fs_sb_info *sbi, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + mutex_lock(&dirty_i->seglist_lock); + kfree(dirty_i->dirty_segmap[dirty_type]); + dirty_i->nr_dirty[dirty_type] = 0; + mutex_unlock(&dirty_i->seglist_lock); +} + +static void destroy_victim_secmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + kfree(dirty_i->victim_secmap); +} + +static void destroy_dirty_segmap(struct f2fs_sb_info 
*sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + int i; + + if (!dirty_i) + return; + + /* discard pre-free/dirty segments list */ + for (i = 0; i < NR_DIRTY_TYPE; i++) + discard_dirty_segmap(sbi, i); + + destroy_victim_secmap(sbi); + SM_I(sbi)->dirty_info = NULL; + kfree(dirty_i); +} + +static void destroy_curseg(struct f2fs_sb_info *sbi) +{ + struct curseg_info *array = SM_I(sbi)->curseg_array; + int i; + + if (!array) + return; + SM_I(sbi)->curseg_array = NULL; + for (i = 0; i < NR_CURSEG_TYPE; i++) + kfree(array[i].sum_blk); + kfree(array); +} + +static void destroy_free_segmap(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = SM_I(sbi)->free_info; + if (!free_i) + return; + SM_I(sbi)->free_info = NULL; + kfree(free_i->free_segmap); + kfree(free_i->free_secmap); + kfree(free_i); +} + +static void destroy_sit_info(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int start; + + if (!sit_i) + return; + + if (sit_i->sentries) { + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + kfree(sit_i->sentries[start].cur_valid_map); + kfree(sit_i->sentries[start].ckpt_valid_map); + } + } + vfree(sit_i->sentries); + vfree(sit_i->sec_entries); + kfree(sit_i->dirty_sentries_bitmap); + + SM_I(sbi)->sit_info = NULL; + kfree(sit_i->sit_bitmap); + kfree(sit_i); +} + +void destroy_segment_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + destroy_dirty_segmap(sbi); + destroy_curseg(sbi); + destroy_free_segmap(sbi); + destroy_sit_info(sbi); + sbi->sm_info = NULL; + kfree(sm_info); +} diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h new file mode 100644 index 00000000000..062424a0e4c --- /dev/null +++ b/fs/f2fs/segment.h @@ -0,0 +1,637 @@ +/* + * fs/f2fs/segment.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include + +/* constant macro */ +#define NULL_SEGNO ((unsigned int)(~0)) +#define NULL_SECNO ((unsigned int)(~0)) + +/* L: Logical segment # in volume, R: Relative segment # in main area */ +#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) +#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) + +#define IS_DATASEG(t) \ + ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ + (t == CURSEG_WARM_DATA)) + +#define IS_NODESEG(t) \ + ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ + (t == CURSEG_WARM_NODE)) + +#define IS_CURSEG(sbi, seg) \ + ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) + +#define IS_CURSEC(sbi, secno) \ + ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ + sbi->segs_per_sec)) \ + +#define START_BLOCK(sbi, segno) \ + (SM_I(sbi)->seg0_blkaddr + \ + (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) +#define NEXT_FREE_BLKADDR(sbi, curseg) \ + (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) + +#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) + +#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ + ((blk_addr) - SM_I(sbi)->seg0_blkaddr) +#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) +#define GET_SEGNO(sbi, blk_addr) \ + (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ + NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ + GET_SEGNO_FROM_SEG0(sbi, blk_addr))) +#define GET_SECNO(sbi, segno) \ + ((segno) / sbi->segs_per_sec) +#define GET_ZONENO_FROM_SEGNO(sbi, segno) \ + ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) + +#define GET_SUM_BLOCK(sbi, segno) \ + ((sbi->sm_info->ssa_blkaddr) + segno) + +#define GET_SUM_TYPE(footer) ((footer)->entry_type) +#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) + +#define SIT_ENTRY_OFFSET(sit_i, segno) \ + (segno % sit_i->sents_per_block) +#define SIT_BLOCK_OFFSET(sit_i, segno) \ + (segno / SIT_ENTRY_PER_BLOCK) +#define START_SEGNO(sit_i, segno) \ + (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) +#define f2fs_bitmap_size(nr) \ + (BITS_TO_LONGS(nr) * sizeof(unsigned long)) +#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) +#define TOTAL_SECS(sbi) (sbi->total_sections) + +#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ + (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) +#define SECTOR_TO_BLOCK(sbi, sectors) \ + (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) + +/* during checkpoint, bio_private is used to synchronize the last bio */ +struct bio_private { + struct f2fs_sb_info *sbi; + bool is_sync; + void *wait; +}; + +/* + * indicate a block allocation direction: RIGHT and LEFT. + * RIGHT means allocating new sections towards the end of volume. + * LEFT means the opposite direction. 
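+ * new_curseg() picks RIGHT for warm/cold data logs and for the no_heap
+ * mount option, and LEFT otherwise.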
+ */ +enum { + ALLOC_RIGHT = 0, + ALLOC_LEFT +}; + +/* + * In the victim_sel_policy->alloc_mode, there are two block allocation modes. + * LFS writes data sequentially with cleaning operations. + * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations. + */ +enum { + LFS = 0, + SSR +}; + +/* + * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes. + * GC_CB is based on cost-benefit algorithm. + * GC_GREEDY is based on greedy algorithm. + */ +enum { + GC_CB = 0, + GC_GREEDY +}; + +/* + * BG_GC means the background cleaning job. + * FG_GC means the on-demand cleaning job. + */ +enum { + BG_GC = 0, + FG_GC +}; + +/* for a function parameter to select a victim segment */ +struct victim_sel_policy { + int alloc_mode; /* LFS or SSR */ + int gc_mode; /* GC_CB or GC_GREEDY */ + unsigned long *dirty_segmap; /* dirty segment bitmap */ + unsigned int offset; /* last scanned bitmap offset */ + unsigned int ofs_unit; /* bitmap search unit */ + unsigned int min_cost; /* minimum cost */ + unsigned int min_segno; /* segment # having min. cost */ +}; + +struct seg_entry { + unsigned short valid_blocks; /* # of valid blocks */ + unsigned char *cur_valid_map; /* validity bitmap of blocks */ + /* + * # of valid blocks and the validity bitmap stored in the the last + * checkpoint pack. This information is used by the SSR mode. + */ + unsigned short ckpt_valid_blocks; + unsigned char *ckpt_valid_map; + unsigned char type; /* segment type like CURSEG_XXX_TYPE */ + unsigned long long mtime; /* modification time of the segment */ +}; + +struct sec_entry { + unsigned int valid_blocks; /* # of valid blocks in a section */ +}; + +struct segment_allocation { + void (*allocate_segment)(struct f2fs_sb_info *, int, bool); +}; + +struct sit_info { + const struct segment_allocation *s_ops; + + block_t sit_base_addr; /* start block address of SIT area */ + block_t sit_blocks; /* # of blocks used by SIT area */ + block_t written_valid_blocks; /* # of valid blocks in main area */ + char *sit_bitmap; /* SIT bitmap pointer */ + unsigned int bitmap_size; /* SIT bitmap size */ + + unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ + unsigned int dirty_sentries; /* # of dirty sentries */ + unsigned int sents_per_block; /* # of SIT entries per block */ + struct mutex sentry_lock; /* to protect SIT cache */ + struct seg_entry *sentries; /* SIT segment-level cache */ + struct sec_entry *sec_entries; /* SIT section-level cache */ + + /* for cost-benefit algorithm in cleaning procedure */ + unsigned long long elapsed_time; /* elapsed time after mount */ + unsigned long long mounted_time; /* mount time */ + unsigned long long min_mtime; /* min. modification time */ + unsigned long long max_mtime; /* max. 
modification time */ +}; + +struct free_segmap_info { + unsigned int start_segno; /* start segment number logically */ + unsigned int free_segments; /* # of free segments */ + unsigned int free_sections; /* # of free sections */ + rwlock_t segmap_lock; /* free segmap lock */ + unsigned long *free_segmap; /* free segment bitmap */ + unsigned long *free_secmap; /* free section bitmap */ +}; + +/* Notice: The order of dirty type is same with CURSEG_XXX in f2fs.h */ +enum dirty_type { + DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */ + DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */ + DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */ + DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */ + DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */ + DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */ + DIRTY, /* to count # of dirty segments */ + PRE, /* to count # of entirely obsolete segments */ + NR_DIRTY_TYPE +}; + +struct dirty_seglist_info { + const struct victim_selection *v_ops; /* victim selction operation */ + unsigned long *dirty_segmap[NR_DIRTY_TYPE]; + struct mutex seglist_lock; /* lock for segment bitmaps */ + int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ + unsigned long *victim_secmap; /* background GC victims */ +}; + +/* victim selection function for cleaning and SSR */ +struct victim_selection { + int (*get_victim)(struct f2fs_sb_info *, unsigned int *, + int, int, char); +}; + +/* for active log information */ +struct curseg_info { + struct mutex curseg_mutex; /* lock for consistency */ + struct f2fs_summary_block *sum_blk; /* cached summary block */ + unsigned char alloc_type; /* current allocation type */ + unsigned int segno; /* current segment number */ + unsigned short next_blkoff; /* next block offset to write */ + unsigned int zone; /* current zone number */ + unsigned int next_segno; /* preallocated segment */ +}; + +/* + * inline functions + */ +static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) +{ + return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); +} + +static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sentries[segno]; +} + +static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; +} + +static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno, int section) +{ + /* + * In order to get # of valid blocks in a section instantly from many + * segments, f2fs manages two counting structures separately. 
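+ * seg_entry counts per segment and sec_entry per section; update_sit_entry()
+ * keeps both in sync whenever segs_per_sec > 1.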
+ */ + if (section > 1) + return get_sec_entry(sbi, segno)->valid_blocks; + else + return get_seg_entry(sbi, segno)->valid_blocks; +} + +static inline void seg_info_from_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + se->valid_blocks = GET_SIT_VBLOCKS(rs); + se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); + memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->type = GET_SIT_TYPE(rs); + se->mtime = le64_to_cpu(rs->mtime); +} + +static inline void seg_info_to_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) | + se->valid_blocks; + rs->vblocks = cpu_to_le16(raw_vblocks); + memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->ckpt_valid_blocks = se->valid_blocks; + rs->mtime = cpu_to_le64(se->mtime); +} + +static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, + unsigned int max, unsigned int segno) +{ + unsigned int ret; + read_lock(&free_i->segmap_lock); + ret = find_next_bit(free_i->free_segmap, max, segno); + read_unlock(&free_i->segmap_lock); + return ret; +} + +static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + clear_bit(segno, free_i->free_segmap); + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + clear_bit(secno, free_i->free_secmap); + free_i->free_sections++; + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + set_bit(segno, free_i->free_segmap); + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; +} + +static inline void __set_test_and_free(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + if (test_and_clear_bit(segno, free_i->free_segmap)) { + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), + start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + if (test_and_clear_bit(secno, free_i->free_secmap)) + free_i->free_sections++; + } + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + write_lock(&free_i->segmap_lock); + if (!test_and_set_bit(segno, free_i->free_segmap)) { + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; + } + write_unlock(&free_i->segmap_lock); +} + +static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, + void *dst_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); +} + +static inline block_t written_block_count(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i 
= SIT_I(sbi); + block_t vblocks; + + mutex_lock(&sit_i->sentry_lock); + vblocks = sit_i->written_valid_blocks; + mutex_unlock(&sit_i->sentry_lock); + + return vblocks; +} + +static inline unsigned int free_segments(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_segs; + + read_lock(&free_i->segmap_lock); + free_segs = free_i->free_segments; + read_unlock(&free_i->segmap_lock); + + return free_segs; +} + +static inline int reserved_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->reserved_segments; +} + +static inline unsigned int free_sections(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_secs; + + read_lock(&free_i->segmap_lock); + free_secs = free_i->free_sections; + read_unlock(&free_i->segmap_lock); + + return free_secs; +} + +static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[PRE]; +} + +static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE]; +} + +static inline int overprovision_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->ovp_segments; +} + +static inline int overprovision_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; +} + +static inline int reserved_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec; +} + +static inline bool need_SSR(struct f2fs_sb_info *sbi) +{ + return (free_sections(sbi) < overprovision_sections(sbi)); +} + +static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) +{ + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + + if (sbi->por_doing) + return false; + + return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + + reserved_sections(sbi))); +} + +static inline int utilization(struct f2fs_sb_info *sbi) +{ + return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count); +} + +/* + * Sometimes f2fs may be better to drop out-of-place update policy. + * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write + * data in the original place likewise other traditional file systems. + * But, currently set 100 in percentage, which means it is disabled. + * See below need_inplace_update(). 
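+ * Even with a lower threshold, need_SSR() must also hold and directories
+ * are always written out of place.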
+ */ +#define MIN_IPU_UTIL 100 +static inline bool need_inplace_update(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (S_ISDIR(inode->i_mode)) + return false; + if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) + return true; + return false; +} + +static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->segno; +} + +static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->alloc_type; +} + +static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->next_blkoff; +} + +static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) +{ + unsigned int end_segno = SM_I(sbi)->segment_count - 1; + BUG_ON(segno > end_segno); +} + +/* + * This function is used for only debugging. + * NOTE: In future, we have to remove this function. + */ +static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; + block_t start_addr = sm_info->seg0_blkaddr; + block_t end_addr = start_addr + total_blks - 1; + BUG_ON(blk_addr < start_addr); + BUG_ON(blk_addr > end_addr); +} + +/* + * Summary block is always treated as invalid block + */ +static inline void check_block_count(struct f2fs_sb_info *sbi, + int segno, struct f2fs_sit_entry *raw_sit) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + unsigned int end_segno = sm_info->segment_count - 1; + int valid_blocks = 0; + int i; + + /* check segment usage */ + BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); + + /* check boundary of a given segment number */ + BUG_ON(segno > end_segno); + + /* check bitmap with valid block count */ + for (i = 0; i < sbi->blocks_per_seg; i++) + if (f2fs_test_bit(i, raw_sit->valid_map)) + valid_blocks++; + BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); +} + +static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, + unsigned int start) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); + block_t blk_addr = sit_i->sit_base_addr + offset; + + check_seg_range(sbi, start); + + /* calculate sit block address */ + if (f2fs_test_bit(offset, sit_i->sit_bitmap)) + blk_addr += sit_i->sit_blocks; + + return blk_addr; +} + +static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + block_addr -= sit_i->sit_base_addr; + if (block_addr < sit_i->sit_blocks) + block_addr += sit_i->sit_blocks; + else + block_addr -= sit_i->sit_blocks; + + return block_addr + sit_i->sit_base_addr; +} + +static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) +{ + unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); + + if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) + f2fs_clear_bit(block_off, sit_i->sit_bitmap); + else + f2fs_set_bit(block_off, sit_i->sit_bitmap); +} + +static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - + sit_i->mounted_time; +} + +static inline void set_summary(struct f2fs_summary *sum, nid_t nid, + unsigned int ofs_in_node, unsigned char version) +{ + sum->nid = cpu_to_le32(nid); + sum->ofs_in_node = 
cpu_to_le16(ofs_in_node); + sum->version = version; +} + +static inline block_t start_sum_block(struct f2fs_sb_info *sbi) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) + - (base + 1) + type; +} + +static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) +{ + if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) + return true; + return false; +} + +static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct request_queue *q = bdev_get_queue(bdev); + return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); +} diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c new file mode 100644 index 00000000000..e8c28d18b3e --- /dev/null +++ b/fs/f2fs/super.c @@ -0,0 +1,1154 @@ +/* + * fs/f2fs/super.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "xattr.h" +#include "gc.h" + +#define CREATE_TRACE_POINTS +#include + +static struct proc_dir_entry *f2fs_proc_root; +static struct kmem_cache *f2fs_inode_cachep; +static struct kset *f2fs_kset; + +enum { + Opt_gc_background, + Opt_disable_roll_forward, + Opt_discard, + Opt_noheap, + Opt_nouser_xattr, + Opt_noacl, + Opt_active_logs, + Opt_disable_ext_identify, + Opt_inline_xattr, + Opt_android_emu, + Opt_err_continue, + Opt_err_panic, + Opt_err_recover, + Opt_err, +}; + +static match_table_t f2fs_tokens = { + {Opt_gc_background, "background_gc=%s"}, + {Opt_disable_roll_forward, "disable_roll_forward"}, + {Opt_discard, "discard"}, + {Opt_noheap, "no_heap"}, + {Opt_nouser_xattr, "nouser_xattr"}, + {Opt_noacl, "noacl"}, + {Opt_active_logs, "active_logs=%u"}, + {Opt_disable_ext_identify, "disable_ext_identify"}, + {Opt_inline_xattr, "inline_xattr"}, + {Opt_android_emu, "android_emu=%s"}, + {Opt_err_continue, "errors=continue"}, + {Opt_err_panic, "errors=panic"}, + {Opt_err_recover, "errors=recover"}, + {Opt_err, NULL}, +}; + +/* Sysfs support for f2fs */ +struct f2fs_attr { + struct attribute attr; + ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); + ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, + const char *, size_t); + int offset; +}; + +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned int *ui; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t f2fs_sbi_store(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, + const char *buf, size_t count) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned long t; + unsigned int *ui; + ssize_t ret; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret < 0) + return ret; + *ui = t; + return count; +} + +static ssize_t 
f2fs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->show ? a->show(a, sbi, buf) : 0; +} + +static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->store ? a->store(a, sbi, buf, len) : 0; +} + +static void f2fs_sb_release(struct kobject *kobj) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .offset = offsetof(struct f2fs_gc_kthread, _elname), \ +} + +#define F2FS_RW_ATTR(name, elname) \ + F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) + +F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(gc_idle, gc_idle); + +#define ATTR_LIST(name) (&f2fs_attr_##name.attr) +static struct attribute *f2fs_attrs[] = { + ATTR_LIST(gc_min_sleep_time), + ATTR_LIST(gc_max_sleep_time), + ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_idle), + NULL, +}; + +static const struct sysfs_ops f2fs_attr_ops = { + .show = f2fs_attr_show, + .store = f2fs_attr_store, +}; + +static struct kobj_type f2fs_ktype = { + .default_attrs = f2fs_attrs, + .sysfs_ops = &f2fs_attr_ops, + .release = f2fs_sb_release, +}; + +void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + va_end(args); +} + +static void init_once(void *foo) +{ + struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; + + inode_init_once(&fi->vfs_inode); +} + +static int parse_android_emu(struct f2fs_sb_info *sbi, char *args) +{ + char *sep = args; + char *sepres; + int ret; + + if (!sep) + return -EINVAL; + + sepres = strsep(&sep, ":"); + if (!sep) + return -EINVAL; + ret = kstrtou32(sepres, 0, &sbi->android_emu_uid); + if (ret) + return ret; + + sepres = strsep(&sep, ":"); + if (!sep) + return -EINVAL; + ret = kstrtou32(sepres, 0, &sbi->android_emu_gid); + if (ret) + return ret; + + sepres = strsep(&sep, ":"); + ret = kstrtou16(sepres, 8, &sbi->android_emu_mode); + if (ret) + return ret; + + if (sep && strstr(sep, "nocase")) + sbi->android_emu_flags = F2FS_ANDROID_EMU_NOCASE; + + return 0; +} + +static int parse_options(struct super_block *sb, char *options) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + substring_t args[MAX_OPT_ARGS]; + char *p, *name; + int arg = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. 
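+ *
+ * ex) a typical option string passed in here looks like
+ *     "background_gc=on,active_logs=4,discard,errors=recover";
+ *     it is split on ',' below and matched against f2fs_tokens one
+ *     token at a time.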
+ */ + args[0].to = args[0].from = NULL; + token = match_token(p, f2fs_tokens, args); + + switch (token) { + case Opt_gc_background: + name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (!strncmp(name, "on", 2)) + set_opt(sbi, BG_GC); + else if (!strncmp(name, "off", 3)) + clear_opt(sbi, BG_GC); + else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_disable_roll_forward: + set_opt(sbi, DISABLE_ROLL_FORWARD); + break; + case Opt_discard: + set_opt(sbi, DISCARD); + break; + case Opt_noheap: + set_opt(sbi, NOHEAP); + break; +#ifdef CONFIG_F2FS_FS_XATTR + case Opt_nouser_xattr: + clear_opt(sbi, XATTR_USER); + break; + case Opt_inline_xattr: + set_opt(sbi, INLINE_XATTR); + break; +#else + case Opt_nouser_xattr: + f2fs_msg(sb, KERN_INFO, + "nouser_xattr options not supported"); + break; + case Opt_inline_xattr: + f2fs_msg(sb, KERN_INFO, + "inline_xattr options not supported"); + break; +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + case Opt_noacl: + clear_opt(sbi, POSIX_ACL); + break; +#else + case Opt_noacl: + f2fs_msg(sb, KERN_INFO, "noacl options not supported"); + break; +#endif + case Opt_active_logs: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) + return -EINVAL; + sbi->active_logs = arg; + break; + case Opt_disable_ext_identify: + set_opt(sbi, DISABLE_EXT_IDENTIFY); + break; + case Opt_err_continue: + clear_opt(sbi, ERRORS_RECOVER); + clear_opt(sbi, ERRORS_PANIC); + break; + case Opt_err_panic: + set_opt(sbi, ERRORS_PANIC); + clear_opt(sbi, ERRORS_RECOVER); + break; + case Opt_err_recover: + set_opt(sbi, ERRORS_RECOVER); + clear_opt(sbi, ERRORS_PANIC); + break; + case Opt_android_emu: + if (args->from) { + int ret; + char *perms = match_strdup(args); + + ret = parse_android_emu(sbi, perms); + kfree(perms); + + if (ret) + return -EINVAL; + + set_opt(sbi, ANDROID_EMU); + } else + return -EINVAL; + break; + + default: + f2fs_msg(sb, KERN_ERR, + "Unrecognized mount option \"%s\" or missing value", + p); + return -EINVAL; + } + } + return 0; +} + +static struct inode *f2fs_alloc_inode(struct super_block *sb) +{ + struct f2fs_inode_info *fi; + + fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO); + if (!fi) + return NULL; + + init_once((void *) fi); + + /* Initialize f2fs-specific inode info */ + fi->vfs_inode.i_version = 1; + atomic_set(&fi->dirty_dents, 0); + fi->i_current_depth = 1; + fi->i_advise = 0; + rwlock_init(&fi->ext.ext_lock); + + set_inode_flag(fi, FI_NEW_INODE); + + if (test_opt(F2FS_SB(sb), INLINE_XATTR)) + set_inode_flag(fi, FI_INLINE_XATTR); + + return &fi->vfs_inode; +} + +static int f2fs_drop_inode(struct inode *inode) +{ + /* + * This is to avoid a deadlock condition like below. + * writeback_single_inode(inode) + * - f2fs_write_data_page + * - f2fs_gc -> iput -> evict + * - inode_wait_for_writeback(inode) + */ + if (!inode_unhashed(inode) && inode->i_state & I_SYNC) + return 0; + return generic_drop_inode(inode); +} + +/* + * f2fs_dirty_inode() is called from __mark_inode_dirty() + * + * We should call set_dirty_inode to write the dirty inode through write_inode. 
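+ *
+ * The "flags" argument carries the I_DIRTY_* bits from __mark_inode_dirty();
+ * it is not examined here, the inode is only tagged with FI_DIRTY_INODE so
+ * that it is written back later through ->write_inode().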
+ */ +static void f2fs_dirty_inode(struct inode *inode, int flags) +{ + set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); +} + +static void f2fs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode)); +} + +static void f2fs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, f2fs_i_callback); +} + +static void f2fs_put_super(struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (sbi->s_proc) { + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry(sb->s_id, f2fs_proc_root); + } + kobject_del(&sbi->s_kobj); + + f2fs_destroy_stats(sbi); + stop_gc_thread(sbi); + + write_checkpoint(sbi, true); + + iput(sbi->node_inode); + iput(sbi->meta_inode); + + /* destroy f2fs internal modules */ + destroy_node_manager(sbi); + destroy_segment_manager(sbi); + + kfree(sbi->ckpt); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + + sb->s_fs_info = NULL; + brelse(sbi->raw_super_buf); + kfree(sbi); +} + +int f2fs_sync_fs(struct super_block *sb, int sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + trace_f2fs_sync_fs(sb, sync); + + if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) + return 0; + + if (sync) { + mutex_lock(&sbi->gc_mutex); + write_checkpoint(sbi, false); + mutex_unlock(&sbi->gc_mutex); + } else { + f2fs_balance_fs(sbi); + } + + return 0; +} + +static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + block_t total_count, user_block_count, start_count, ovp_count; + + total_count = le64_to_cpu(sbi->raw_super->block_count); + user_block_count = sbi->user_block_count; + start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr); + ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; + buf->f_type = F2FS_SUPER_MAGIC; + buf->f_bsize = sbi->blocksize; + + buf->f_blocks = total_count - start_count; + buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; + buf->f_bavail = user_block_count - valid_user_blocks(sbi); + + buf->f_files = sbi->total_node_count; + buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); + + buf->f_namelen = F2FS_NAME_LEN; + buf->f_fsid.val[0] = (u32)id; + buf->f_fsid.val[1] = (u32)(id >> 32); + + return 0; +} + +static int f2fs_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct f2fs_sb_info *sbi = F2FS_SB(vfs->mnt_sb); + + if (!(vfs->mnt_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) + seq_printf(seq, ",background_gc=%s", "on"); + else + seq_printf(seq, ",background_gc=%s", "off"); + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) + seq_puts(seq, ",disable_roll_forward"); + if (test_opt(sbi, DISCARD)) + seq_puts(seq, ",discard"); + if (test_opt(sbi, NOHEAP)) + seq_puts(seq, ",no_heap_alloc"); +#ifdef CONFIG_F2FS_FS_XATTR + if (test_opt(sbi, XATTR_USER)) + seq_puts(seq, ",user_xattr"); + else + seq_puts(seq, ",nouser_xattr"); + if (test_opt(sbi, INLINE_XATTR)) + seq_puts(seq, ",inline_xattr"); +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + if (test_opt(sbi, POSIX_ACL)) + seq_puts(seq, ",acl"); + else + seq_puts(seq, ",noacl"); +#endif + if (test_opt(sbi, ERRORS_PANIC)) + seq_puts(seq, ",errors=panic"); + else if (test_opt(sbi, ERRORS_RECOVER)) + seq_puts(seq, ",errors=recover"); + else + seq_puts(seq, ",errors=continue"); + if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) + seq_puts(seq, ",disable_ext_identify"); + + if 
(test_opt(sbi, ANDROID_EMU)) + seq_printf(seq, ",android_emu=%u:%u:%ho%s", + sbi->android_emu_uid, + sbi->android_emu_gid, + sbi->android_emu_mode, + (sbi->android_emu_flags & + F2FS_ANDROID_EMU_NOCASE) ? + ":nocase" : ""); + + seq_printf(seq, ",active_logs=%u", sbi->active_logs); + + return 0; +} + +static int segment_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); + int i; + + for (i = 0; i < total_segs; i++) { + seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); + if (i != 0 && (i % 10) == 0) + seq_puts(seq, "\n"); + else + seq_puts(seq, " "); + } + return 0; +} + +static int segment_info_open_fs(struct inode *inode, struct file *file) +{ + return single_open(file, segment_info_seq_show, + PROC_I(inode)->pde->data); +} + +static const struct file_operations f2fs_seq_segment_info_fops = { + .owner = THIS_MODULE, + .open = segment_info_open_fs, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int f2fs_remount(struct super_block *sb, int *flags, char *data) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_mount_info org_mount_opt; + int err, active_logs; + + /* + * Save the old mount options in case we + * need to restore them. + */ + org_mount_opt = sbi->mount_opt; + active_logs = sbi->active_logs; + + /* parse mount options */ + err = parse_options(sb, data); + if (err) + goto restore_opts; + + /* + * Previous and new state of filesystem is RO, + * so no point in checking GC conditions. + */ + if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) + goto skip; + + /* + * We stop the GC thread if FS is mounted as RO + * or if background_gc = off is passed in mount + * option. Also sync the filesystem. + */ + if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { + if (sbi->gc_thread) { + stop_gc_thread(sbi); + f2fs_sync_fs(sb, 1); + } + } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + err = start_gc_thread(sbi); + if (err) + goto restore_opts; + } +skip: + /* Update the POSIXACL Flag */ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); + return 0; + +restore_opts: + sbi->mount_opt = org_mount_opt; + sbi->active_logs = active_logs; + return err; +} + +static struct super_operations f2fs_sops = { + .alloc_inode = f2fs_alloc_inode, + .drop_inode = f2fs_drop_inode, + .destroy_inode = f2fs_destroy_inode, + .write_inode = f2fs_write_inode, + .dirty_inode = f2fs_dirty_inode, + .show_options = f2fs_show_options, + .evict_inode = f2fs_evict_inode, + .put_super = f2fs_put_super, + .sync_fs = f2fs_sync_fs, + .statfs = f2fs_statfs, + .remount_fs = f2fs_remount, +}; + +static struct inode *f2fs_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + + if (ino < F2FS_ROOT_INO(sbi)) + return ERR_PTR(-ESTALE); + + /* + * f2fs_iget isn't quite right if the inode is currently unallocated! + * However f2fs_iget currently does appropriate checks to handle stale + * inodes so everything is OK. + */ + inode = f2fs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { + /* we didn't find the right inode.. 
*/ + iput(inode); + return ERR_PTR(-ESTALE); + } + return inode; +} + +static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + f2fs_nfs_get_inode); +} + +static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + f2fs_nfs_get_inode); +} + +static const struct export_operations f2fs_export_ops = { + .fh_to_dentry = f2fs_fh_to_dentry, + .fh_to_parent = f2fs_fh_to_parent, + .get_parent = f2fs_get_parent, +}; + +static loff_t max_file_size(unsigned bits) +{ + loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); + loff_t leaf_count = ADDRS_PER_BLOCK; + + /* two direct node blocks */ + result += (leaf_count * 2); + + /* two indirect node blocks */ + leaf_count *= NIDS_PER_BLOCK; + result += (leaf_count * 2); + + /* one double indirect node block */ + leaf_count *= NIDS_PER_BLOCK; + result += leaf_count; + + result <<= bits; + return result; +} + +static int sanity_check_raw_super(struct super_block *sb, + struct f2fs_super_block *raw_super) +{ + unsigned int blocksize; + + if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { + f2fs_msg(sb, KERN_INFO, + "Magic Mismatch, valid(0x%x) - read(0x%x)", + F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); + return 1; + } + + /* Currently, support only 4KB page cache size */ + if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) { + f2fs_msg(sb, KERN_INFO, + "Invalid page_cache_size (%lu), supports only 4KB\n", + PAGE_CACHE_SIZE); + return 1; + } + + /* Currently, support only 4KB block size */ + blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); + if (blocksize != F2FS_BLKSIZE) { + f2fs_msg(sb, KERN_INFO, + "Invalid blocksize (%u), supports only 4KB\n", + blocksize); + return 1; + } + + if (le32_to_cpu(raw_super->log_sectorsize) != + F2FS_LOG_SECTOR_SIZE) { + f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize"); + return 1; + } + if (le32_to_cpu(raw_super->log_sectors_per_block) != + F2FS_LOG_SECTORS_PER_BLOCK) { + f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block"); + return 1; + } + return 0; +} + +static int sanity_check_ckpt(struct f2fs_sb_info *sbi) +{ + unsigned int total, fsmeta; + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + + total = le32_to_cpu(raw_super->segment_count); + fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); + fsmeta += le32_to_cpu(raw_super->segment_count_sit); + fsmeta += le32_to_cpu(raw_super->segment_count_nat); + fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); + fsmeta += le32_to_cpu(raw_super->segment_count_ssa); + + if (fsmeta >= total) + return 1; + + if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { + f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); + return 1; + } + return 0; +} + +static void init_sb_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = sbi->raw_super; + int i; + + sbi->log_sectors_per_block = + le32_to_cpu(raw_super->log_sectors_per_block); + sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize); + sbi->blocksize = 1 << sbi->log_blocksize; + sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg; + sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); + sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); + sbi->total_sections = le32_to_cpu(raw_super->section_count); + sbi->total_node_count = + 
(le32_to_cpu(raw_super->segment_count_nat) / 2) + * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK; + sbi->root_ino_num = le32_to_cpu(raw_super->root_ino); + sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); + sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); + sbi->cur_victim_sec = NULL_SECNO; + + for (i = 0; i < NR_COUNT_TYPE; i++) + atomic_set(&sbi->nr_pages[i], 0); +} + +static int validate_superblock(struct super_block *sb, + struct f2fs_super_block **raw_super, + struct buffer_head **raw_super_buf, sector_t block) +{ + const char *super = (block == 0 ? "first" : "second"); + + /* read f2fs raw super block */ + *raw_super_buf = sb_bread(sb, block); + if (!*raw_super_buf) { + f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", + super); + return -EIO; + } + + *raw_super = (struct f2fs_super_block *) + ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET); + + /* sanity checking of raw super */ + if (!sanity_check_raw_super(sb, *raw_super)) + return 0; + + f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " + "in %s superblock", super); + return -EINVAL; +} + +static int f2fs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct f2fs_sb_info *sbi; + struct f2fs_super_block *raw_super; + struct buffer_head *raw_super_buf; + struct inode *root; + long err = -EINVAL; + int i; + const char *descr = ""; + + f2fs_msg(sb, KERN_INFO, "mounting.."); + /* allocate memory for f2fs-specific super block info */ + sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + /* set a block size */ + if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) { + f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); + goto free_sbi; + } + + err = validate_superblock(sb, &raw_super, &raw_super_buf, 0); + if (err) { + brelse(raw_super_buf); + /* check secondary superblock when primary failed */ + err = validate_superblock(sb, &raw_super, &raw_super_buf, 1); + if (err) + goto free_sb_buf; + } + sb->s_fs_info = sbi; + /* init some FS parameters */ + sbi->active_logs = NR_CURSEG_TYPE; + + set_opt(sbi, BG_GC); + +#ifdef CONFIG_F2FS_FS_XATTR + set_opt(sbi, XATTR_USER); +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + set_opt(sbi, POSIX_ACL); +#endif + /* parse mount options */ + err = parse_options(sb, (char *)data); + if (err) + goto free_sb_buf; + + sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); + get_random_bytes(&sbi->s_next_generation, sizeof(u32)); + + sb->s_op = &f2fs_sops; + sb->s_xattr = f2fs_xattr_handlers; + sb->s_export_op = &f2fs_export_ops; + sb->s_magic = F2FS_SUPER_MAGIC; + sb->s_time_gran = 1; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? 
MS_POSIXACL : 0); + memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); + + /* init f2fs-specific super block info */ + sbi->sb = sb; + sbi->raw_super = raw_super; + sbi->raw_super_buf = raw_super_buf; + mutex_init(&sbi->gc_mutex); + mutex_init(&sbi->writepages); + mutex_init(&sbi->cp_mutex); + for (i = 0; i < NR_GLOBAL_LOCKS; i++) + mutex_init(&sbi->fs_lock[i]); + mutex_init(&sbi->node_write); + sbi->por_doing = 0; + spin_lock_init(&sbi->stat_lock); + init_rwsem(&sbi->bio_sem); + init_sb_info(sbi); + + /* get an inode for meta space */ + sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); + if (IS_ERR(sbi->meta_inode)) { + f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); + err = PTR_ERR(sbi->meta_inode); + goto free_sb_buf; + } + +get_cp: + err = get_valid_checkpoint(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint"); + goto free_meta_inode; + } + + /* sanity checking of checkpoint */ + err = -EINVAL; + if (sanity_check_ckpt(sbi)) { + f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint"); + goto free_cp; + } + + sbi->total_valid_node_count = + le32_to_cpu(sbi->ckpt->valid_node_count); + sbi->total_valid_inode_count = + le32_to_cpu(sbi->ckpt->valid_inode_count); + sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count); + sbi->total_valid_block_count = + le64_to_cpu(sbi->ckpt->valid_block_count); + sbi->last_valid_block_count = sbi->total_valid_block_count; + sbi->alloc_valid_block_count = 0; + INIT_LIST_HEAD(&sbi->dir_inode_list); + spin_lock_init(&sbi->dir_inode_lock); + + init_orphan_info(sbi); + + /* setup f2fs internal modules */ + err = build_segment_manager(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Failed to initialize F2FS segment manager"); + goto free_sm; + } + err = build_node_manager(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Failed to initialize F2FS node manager"); + goto free_nm; + } + + build_gc_manager(sbi); + + /* get an inode for node space */ + sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); + if (IS_ERR(sbi->node_inode)) { + f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); + err = PTR_ERR(sbi->node_inode); + goto free_nm; + } + + /* if there are nt orphan nodes free them */ + err = -EINVAL; + if (recover_orphan_inodes(sbi)) + goto free_node_inode; + + /* read root inode and dentry */ + root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); + if (IS_ERR(root)) { + f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); + err = PTR_ERR(root); + goto free_node_inode; + } + if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) + goto free_root_inode; + + sb->s_root = d_alloc_root(root); /* allocate root dentry */ + if (!sb->s_root) { + err = -ENOMEM; + goto free_root_inode; + } + + /* recover fsynced data */ + if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + err = recover_fsync_data(sbi); + if (err) { + if (f2fs_handle_error(sbi)) { + set_opt(sbi, DISABLE_ROLL_FORWARD); + kfree(sbi->ckpt); + f2fs_msg(sb, KERN_ERR, + "reloading last checkpoint"); + goto get_cp; + } + f2fs_msg(sb, KERN_ERR, + "cannot recover all fsync data errno=%ld", err); + /* checkpoint what we have */ + write_checkpoint(sbi, false); + } + } + + /* + * If filesystem is not mounted as read-only then + * do start the gc_thread. 
+ */ + if (!(sb->s_flags & MS_RDONLY)) { + /* After POR, we can run background GC thread.*/ + err = start_gc_thread(sbi); + if (err) + goto fail; + } + + err = f2fs_build_stats(sbi); + if (err) + goto fail; + + if (f2fs_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); + + if (sbi->s_proc) + proc_create_data("segment_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_info_fops, sb); + + if (test_opt(sbi, DISCARD)) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + if (!blk_queue_discard(q)) + f2fs_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but " + "the device does not support discard"); + } + + if (test_opt(sbi, ANDROID_EMU)) + descr = " with android sdcard emulation"; + f2fs_msg(sb, KERN_INFO, "mounted filesystem%s", descr); + + sbi->s_kobj.kset = f2fs_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, + "%s", sb->s_id); + if (err) + goto fail; + + return 0; +fail: + stop_gc_thread(sbi); +free_root_inode: + iput(root); +free_node_inode: + iput(sbi->node_inode); +free_nm: + destroy_node_manager(sbi); +free_sm: + destroy_segment_manager(sbi); +free_cp: + kfree(sbi->ckpt); +free_meta_inode: + make_bad_inode(sbi->meta_inode); + iput(sbi->meta_inode); +free_sb_buf: + brelse(raw_super_buf); +free_sbi: + kfree(sbi); + f2fs_msg(sb, KERN_ERR, "mount failed"); + return err; +} + +static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); +} + +static struct file_system_type f2fs_fs_type = { + .owner = THIS_MODULE, + .name = "f2fs", + .mount = f2fs_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_inodecache(void) +{ + f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", + sizeof(struct f2fs_inode_info), NULL); + if (f2fs_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
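+ *
+ * rcu_barrier() waits for every call_rcu() callback still in flight (the
+ * f2fs_i_callback() queued by f2fs_destroy_inode()) to finish; waiting for
+ * a grace period alone would not be enough.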
+ */ + rcu_barrier(); + kmem_cache_destroy(f2fs_inode_cachep); +} + +static int __init init_f2fs_fs(void) +{ + int err; + + err = init_inodecache(); + if (err) + goto fail; + err = create_node_manager_caches(); + if (err) + goto free_inodecache; + err = create_gc_caches(); + if (err) + goto free_node_manager_caches; + err = create_checkpoint_caches(); + if (err) + goto free_gc_caches; + f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); + if (!f2fs_kset) { + err = -ENOMEM; + goto free_checkpoint_caches; + } + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_kset; + f2fs_create_root_stats(); + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + return 0; + +free_kset: + kset_unregister(f2fs_kset); +free_checkpoint_caches: + destroy_checkpoint_caches(); +free_gc_caches: + destroy_gc_caches(); +free_node_manager_caches: + destroy_node_manager_caches(); +free_inodecache: + destroy_inodecache(); +fail: + return err; +} + +static void __exit exit_f2fs_fs(void) +{ + remove_proc_entry("fs/f2fs", NULL); + f2fs_destroy_root_stats(); + unregister_filesystem(&f2fs_fs_type); + destroy_checkpoint_caches(); + destroy_gc_caches(); + destroy_node_manager_caches(); + destroy_inodecache(); + kset_unregister(f2fs_kset); +} + +module_init(init_f2fs_fs) +module_exit(exit_f2fs_fs) + +MODULE_AUTHOR("Samsung Electronics's Praesto Team"); +MODULE_DESCRIPTION("Flash Friendly File System"); +MODULE_LICENSE("GPL"); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c new file mode 100644 index 00000000000..85b99ebcbd5 --- /dev/null +++ b/fs/f2fs/xattr.c @@ -0,0 +1,600 @@ +/* + * fs/f2fs/xattr.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/xattr.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher + * + * Fix by Harrison Xing . + * Extended attributes for symlinks and special files added per + * suggestion of Luka Renko . + * xattr consolidation Copyright (c) 2004 James Morris , + * Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include "f2fs.h" +#include "xattr.h" + +static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + int total_len, prefix_len = 0; + const char *prefix = NULL; + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + prefix = XATTR_USER_PREFIX; + prefix_len = XATTR_USER_PREFIX_LEN; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + prefix = XATTR_TRUSTED_PREFIX; + prefix_len = XATTR_TRUSTED_PREFIX_LEN; + break; + case F2FS_XATTR_INDEX_SECURITY: + prefix = XATTR_SECURITY_PREFIX; + prefix_len = XATTR_SECURITY_PREFIX_LEN; + break; + default: + return -EINVAL; + } + + total_len = prefix_len + name_len + 1; + if (list && total_len <= list_size) { + memcpy(list, prefix, prefix_len); + memcpy(list + prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + break; + case F2FS_XATTR_INDEX_SECURITY: + break; + default: + return -EINVAL; + } + if (strcmp(name, "") == 0) + return -EINVAL; + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); +} + +static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + break; + case F2FS_XATTR_INDEX_SECURITY: + break; + default: + return -EINVAL; + } + if (strcmp(name, "") == 0) + return -EINVAL; + + return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); +} + +static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; + size_t size; + + if (type != F2FS_XATTR_INDEX_ADVISE) + return 0; + + size = strlen(xname) + 1; + if (list && size <= list_size) + memcpy(list, xname, size); + return size; +} + +static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct inode *inode = dentry->d_inode; + + if (!name || strcmp(name, "") != 0) + return -EINVAL; + + if (buffer) + *((char *)buffer) = F2FS_I(inode)->i_advise; + return sizeof(char); +} + +static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct inode *inode = dentry->d_inode; + + if (!name || strcmp(name, "") != 0) + return -EINVAL; + if (!inode_owner_or_capable(inode)) + return -EPERM; + if (value == NULL) + return -EINVAL; + + F2FS_I(inode)->i_advise = *(char *)value; + return 0; +} + +#ifdef CONFIG_F2FS_FS_SECURITY +static int __f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len, + struct page *ipage); +static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + 
void *page) +{ + const struct xattr *xattr; + int err = 0; + + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, + xattr->name, xattr->value, + xattr->value_len, (struct page *)page); + if (err < 0) + break; + } + return err; +} + +int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return security_new_inode_init_security(inode, dir, qstr, + &f2fs_initxattrs, ipage); +} +#endif + +const struct xattr_handler f2fs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .flags = F2FS_XATTR_INDEX_USER, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + +const struct xattr_handler f2fs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .flags = F2FS_XATTR_INDEX_TRUSTED, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + +const struct xattr_handler f2fs_xattr_advise_handler = { + .prefix = F2FS_SYSTEM_ADVISE_PREFIX, + .flags = F2FS_XATTR_INDEX_ADVISE, + .list = f2fs_xattr_advise_list, + .get = f2fs_xattr_advise_get, + .set = f2fs_xattr_advise_set, +}; + +const struct xattr_handler f2fs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .flags = F2FS_XATTR_INDEX_SECURITY, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + +static const struct xattr_handler *f2fs_xattr_handler_map[] = { + [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, +#ifdef CONFIG_F2FS_FS_POSIX_ACL + [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler, + [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, +#endif + [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler, +#endif + [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, +}; + +const struct xattr_handler *f2fs_xattr_handlers[] = { + &f2fs_xattr_user_handler, +#ifdef CONFIG_F2FS_FS_POSIX_ACL + &f2fs_xattr_acl_access_handler, + &f2fs_xattr_acl_default_handler, +#endif + &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + &f2fs_xattr_security_handler, +#endif + &f2fs_xattr_advise_handler, + NULL, +}; + +static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) +{ + const struct xattr_handler *handler = NULL; + + if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map)) + handler = f2fs_xattr_handler_map[name_index]; + return handler; +} + +static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, + size_t name_len, const char *name) +{ + struct f2fs_xattr_entry *entry; + + list_for_each_xattr(entry, base_addr) { + if (entry->e_name_index != name_index) + continue; + if (entry->e_name_len != name_len) + continue; + if (!memcmp(entry->e_name, name, name_len)) + break; + } + return entry; +} + +static void *read_all_xattrs(struct inode *inode, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_xattr_header *header; + size_t size = PAGE_SIZE, inline_size = 0; + void *txattr_addr; + + inline_size = inline_xattr_size(inode); + + txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); + if (!txattr_addr) + return NULL; + + /* read from inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, 
inode->i_ino); + if (IS_ERR(page)) + goto fail; + inline_addr = inline_xattr_addr(page); + } + memcpy(txattr_addr, inline_addr, inline_size); + f2fs_put_page(page, 1); + } + + /* read from xattr node block */ + if (F2FS_I(inode)->i_xattr_nid) { + struct page *xpage; + void *xattr_addr; + + /* The inode already has an extended attribute block. */ + xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xpage)) + goto fail; + + xattr_addr = page_address(xpage); + memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); + f2fs_put_page(xpage, 1); + } + + header = XATTR_HDR(txattr_addr); + + /* never been allocated xattrs */ + if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) { + header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); + header->h_refcount = cpu_to_le32(1); + } + return txattr_addr; +fail: + kzfree(txattr_addr); + return NULL; +} + +static inline int write_all_xattrs(struct inode *inode, __u32 hsize, + void *txattr_addr, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + size_t inline_size = 0; + void *xattr_addr; + struct page *xpage; + nid_t new_nid = 0; + int err; + + inline_size = inline_xattr_size(inode); + + if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) + if (!alloc_nid(sbi, &new_nid)) + return -ENOSPC; + + /* write to inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(page); + } + inline_addr = inline_xattr_addr(page); + } + memcpy(inline_addr, txattr_addr, inline_size); + f2fs_put_page(page, 1); + + /* no need to use xattr node block */ + if (hsize <= inline_size) { + err = truncate_xattr_node(inode, ipage); + alloc_nid_failed(sbi, new_nid); + return err; + } + } + + /* write to xattr node block */ + if (F2FS_I(inode)->i_xattr_nid) { + xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(xpage); + } + BUG_ON(new_nid); + } else { + struct dnode_of_data dn; + set_new_dnode(&dn, inode, NULL, NULL, new_nid); + xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(xpage); + } + alloc_nid_done(sbi, new_nid); + } + + xattr_addr = page_address(xpage); + memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - + sizeof(struct node_footer)); + set_page_dirty(xpage); + f2fs_put_page(xpage, 1); + + /* need to checkpoint during fsync */ + F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); + return 0; +} + +int f2fs_getxattr(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + struct f2fs_xattr_entry *entry; + void *base_addr; + int error = 0; + size_t value_len, name_len; + + if (name == NULL) + return -EINVAL; + name_len = strlen(name); + + base_addr = read_all_xattrs(inode, NULL); + if (!base_addr) + return -ENOMEM; + + entry = __find_xattr(base_addr, name_index, name_len, name); + if (IS_XATTR_LAST_ENTRY(entry)) { + error = -ENODATA; + goto cleanup; + } + + value_len = le16_to_cpu(entry->e_value_size); + + if (buffer && value_len > buffer_size) { + error = -ERANGE; + goto cleanup; + } + + if (buffer) { + char *pval = entry->e_name + entry->e_name_len; + memcpy(buffer, pval, value_len); + } + error = value_len; + +cleanup: + kzfree(base_addr); + return error; +} + +ssize_t f2fs_listxattr(struct dentry *dentry, char 
*buffer, size_t buffer_size) +{ + struct inode *inode = dentry->d_inode; + struct f2fs_xattr_entry *entry; + void *base_addr; + int error = 0; + size_t rest = buffer_size; + + base_addr = read_all_xattrs(inode, NULL); + if (!base_addr) + return -ENOMEM; + + list_for_each_xattr(entry, base_addr) { + const struct xattr_handler *handler = + f2fs_xattr_handler(entry->e_name_index); + size_t size; + + if (!handler) + continue; + + size = handler->list(dentry, buffer, rest, entry->e_name, + entry->e_name_len, handler->flags); + if (buffer && size > rest) { + error = -ERANGE; + goto cleanup; + } + + if (buffer) + buffer += size; + rest -= size; + } + error = buffer_size - rest; +cleanup: + kzfree(base_addr); + return error; +} + +static int __f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len, + struct page *ipage) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_xattr_entry *here, *last; + void *base_addr; + int found, newsize; + size_t name_len; + __u32 new_hsize; + int error = -ENOMEM; + + if (name == NULL) + return -EINVAL; + + if (value == NULL) + value_len = 0; + + name_len = strlen(name); + + if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) + return -ERANGE; + + base_addr = read_all_xattrs(inode, ipage); + if (!base_addr) + goto exit; + + /* find entry with wanted name. */ + here = __find_xattr(base_addr, name_index, name_len, name); + + found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; + last = here; + + while (!IS_XATTR_LAST_ENTRY(last)) + last = XATTR_NEXT_ENTRY(last); + + newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + + name_len + value_len); + + /* 1. Check space */ + if (value) { + int free; + /* + * If value is NULL, it is remove operation. + * In case of update operation, we caculate free. + */ + free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); + if (found) + free = free - ENTRY_SIZE(here); + + if (free < newsize) { + error = -ENOSPC; + goto exit; + } + } + + /* 2. Remove old entry */ + if (found) { + /* + * If entry is found, remove old entry. + * If not found, remove operation is not needed. + */ + struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here); + int oldsize = ENTRY_SIZE(here); + + memmove(here, next, (char *)last - (char *)next); + last = (struct f2fs_xattr_entry *)((char *)last - oldsize); + memset(last, 0, oldsize); + } + + new_hsize = (char *)last - (char *)base_addr; + + /* 3. Write new entry */ + if (value) { + char *pval; + /* + * Before we come here, old entry is removed. + * We just write new entry. 
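+ *
+ * ex) setting "user.foo" to a 3-byte value: the generic xattr handler
+ *     strips the "user." prefix, so name_len = 3, value_len = 3, and
+ *     newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + 3 + 3) =
+ *     XATTR_ALIGN(10) = 12 bytes written at *last.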
+ */ + memset(last, 0, newsize); + last->e_name_index = name_index; + last->e_name_len = name_len; + memcpy(last->e_name, name, name_len); + pval = last->e_name + name_len; + memcpy(pval, value, value_len); + last->e_value_size = cpu_to_le16(value_len); + new_hsize += newsize; + } + + error = write_all_xattrs(inode, new_hsize, base_addr, ipage); + if (error) + goto exit; + + if (is_inode_flag_set(fi, FI_ACL_MODE)) { + inode->i_mode = fi->i_acl_mode; + inode->i_ctime = CURRENT_TIME; + clear_inode_flag(fi, FI_ACL_MODE); + } + + if (ipage) + update_inode(inode, ipage); + else + update_inode_page(inode); +exit: + kzfree(base_addr); + return error; +} + +int f2fs_setxattr(struct inode *inode, int name_index, const char *name, + const void *value, size_t value_len, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; + int err; + + f2fs_balance_fs(sbi); + + ilock = mutex_lock_op(sbi); + + err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); + + mutex_unlock_op(sbi, ilock); + + return err; +} diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h new file mode 100644 index 00000000000..02a08fb88a1 --- /dev/null +++ b/fs/f2fs/xattr.h @@ -0,0 +1,152 @@ +/* + * fs/f2fs/xattr.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/xattr.h + * + * On-disk format of extended attributes for the ext2 filesystem. + * + * (C) 2001 Andreas Gruenbacher, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __F2FS_XATTR_H__ +#define __F2FS_XATTR_H__ + +#include +#include + +/* Magic value in attribute blocks */ +#define F2FS_XATTR_MAGIC 0xF2F52011 + +/* Maximum number of references to one attribute block */ +#define F2FS_XATTR_REFCOUNT_MAX 1024 + +/* Name indexes */ +#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise" +#define F2FS_XATTR_INDEX_USER 1 +#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3 +#define F2FS_XATTR_INDEX_TRUSTED 4 +#define F2FS_XATTR_INDEX_LUSTRE 5 +#define F2FS_XATTR_INDEX_SECURITY 6 +#define F2FS_XATTR_INDEX_ADVISE 7 + +struct f2fs_xattr_header { + __le32 h_magic; /* magic number for identification */ + __le32 h_refcount; /* reference count */ + __u32 h_reserved[4]; /* zero right now */ +}; + +struct f2fs_xattr_entry { + __u8 e_name_index; + __u8 e_name_len; + __le16 e_value_size; /* size of attribute value */ + char e_name[0]; /* attribute name */ +}; + +#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr)) +#define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr)) +#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1)) +#define XATTR_ROUND (3) + +#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) + +#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \ + entry->e_name_len + le16_to_cpu(entry->e_value_size))) + +#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\ + ENTRY_SIZE(entry))) + +#define IS_XATTR_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) + +#define list_for_each_xattr(entry, addr) \ + for (entry = XATTR_FIRST_ENTRY(addr);\ + !IS_XATTR_LAST_ENTRY(entry);\ + entry = XATTR_NEXT_ENTRY(entry)) + +#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \ + sizeof(struct node_footer) - sizeof(__u32)) + +#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \ + sizeof(struct f2fs_xattr_header) 
- \ + sizeof(struct f2fs_xattr_entry)) + +/* + * On-disk structure of f2fs_xattr + * We use inline xattrs space + 1 block for xattr. + * + * +--------------------+ + * | f2fs_xattr_header | + * | | + * +--------------------+ + * | f2fs_xattr_entry | + * | .e_name_index = 1 | + * | .e_name_len = 3 | + * | .e_value_size = 14 | + * | .e_name = "foo" | + * | "value_of_xattr" |<- value_offs = e_name + e_name_len + * +--------------------+ + * | f2fs_xattr_entry | + * | .e_name_index = 4 | + * | .e_name = "bar" | + * +--------------------+ + * | | + * | Free | + * | | + * +--------------------+<- MIN_OFFSET + * | node_footer | + * | (nid, ino, offset) | + * +--------------------+ + * + **/ + +#ifdef CONFIG_F2FS_FS_XATTR +extern const struct xattr_handler f2fs_xattr_user_handler; +extern const struct xattr_handler f2fs_xattr_trusted_handler; +extern const struct xattr_handler f2fs_xattr_acl_access_handler; +extern const struct xattr_handler f2fs_xattr_acl_default_handler; +extern const struct xattr_handler f2fs_xattr_advise_handler; +extern const struct xattr_handler f2fs_xattr_security_handler; + +extern const struct xattr_handler *f2fs_xattr_handlers[]; + +extern int f2fs_setxattr(struct inode *, int, const char *, + const void *, size_t, struct page *); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); +#else + +#define f2fs_xattr_handlers NULL +static inline int f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len) +{ + return -EOPNOTSUPP; +} +static inline int f2fs_getxattr(struct inode *inode, int name_index, + const char *name, void *buffer, size_t buffer_size) +{ + return -EOPNOTSUPP; +} +static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, + size_t buffer_size) +{ + return -EOPNOTSUPP; +} +#endif + +#ifdef CONFIG_F2FS_FS_SECURITY +extern int f2fs_init_security(struct inode *, struct inode *, + const struct qstr *, struct page *); +#else +static inline int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return 0; +} +#endif +#endif /* __F2FS_XATTR_H__ */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 713c7c62443..9327888b4b2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -239,6 +239,7 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); +extern struct dentry * d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h new file mode 100644 index 00000000000..bb942f6d570 --- /dev/null +++ b/include/linux/f2fs_fs.h @@ -0,0 +1,424 @@ +/** + * include/linux/f2fs_fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef _LINUX_F2FS_FS_H +#define _LINUX_F2FS_FS_H + +#include +#include + +#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ +#define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */ +#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ +#define F2FS_BLKSIZE 4096 /* support only 4KB block */ +#define F2FS_MAX_EXTENSION 64 /* # of extension entries */ + +#define NULL_ADDR ((block_t)0) /* used as block_t addresses */ +#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ + +#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) +#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) +#define F2FS_META_INO(sbi) (sbi->meta_ino_num) + +/* This flag is used by node and meta inodes, and by recovery */ +#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) + +/* + * For further optimization on multi-head logs, on-disk layout supports maximum + * 16 logs by default. The number, 16, is expected to cover all the cases + * enoughly. The implementaion currently uses no more than 6 logs. + * Half the logs are used for nodes, and the other half are used for data. + */ +#define MAX_ACTIVE_LOGS 16 +#define MAX_ACTIVE_NODE_LOGS 8 +#define MAX_ACTIVE_DATA_LOGS 8 + +/* + * For superblock + */ +struct f2fs_super_block { + __le32 magic; /* Magic Number */ + __le16 major_ver; /* Major Version */ + __le16 minor_ver; /* Minor Version */ + __le32 log_sectorsize; /* log2 sector size in bytes */ + __le32 log_sectors_per_block; /* log2 # of sectors per block */ + __le32 log_blocksize; /* log2 block size in bytes */ + __le32 log_blocks_per_seg; /* log2 # of blocks per segment */ + __le32 segs_per_sec; /* # of segments per section */ + __le32 secs_per_zone; /* # of sections per zone */ + __le32 checksum_offset; /* checksum offset inside super block */ + __le64 block_count; /* total # of user blocks */ + __le32 section_count; /* total # of sections */ + __le32 segment_count; /* total # of segments */ + __le32 segment_count_ckpt; /* # of segments for checkpoint */ + __le32 segment_count_sit; /* # of segments for SIT */ + __le32 segment_count_nat; /* # of segments for NAT */ + __le32 segment_count_ssa; /* # of segments for SSA */ + __le32 segment_count_main; /* # of segments for main area */ + __le32 segment0_blkaddr; /* start block address of segment 0 */ + __le32 cp_blkaddr; /* start block address of checkpoint */ + __le32 sit_blkaddr; /* start block address of SIT */ + __le32 nat_blkaddr; /* start block address of NAT */ + __le32 ssa_blkaddr; /* start block address of SSA */ + __le32 main_blkaddr; /* start block address of main area */ + __le32 root_ino; /* root inode number */ + __le32 node_ino; /* node inode number */ + __le32 meta_ino; /* meta inode number */ + __u8 uuid[16]; /* 128-bit uuid for volume */ + __le16 volume_name[512]; /* volume name */ + __le32 extension_count; /* # of extensions below */ + __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ +} __packed; + +/* + * For checkpoint + */ +#define CP_ERROR_FLAG 0x00000008 +#define CP_COMPACT_SUM_FLAG 0x00000004 +#define CP_ORPHAN_PRESENT_FLAG 0x00000002 +#define CP_UMOUNT_FLAG 0x00000001 + +struct f2fs_checkpoint { + __le64 checkpoint_ver; /* checkpoint block version number */ + __le64 user_block_count; /* # of user blocks */ + __le64 valid_block_count; /* # of valid blocks in main area */ + __le32 rsvd_segment_count; /* # of reserved segments for gc */ + __le32 overprov_segment_count; /* # of overprovision segments */ + __le32 free_segment_count; /* # of free segments in main area */ + + /* information of current node segments */ + __le32 
cur_node_segno[MAX_ACTIVE_NODE_LOGS]; + __le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS]; + /* information of current data segments */ + __le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS]; + __le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS]; + __le32 ckpt_flags; /* Flags : umount and journal_present */ + __le32 cp_pack_total_block_count; /* total # of one cp pack */ + __le32 cp_pack_start_sum; /* start block number of data summary */ + __le32 valid_node_count; /* Total number of valid nodes */ + __le32 valid_inode_count; /* Total number of valid inodes */ + __le32 next_free_nid; /* Next free node number */ + __le32 sit_ver_bitmap_bytesize; /* Default value 64 */ + __le32 nat_ver_bitmap_bytesize; /* Default value 256 */ + __le32 checksum_offset; /* checksum offset inside cp block */ + __le64 elapsed_time; /* mounted time */ + /* allocation type of current segment */ + unsigned char alloc_type[MAX_ACTIVE_LOGS]; + + /* SIT and NAT version bitmap */ + unsigned char sit_nat_version_bitmap[1]; +} __packed; + +/* + * For orphan inode management + */ +#define F2FS_ORPHANS_PER_BLOCK 1020 + +struct f2fs_orphan_block { + __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */ + __le32 reserved; /* reserved */ + __le16 blk_addr; /* block index in current CP */ + __le16 blk_count; /* Number of orphan inode blocks in CP */ + __le32 entry_count; /* Total number of orphan nodes in current CP */ + __le32 check_sum; /* CRC32 for orphan inode block */ +} __packed; + +/* + * For NODE structure + */ +struct f2fs_extent { + __le32 fofs; /* start file offset of the extent */ + __le32 blk_addr; /* start block address of the extent */ + __le32 len; /* lengh of the extent */ +} __packed; + +#define F2FS_NAME_LEN 255 +#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ +#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define ADDRS_PER_INODE(fi) addrs_per_inode(fi) +#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ + +#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) +#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) +#define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3) +#define NODE_IND2_BLOCK (DEF_ADDRS_PER_INODE + 4) +#define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5) + +#define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ + +struct f2fs_inode { + __le16 i_mode; /* file mode */ + __u8 i_advise; /* file hints */ + __u8 i_inline; /* file inline flags */ + __le32 i_uid; /* user ID */ + __le32 i_gid; /* group ID */ + __le32 i_links; /* links count */ + __le64 i_size; /* file size in bytes */ + __le64 i_blocks; /* file size in blocks */ + __le64 i_atime; /* access time */ + __le64 i_ctime; /* change time */ + __le64 i_mtime; /* modification time */ + __le32 i_atime_nsec; /* access time in nano scale */ + __le32 i_ctime_nsec; /* change time in nano scale */ + __le32 i_mtime_nsec; /* modification time in nano scale */ + __le32 i_generation; /* file version (for NFS) */ + __le32 i_current_depth; /* only for directory depth */ + __le32 i_xattr_nid; /* nid to save xattr */ + __le32 i_flags; /* file attributes */ + __le32 i_pino; /* parent inode number */ + __le32 i_namelen; /* file name length */ + __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */ + __u8 i_reserved2; /* for backward compatibility */ + + struct f2fs_extent i_ext; /* caching a largest extent */ + + __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ + + __le32 i_nid[5]; /* direct(2), indirect(2), + double_indirect(1) node id */ +} 
__packed; + +struct direct_node { + __le32 addr[ADDRS_PER_BLOCK]; /* array of data block address */ +} __packed; + +struct indirect_node { + __le32 nid[NIDS_PER_BLOCK]; /* array of data block address */ +} __packed; + +enum { + COLD_BIT_SHIFT = 0, + FSYNC_BIT_SHIFT, + DENT_BIT_SHIFT, + OFFSET_BIT_SHIFT +}; + +struct node_footer { + __le32 nid; /* node id */ + __le32 ino; /* inode nunmber */ + __le32 flag; /* include cold/fsync/dentry marks and offset */ + __le64 cp_ver; /* checkpoint version */ + __le32 next_blkaddr; /* next node page block address */ +} __packed; + +struct f2fs_node { + /* can be one of three types: inode, direct, and indirect types */ + union { + struct f2fs_inode i; + struct direct_node dn; + struct indirect_node in; + }; + struct node_footer footer; +} __packed; + +/* + * For NAT entries + */ +#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry)) + +struct f2fs_nat_entry { + __u8 version; /* latest version of cached nat entry */ + __le32 ino; /* inode number */ + __le32 block_addr; /* block address */ +} __packed; + +struct f2fs_nat_block { + struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For SIT entries + * + * Each segment is 2MB in size by default so that a bitmap for validity of + * there-in blocks should occupy 64 bytes, 512 bits. + * Not allow to change this. + */ +#define SIT_VBLOCK_MAP_SIZE 64 +#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) + +/* + * Note that f2fs_sit_entry->vblocks has the following bit-field information. + * [15:10] : allocation type such as CURSEG_XXXX_TYPE + * [9:0] : valid block count + */ +#define SIT_VBLOCKS_SHIFT 10 +#define SIT_VBLOCKS_MASK ((1 << SIT_VBLOCKS_SHIFT) - 1) +#define GET_SIT_VBLOCKS(raw_sit) \ + (le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK) +#define GET_SIT_TYPE(raw_sit) \ + ((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK) \ + >> SIT_VBLOCKS_SHIFT) + +struct f2fs_sit_entry { + __le16 vblocks; /* reference above */ + __u8 valid_map[SIT_VBLOCK_MAP_SIZE]; /* bitmap for valid blocks */ + __le64 mtime; /* segment age for cleaning */ +} __packed; + +struct f2fs_sit_block { + struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For segment summary + * + * One summary block contains exactly 512 summary entries, which represents + * exactly 2MB segment by default. Not allow to change the basic units. + * + * NOTE: For initializing fields, you must use set_summary + * + * - If data page, nid represents dnode's nid + * - If node page, nid represents the node page's nid. + * + * The ofs_in_node is used by only data page. It represents offset + * from node's page's beginning to get a data block address. 
+ * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) + */ +#define ENTRIES_IN_SUM 512 +#define SUMMARY_SIZE (7) /* sizeof(struct summary) */ +#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ +#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) + +/* a summary entry for a 4KB-sized block in a segment */ +struct f2fs_summary { + __le32 nid; /* parent node id */ + union { + __u8 reserved[3]; + struct { + __u8 version; /* node version number */ + __le16 ofs_in_node; /* block index in parent node */ + } __packed; + }; +} __packed; + +/* summary block type, node or data, is stored to the summary_footer */ +#define SUM_TYPE_NODE (1) +#define SUM_TYPE_DATA (0) + +struct summary_footer { + unsigned char entry_type; /* SUM_TYPE_XXX */ + __u32 check_sum; /* summary checksum */ +} __packed; + +#define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\ + SUM_ENTRY_SIZE) +#define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct nat_journal_entry)) +#define NAT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct nat_journal_entry)) +#define SIT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct sit_journal_entry)) +#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct sit_journal_entry)) +/* + * frequently updated NAT/SIT entries can be stored in the spare area in + * summary blocks + */ +enum { + NAT_JOURNAL = 0, + SIT_JOURNAL +}; + +struct nat_journal_entry { + __le32 nid; + struct f2fs_nat_entry ne; +} __packed; + +struct nat_journal { + struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES]; + __u8 reserved[NAT_JOURNAL_RESERVED]; +} __packed; + +struct sit_journal_entry { + __le32 segno; + struct f2fs_sit_entry se; +} __packed; + +struct sit_journal { + struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES]; + __u8 reserved[SIT_JOURNAL_RESERVED]; +} __packed; + +/* 4KB-sized summary block structure */ +struct f2fs_summary_block { + struct f2fs_summary entries[ENTRIES_IN_SUM]; + union { + __le16 n_nats; + __le16 n_sits; + }; + /* spare area is used by NAT or SIT journals */ + union { + struct nat_journal nat_j; + struct sit_journal sit_j; + }; + struct summary_footer footer; +} __packed; + +/* + * For directory operations + */ +#define F2FS_DOT_HASH 0 +#define F2FS_DDOT_HASH F2FS_DOT_HASH +#define F2FS_MAX_HASH (~((0x3ULL) << 62)) +#define F2FS_HASH_COL_BIT ((0x1ULL) << 63) + +typedef __le32 f2fs_hash_t; + +/* One directory entry slot covers 8bytes-long file name */ +#define F2FS_SLOT_LEN 8 +#define F2FS_SLOT_LEN_BITS 3 + +#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) + +/* the number of dentry in a block */ +#define NR_DENTRY_IN_BLOCK 214 + +/* MAX level for dir lookup */ +#define MAX_DIR_HASH_DEPTH 63 + +#define SIZE_OF_DIR_ENTRY 11 /* by byte */ +#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ + BITS_PER_BYTE) +#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ + F2FS_SLOT_LEN) * \ + NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) + +/* One directory entry slot representing F2FS_SLOT_LEN-sized file name */ +struct f2fs_dir_entry { + __le32 hash_code; /* hash code of file name */ + __le32 ino; /* inode number */ + __le16 name_len; /* lengh of file name */ + __u8 file_type; /* file type */ +} __packed; + +/* 4KB-sized directory entry block */ +struct f2fs_dentry_block { + /* validity bitmap for directory entries in each block */ + __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; + __u8 reserved[SIZE_OF_RESERVED]; + struct f2fs_dir_entry 
dentry[NR_DENTRY_IN_BLOCK]; + __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; +} __packed; + +/* file types used in inode_info->flags */ +enum { + F2FS_FT_UNKNOWN, + F2FS_FT_REG_FILE, + F2FS_FT_DIR, + F2FS_FT_CHRDEV, + F2FS_FT_BLKDEV, + F2FS_FT_FIFO, + F2FS_FT_SOCK, + F2FS_FT_SYMLINK, + F2FS_FT_MAX +}; + +#endif /* _LINUX_F2FS_FS_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5c3b043a645..2e9d230ff95 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1743,6 +1743,19 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } +/** + * set_nlink - directly set an inode's link count + * @inode: inode + * @nlink: new nlink (should be non-zero) + * + * This is a low-level filesystem helper to replace any + * direct filesystem manipulation of i_nlink. + */ +static inline void set_nlink(struct inode *inode, unsigned int nlink) +{ + inode->i_nlink = nlink; +} + /** * inc_nlink - directly increment an inode's link count * @inode: inode diff --git a/include/linux/magic.h b/include/linux/magic.h index 1e5df2af8d8..2616b546e83 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -24,6 +24,7 @@ #define EXT4_SUPER_MAGIC 0xEF53 #define BTRFS_SUPER_MAGIC 0x9123683E #define NILFS_SUPER_MAGIC 0x3434 +#define F2FS_SUPER_MAGIC 0xF2F52010 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 diff --git a/include/linux/security.h b/include/linux/security.h index 95a6d8e24df..7de9c15ac01 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -36,6 +36,7 @@ #include #include #include +#include #include /* Maximum number of letters for an LSM name string */ @@ -147,6 +148,10 @@ extern int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif +/* security_inode_init_security callback function to write xattrs */ +typedef int (*initxattrs) (struct inode *inode, + const struct xattr *xattr_array, void *fs_data); + #ifdef CONFIG_SECURITY struct security_mnt_opts { @@ -1715,6 +1720,9 @@ void security_inode_free(struct inode *inode); int security_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, char **name, void **value, size_t *len); +int security_new_inode_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, + initxattrs initxattrs, void *fs_data); int security_inode_create(struct inode *dir, struct dentry *dentry, int mode); int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); @@ -2067,7 +2075,16 @@ static inline int security_inode_init_security(struct inode *inode, void **value, size_t *len) { - return -EOPNOTSUPP; + return 0; +} + +static inline int security_new_inode_init_security(struct inode *inode, + struct inode *dir, + const struct qstr *qstr, + initxattrs initxattrs, + void *fs_data) +{ + return 0; } static inline int security_inode_create(struct inode *dir, diff --git a/include/linux/xattr.h b/include/linux/xattr.h index aed54c50aa6..7a378662ddf 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -67,6 +67,12 @@ struct xattr_handler { size_t size, int flags, int handler_flags); }; +struct xattr { + char *name; + void *value; + size_t value_len; +}; + ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); diff --git 
a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h new file mode 100644 index 00000000000..52ae54828ed --- /dev/null +++ b/include/trace/events/f2fs.h @@ -0,0 +1,682 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM f2fs + +#if !defined(_TRACE_F2FS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_F2FS_H + +#include + +#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) +#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino + +#define show_block_type(type) \ + __print_symbolic(type, \ + { NODE, "NODE" }, \ + { DATA, "DATA" }, \ + { META, "META" }, \ + { META_FLUSH, "META_FLUSH" }) + +#define show_bio_type(type) \ + __print_symbolic(type, \ + { READ, "READ" }, \ + { READA, "READAHEAD" }, \ + { READ_SYNC, "READ_SYNC" }, \ + { WRITE, "WRITE" }, \ + { WRITE_SYNC, "WRITE_SYNC" }, \ + { WRITE_FLUSH, "WRITE_FLUSH" }, \ + { WRITE_FUA, "WRITE_FUA" }) + +#define show_data_type(type) \ + __print_symbolic(type, \ + { CURSEG_HOT_DATA, "Hot DATA" }, \ + { CURSEG_WARM_DATA, "Warm DATA" }, \ + { CURSEG_COLD_DATA, "Cold DATA" }, \ + { CURSEG_HOT_NODE, "Hot NODE" }, \ + { CURSEG_WARM_NODE, "Warm NODE" }, \ + { CURSEG_COLD_NODE, "Cold NODE" }, \ + { NO_CHECK_TYPE, "No TYPE" }) + +#define show_gc_type(type) \ + __print_symbolic(type, \ + { FG_GC, "Foreground GC" }, \ + { BG_GC, "Background GC" }) + +#define show_alloc_mode(type) \ + __print_symbolic(type, \ + { LFS, "LFS-mode" }, \ + { SSR, "SSR-mode" }) + +#define show_victim_policy(type) \ + __print_symbolic(type, \ + { GC_GREEDY, "Greedy" }, \ + { GC_CB, "Cost-Benefit" }) + +struct victim_sel_policy; + +DECLARE_EVENT_CLASS(f2fs__inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(ino_t, pino) + __field(umode_t, mode) + __field(loff_t, size) + __field(unsigned int, nlink) + __field(blkcnt_t, blocks) + __field(__u8, advise) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pino = F2FS_I(inode)->i_pino; + __entry->mode = inode->i_mode; + __entry->nlink = inode->i_nlink; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->advise = F2FS_I(inode)->i_advise; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pino = %lu, i_mode = 0x%hx, " + "i_size = %lld, i_nlink = %u, i_blocks = %llu, i_advise = 0x%x", + show_dev_ino(__entry), + (unsigned long)__entry->pino, + __entry->mode, + __entry->size, + (unsigned int)__entry->nlink, + (unsigned long long)__entry->blocks, + (unsigned char)__entry->advise) +); + +DECLARE_EVENT_CLASS(f2fs__inode_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, ret = %d", + show_dev_ino(__entry), + __entry->ret) +); + +DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +TRACE_EVENT(f2fs_sync_file_exit, + + TP_PROTO(struct inode *inode, bool need_cp, int datasync, int ret), + + TP_ARGS(inode, need_cp, datasync, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(bool, need_cp) + __field(int, datasync) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->need_cp = need_cp; + __entry->datasync = datasync; + __entry->ret = ret; 
+ ), + + TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, " + "datasync = %d, ret = %d", + show_dev_ino(__entry), + __entry->need_cp ? "needed" : "not needed", + __entry->datasync, + __entry->ret) +); + +TRACE_EVENT(f2fs_sync_fs, + + TP_PROTO(struct super_block *sb, int wait), + + TP_ARGS(sb, wait), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, dirty) + __field(int, wait) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->dirty = F2FS_SB(sb)->s_dirty; + __entry->wait = wait; + ), + + TP_printk("dev = (%d,%d), superblock is %s, wait = %d", + show_dev(__entry), + __entry->dirty ? "dirty" : "not dirty", + __entry->wait) +); + +DEFINE_EVENT(f2fs__inode, f2fs_iget, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_iget_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__inode, f2fs_evict_inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_new_inode, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +TRACE_EVENT(f2fs_unlink_enter, + + TP_PROTO(struct inode *dir, struct dentry *dentry), + + TP_ARGS(dir, dentry), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(const char *, name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->size = dir->i_size; + __entry->blocks = dir->i_blocks; + __entry->name = dentry->d_name.name; + ), + + TP_printk("dev = (%d,%d), dir ino = %lu, i_size = %lld, " + "i_blocks = %llu, name = %s", + show_dev_ino(__entry), + __entry->size, + (unsigned long long)__entry->blocks, + __entry->name) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__inode, f2fs_truncate, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +TRACE_EVENT(f2fs_truncate_data_blocks_range, + + TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs, int free), + + TP_ARGS(inode, nid, ofs, free), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid) + __field(unsigned int, ofs) + __field(int, free) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nid = nid; + __entry->ofs = ofs; + __entry->free = free; + ), + + TP_printk("dev = (%d,%d), ino = %lu, nid = %u, offset = %u, freed = %d", + show_dev_ino(__entry), + (unsigned int)__entry->nid, + __entry->ofs, + __entry->free) +); + +DECLARE_EVENT_CLASS(f2fs__truncate_op, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(u64, from) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->from = from; + ), + + TP_printk("dev = (%d,%d), ino = %lu, i_size = %lld, i_blocks = %llu, " + "start file offset = %llu", + show_dev_ino(__entry), + __entry->size, + (unsigned long long)__entry->blocks, + (unsigned long long)__entry->from) +); + +DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_blocks_enter, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_blocks_exit, + + TP_PROTO(struct inode *inode, int ret), + + 
TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_inode_blocks_enter, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_inode_blocks_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DECLARE_EVENT_CLASS(f2fs__truncate_node, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid) + __field(block_t, blk_addr) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nid = nid; + __entry->blk_addr = blk_addr; + ), + + TP_printk("dev = (%d,%d), ino = %lu, nid = %u, block_address = 0x%llx", + show_dev_ino(__entry), + (unsigned int)__entry->nid, + (unsigned long long)__entry->blk_addr) +); + +DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_nodes_enter, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_nodes_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr) +); + +TRACE_EVENT(f2fs_truncate_partial_nodes, + + TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err), + + TP_ARGS(inode, nid, depth, err), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid[3]) + __field(int, depth) + __field(int, err) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nid[0] = nid[0]; + __entry->nid[1] = nid[1]; + __entry->nid[2] = nid[2]; + __entry->depth = depth; + __entry->err = err; + ), + + TP_printk("dev = (%d,%d), ino = %lu, " + "nid[0] = %u, nid[1] = %u, nid[2] = %u, depth = %d, err = %d", + show_dev_ino(__entry), + (unsigned int)__entry->nid[0], + (unsigned int)__entry->nid[1], + (unsigned int)__entry->nid[2], + __entry->depth, + __entry->err) +); + +TRACE_EVENT_CONDITION(f2fs_readpage, + + TP_PROTO(struct page *page, sector_t blkaddr, int type), + + TP_ARGS(page, blkaddr, type), + + TP_CONDITION(page->mapping), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, index) + __field(sector_t, blkaddr) + __field(int, type) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = page->mapping->host->i_ino; + __entry->index = page->index; + __entry->blkaddr = blkaddr; + __entry->type = type; + ), + + TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " + "blkaddr = 0x%llx, bio_type = %s", + show_dev_ino(__entry), + (unsigned long)__entry->index, + (unsigned long long)__entry->blkaddr, + show_bio_type(__entry->type)) +); + +TRACE_EVENT(f2fs_get_data_block, + TP_PROTO(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int ret), + + TP_ARGS(inode, iblock, bh, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(sector_t, iblock) + __field(sector_t, bh_start) + __field(size_t, bh_size) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->iblock = iblock; + __entry->bh_start = bh->b_blocknr; + __entry->bh_size = bh->b_size; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " + "start blkaddr = 0x%llx, len = 0x%llx bytes, err = 
%d", + show_dev_ino(__entry), + (unsigned long long)__entry->iblock, + (unsigned long long)__entry->bh_start, + (unsigned long long)__entry->bh_size, + __entry->ret) +); + +TRACE_EVENT(f2fs_get_victim, + + TP_PROTO(struct super_block *sb, int type, int gc_type, + struct victim_sel_policy *p, unsigned int pre_victim, + unsigned int prefree, unsigned int free), + + TP_ARGS(sb, type, gc_type, p, pre_victim, prefree, free), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(int, gc_type) + __field(int, alloc_mode) + __field(int, gc_mode) + __field(unsigned int, victim) + __field(unsigned int, ofs_unit) + __field(unsigned int, pre_victim) + __field(unsigned int, prefree) + __field(unsigned int, free) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->type = type; + __entry->gc_type = gc_type; + __entry->alloc_mode = p->alloc_mode; + __entry->gc_mode = p->gc_mode; + __entry->victim = p->min_segno; + __entry->ofs_unit = p->ofs_unit; + __entry->pre_victim = pre_victim; + __entry->prefree = prefree; + __entry->free = free; + ), + + TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u " + "ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u", + show_dev(__entry), + show_data_type(__entry->type), + show_gc_type(__entry->gc_type), + show_alloc_mode(__entry->alloc_mode), + show_victim_policy(__entry->gc_mode), + __entry->victim, + __entry->ofs_unit, + (int)__entry->pre_victim, + __entry->prefree, + __entry->free) +); + +TRACE_EVENT(f2fs_fallocate, + + TP_PROTO(struct inode *inode, int mode, + loff_t offset, loff_t len, int ret), + + TP_ARGS(inode, mode, offset, len, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, mode) + __field(loff_t, offset) + __field(loff_t, len) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = mode; + __entry->offset = offset; + __entry->len = len; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, mode = %x, offset = %lld, " + "len = %lld, i_size = %lld, i_blocks = %llu, ret = %d", + show_dev_ino(__entry), + __entry->mode, + (unsigned long long)__entry->offset, + (unsigned long long)__entry->len, + (unsigned long long)__entry->size, + (unsigned long long)__entry->blocks, + __entry->ret) +); + +TRACE_EVENT(f2fs_reserve_new_block, + + TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), + + TP_ARGS(inode, nid, ofs_in_node), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(nid_t, nid) + __field(unsigned int, ofs_in_node) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = nid; + __entry->ofs_in_node = ofs_in_node; + ), + + TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u", + show_dev(__entry), + (unsigned int)__entry->nid, + __entry->ofs_in_node) +); + +TRACE_EVENT(f2fs_do_submit_bio, + + TP_PROTO(struct super_block *sb, int btype, bool sync, struct bio *bio), + + TP_ARGS(sb, btype, sync, bio), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, btype) + __field(bool, sync) + __field(sector_t, sector) + __field(unsigned int, size) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->btype = btype; + __entry->sync = sync; + __entry->sector = bio->bi_sector; + __entry->size = bio->bi_size; + ), + + TP_printk("dev = (%d,%d), type = %s, io = %s, sector = %lld, size = %u", + show_dev(__entry), + 
show_block_type(__entry->btype), + __entry->sync ? "sync" : "no sync", + (unsigned long long)__entry->sector, + __entry->size) +); + +TRACE_EVENT(f2fs_submit_write_page, + + TP_PROTO(struct page *page, block_t blk_addr, int type), + + TP_ARGS(page, blk_addr, type), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, type) + __field(pgoff_t, index) + __field(block_t, block) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = page->mapping->host->i_ino; + __entry->type = type; + __entry->index = page->index; + __entry->block = blk_addr; + ), + + TP_printk("dev = (%d,%d), ino = %lu, %s, index = %lu, blkaddr = 0x%llx", + show_dev_ino(__entry), + show_block_type(__entry->type), + (unsigned long)__entry->index, + (unsigned long long)__entry->block) +); + +TRACE_EVENT(f2fs_write_checkpoint, + + TP_PROTO(struct super_block *sb, bool is_umount, char *msg), + + TP_ARGS(sb, is_umount, msg), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(bool, is_umount) + __field(char *, msg) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->is_umount = is_umount; + __entry->msg = msg; + ), + + TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", + show_dev(__entry), + __entry->is_umount ? "clean umount" : "consistency", + __entry->msg) +); + +#endif /* _TRACE_F2FS_H */ + + /* This part must be outside protection */ +#include diff --git a/security/security.c b/security/security.c index 420198e5f32..0dc000974ec 100644 --- a/security/security.c +++ b/security/security.c @@ -18,6 +18,8 @@ #include #include +#define MAX_LSM_XATTR 1 + /* Boot-time LSM user choice */ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = CONFIG_DEFAULT_SECURITY; @@ -369,6 +371,37 @@ int security_inode_init_security(struct inode *inode, struct inode *dir, } EXPORT_SYMBOL(security_inode_init_security); +int security_new_inode_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, + const initxattrs initxattrs, void *fs_data) +{ + struct xattr new_xattrs[MAX_LSM_XATTR + 1]; + struct xattr *lsm_xattr; + int ret; + + if (unlikely(IS_PRIVATE(inode))) + return -EOPNOTSUPP; + + memset(new_xattrs, 0, sizeof new_xattrs); + if (!initxattrs) + return security_ops->inode_init_security(inode, dir, qstr, + NULL, NULL, NULL); + lsm_xattr = new_xattrs; + ret = security_ops->inode_init_security(inode, dir, qstr, + &lsm_xattr->name, + &lsm_xattr->value, + &lsm_xattr->value_len); + if (ret) + goto out; + ret = initxattrs(inode, new_xattrs, fs_data); +out: + kfree(lsm_xattr->name); + kfree(lsm_xattr->value); + + return (ret == -EOPNOTSUPP) ? 0 : ret; +} +EXPORT_SYMBOL(security_new_inode_init_security); + #ifdef CONFIG_SECURITY_PATH int security_path_mknod(struct path *dir, struct dentry *dentry, int mode, unsigned int dev) From b945de80db7cca2d4e9cfb1c05323b26768a31ec Mon Sep 17 00:00:00 2001 From: Metallice Date: Sun, 6 Apr 2014 11:49:21 -0400 Subject: [PATCH 617/678] Revert "F2FS for /data to improve IO Performance (2/2)" This reverts commit 183c93be601a67ff4e929a770a28e68ced07372a. 
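For reference, the security_new_inode_init_security()/initxattrs pair introduced in the security.h and security.c hunks above is intended to be driven from a filesystem's inode-creation path. Below is a minimal sketch of such a callback; the helper name my_fs_setxattr and the call site are hypothetical and are not code from this series, they only illustrate how the hook is meant to be consumed.

	/* Illustrative initxattrs callback: write every xattr the LSM handed
	 * us into the freshly created inode.  The array built by
	 * security_new_inode_init_security() is zero-filled past the last
	 * LSM-provided entry, so a NULL name terminates the walk. */
	static int my_fs_initxattrs(struct inode *inode,
				    const struct xattr *xattr_array,
				    void *fs_data)
	{
		const struct xattr *xattr;
		int err = 0;

		for (xattr = xattr_array; xattr->name != NULL; xattr++) {
			err = my_fs_setxattr(inode, xattr->name,
					     xattr->value, xattr->value_len);
			if (err)
				break;
		}
		return err;
	}

	/* ...and from the filesystem's create/mkdir path: */
	err = security_new_inode_init_security(inode, dir, &dentry->d_name,
					       my_fs_initxattrs, NULL);

Iterating until name == NULL matches the implementation shown above, which builds new_xattrs as MAX_LSM_XATTR + 1 zeroed entries and fills in at most the first one.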
--- Documentation/filesystems/00-INDEX | 2 - Documentation/filesystems/f2fs.txt | 502 ----- arch/arm/configs/metallice_grouper_defconfig | 3 - arch/arm/configs/tegra3_android_defconfig | 3 - fs/Kconfig | 1 - fs/Makefile | 1 - fs/dcache.c | 4 +- fs/f2fs/Kconfig | 65 - fs/f2fs/Makefile | 7 - fs/f2fs/acl.c | 423 ---- fs/f2fs/acl.h | 57 - fs/f2fs/checkpoint.c | 860 -------- fs/f2fs/data.c | 790 -------- fs/f2fs/debug.c | 353 ---- fs/f2fs/dir.c | 714 ------- fs/f2fs/f2fs.h | 1290 ------------ fs/f2fs/file.c | 725 ------- fs/f2fs/gc.c | 738 ------- fs/f2fs/gc.h | 110 -- fs/f2fs/hash.c | 101 - fs/f2fs/inode.c | 273 --- fs/f2fs/namei.c | 557 ------ fs/f2fs/node.c | 1859 ------------------ fs/f2fs/node.h | 345 ---- fs/f2fs/recovery.c | 502 ----- fs/f2fs/segment.c | 1787 ----------------- fs/f2fs/segment.h | 637 ------ fs/f2fs/super.c | 1154 ----------- fs/f2fs/xattr.c | 600 ------ fs/f2fs/xattr.h | 152 -- include/linux/dcache.h | 1 - include/linux/f2fs_fs.h | 424 ---- include/linux/fs.h | 13 - include/linux/magic.h | 1 - include/linux/security.h | 19 +- include/linux/xattr.h | 6 - include/trace/events/f2fs.h | 682 ------- security/security.c | 33 - 38 files changed, 3 insertions(+), 15791 deletions(-) delete mode 100644 Documentation/filesystems/f2fs.txt delete mode 100644 fs/f2fs/Kconfig delete mode 100644 fs/f2fs/Makefile delete mode 100644 fs/f2fs/acl.c delete mode 100644 fs/f2fs/acl.h delete mode 100644 fs/f2fs/checkpoint.c delete mode 100644 fs/f2fs/data.c delete mode 100644 fs/f2fs/debug.c delete mode 100644 fs/f2fs/dir.c delete mode 100644 fs/f2fs/f2fs.h delete mode 100644 fs/f2fs/file.c delete mode 100644 fs/f2fs/gc.c delete mode 100644 fs/f2fs/gc.h delete mode 100644 fs/f2fs/hash.c delete mode 100644 fs/f2fs/inode.c delete mode 100644 fs/f2fs/namei.c delete mode 100644 fs/f2fs/node.c delete mode 100644 fs/f2fs/node.h delete mode 100644 fs/f2fs/recovery.c delete mode 100644 fs/f2fs/segment.c delete mode 100644 fs/f2fs/segment.h delete mode 100644 fs/f2fs/super.c delete mode 100644 fs/f2fs/xattr.c delete mode 100644 fs/f2fs/xattr.h delete mode 100644 include/linux/f2fs_fs.h delete mode 100644 include/trace/events/f2fs.h diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index ce5fd467791..8c624a18f67 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -48,8 +48,6 @@ ext4.txt - info, mount options and specifications for the Ext4 filesystem. files.txt - info on file management in the Linux kernel. -f2fs.txt - - info and mount options for the F2FS filesystem. fuse.txt - info on the Filesystem in User SpacE including mount options. gfs2.txt diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt deleted file mode 100644 index d225139cacf..00000000000 --- a/Documentation/filesystems/f2fs.txt +++ /dev/null @@ -1,502 +0,0 @@ -================================================================================ -WHAT IS Flash-Friendly File System (F2FS)? -================================================================================ - -NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have -been equipped on a variety systems ranging from mobile to server systems. Since -they are known to have different characteristics from the conventional rotating -disks, a file system, an upper layer to the storage device, should adapt to the -changes from the sketch in the design level. 
- -F2FS is a file system exploiting NAND flash memory-based storage devices, which -is based on Log-structured File System (LFS). The design has been focused on -addressing the fundamental issues in LFS, which are snowball effect of wandering -tree and high cleaning overhead. - -Since a NAND flash memory-based storage device shows different characteristic -according to its internal geometry or flash memory management scheme, namely FTL, -F2FS and its tools support various parameters not only for configuring on-disk -layout, but also for selecting allocation and cleaning algorithms. - -The following git tree provides the file system formatting tool (mkfs.f2fs), -a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). ->> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git - -For reporting bugs and sending patches, please use the following mailing list: ->> linux-f2fs-devel@lists.sourceforge.net - -================================================================================ -BACKGROUND AND DESIGN ISSUES -================================================================================ - -Log-structured File System (LFS) --------------------------------- -"A log-structured file system writes all modifications to disk sequentially in -a log-like structure, thereby speeding up both file writing and crash recovery. -The log is the only structure on disk; it contains indexing information so that -files can be read back from the log efficiently. In order to maintain large free -areas on disk for fast writing, we divide the log into segments and use a -segment cleaner to compress the live information from heavily fragmented -segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and -implementation of a log-structured file system", ACM Trans. Computer Systems -10, 1, 26–52. - -Wandering Tree Problem ----------------------- -In LFS, when a file data is updated and written to the end of log, its direct -pointer block is updated due to the changed location. Then the indirect pointer -block is also updated due to the direct pointer block update. In this manner, -the upper index structures such as inode, inode map, and checkpoint block are -also updated recursively. This problem is called as wandering tree problem [1], -and in order to enhance the performance, it should eliminate or relax the update -propagation as much as possible. - -[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/ - -Cleaning Overhead ------------------ -Since LFS is based on out-of-place writes, it produces so many obsolete blocks -scattered across the whole storage. In order to serve new empty log space, it -needs to reclaim these obsolete blocks seamlessly to users. This job is called -as a cleaning process. - -The process consists of three operations as follows. -1. A victim segment is selected through referencing segment usage table. -2. It loads parent index structures of all the data in the victim identified by - segment summary blocks. -3. It checks the cross-reference between the data and its parent index structure. -4. It moves valid data selectively. - -This cleaning job may cause unexpected long delays, so the most important goal -is to hide the latencies to users. And also definitely, it should reduce the -amount of valid data to be moved, and move them quickly as well. 
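The victim-selection step of the cleaning process described above can be sketched as follows. This is an illustration only, assuming a flat per-segment valid-block counter stands in for the real segment usage table (SIT); it is not the fs/f2fs/gc.c implementation.

	/* Greedy victim selection sketch: pick the dirty segment with the
	 * fewest valid blocks, i.e. the one that is cheapest to clean. */
	static int select_victim_greedy(const unsigned int *valid_blocks,
					unsigned int total_segments,
					unsigned int blocks_per_segment)
	{
		unsigned int segno;
		unsigned int best = total_segments;	/* sentinel: none found */
		unsigned int min_valid = blocks_per_segment + 1;

		for (segno = 0; segno < total_segments; segno++) {
			if (valid_blocks[segno] == blocks_per_segment)
				continue;	/* fully valid: nothing to reclaim */
			if (valid_blocks[segno] < min_valid) {
				min_valid = valid_blocks[segno];
				best = segno;
			}
		}
		return best == total_segments ? -1 : (int)best;
	}

The greedy policy simply minimizes the amount of valid data that has to be copied out of the victim, which is exactly the "reduce the amount of valid data to be moved" goal stated above.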
- -================================================================================ -KEY FEATURES -================================================================================ - -Flash Awareness ---------------- -- Enlarge the random write area for better performance, but provide the high - spatial locality -- Align FS data structures to the operational units in FTL as best efforts - -Wandering Tree Problem ----------------------- -- Use a term, “nodeâ€, that represents inodes as well as various pointer blocks -- Introduce Node Address Table (NAT) containing the locations of all the “node†- blocks; this will cut off the update propagation. - -Cleaning Overhead ------------------ -- Support a background cleaning process -- Support greedy and cost-benefit algorithms for victim selection policies -- Support multi-head logs for static/dynamic hot and cold data separation -- Introduce adaptive logging for efficient block allocation - -================================================================================ -MOUNT OPTIONS -================================================================================ - -background_gc=%s Turn on/off cleaning operations, namely garbage - collection, triggered in background when I/O subsystem is - idle. If background_gc=on, it will turn on the garbage - collection and if background_gc=off, garbage collection - will be truned off. - Default value for this option is on. So garbage - collection is on by default. -disable_roll_forward Disable the roll-forward recovery routine -discard Issue discard/TRIM commands when a segment is cleaned. -no_heap Disable heap-style segment allocation which finds free - segments for data from the beginning of main area, while - for node from the end of main area. -nouser_xattr Disable Extended User Attributes. Note: xattr is enabled - by default if CONFIG_F2FS_FS_XATTR is selected. -noacl Disable POSIX Access Control List. Note: acl is enabled - by default if CONFIG_F2FS_FS_POSIX_ACL is selected. -active_logs=%u Support configuring the number of active logs. In the - current design, f2fs supports only 2, 4, and 6 logs. - Default number is 6. -disable_ext_identify Disable the extension list configured by mkfs, so f2fs - does not aware of cold files such as media files. -inline_xattr Enable the inline xattrs feature. - -================================================================================ -DEBUGFS ENTRIES -================================================================================ - -/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as -f2fs. Each file shows the whole f2fs information. - -/sys/kernel/debug/f2fs/status includes: - - major file system information managed by f2fs currently - - average SIT information about whole segments - - current memory footprint consumed by f2fs. - -================================================================================ -SYSFS ENTRIES -================================================================================ - -Information about mounted f2f2 file systems can be found in -/sys/fs/f2fs. Each mounted filesystem will have a directory in -/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda). -The files in each per-device directory are shown in table below. - -Files in /sys/fs/f2fs/ -(see also Documentation/ABI/testing/sysfs-fs-f2fs) -.............................................................................. 
- File Content - - gc_max_sleep_time This tuning parameter controls the maximum sleep - time for the garbage collection thread. Time is - in milliseconds. - - gc_min_sleep_time This tuning parameter controls the minimum sleep - time for the garbage collection thread. Time is - in milliseconds. - - gc_no_gc_sleep_time This tuning parameter controls the default sleep - time for the garbage collection thread. Time is - in milliseconds. - - gc_idle This parameter controls the selection of victim - policy for garbage collection. Setting gc_idle = 0 - (default) will disable this option. Setting - gc_idle = 1 will select the Cost Benefit approach - & setting gc_idle = 2 will select the greedy aproach. - - reclaim_segments This parameter controls the number of prefree - segments to be reclaimed. If the number of prefree - segments is larger than this number, f2fs tries to - conduct checkpoint to reclaim the prefree segments - to free segments. By default, 100 segments, 200MB. - -================================================================================ -USAGE -================================================================================ - -1. Download userland tools and compile them. - -2. Skip, if f2fs was compiled statically inside kernel. - Otherwise, insert the f2fs.ko module. - # insmod f2fs.ko - -3. Create a directory trying to mount - # mkdir /mnt/f2fs - -4. Format the block device, and then mount as f2fs - # mkfs.f2fs -l label /dev/block_device - # mount -t f2fs /dev/block_device /mnt/f2fs - -mkfs.f2fs ---------- -The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem, -which builds a basic on-disk layout. - -The options consist of: --l [label] : Give a volume label, up to 512 unicode name. --a [0 or 1] : Split start location of each area for heap-based allocation. - 1 is set by default, which performs this. --o [int] : Set overprovision ratio in percent over volume size. - 5 is set by default. --s [int] : Set the number of segments per section. - 1 is set by default. --z [int] : Set the number of sections per zone. - 1 is set by default. --e [str] : Set basic extension list. e.g. "mp3,gif,mov" --t [0 or 1] : Disable discard command or not. - 1 is set by default, which conducts discard. - -fsck.f2fs ---------- -The fsck.f2fs is a tool to check the consistency of an f2fs-formatted -partition, which examines whether the filesystem metadata and user-made data -are cross-referenced correctly or not. -Note that, initial version of the tool does not fix any inconsistency. - -The options consist of: - -d debug level [default:0] - -dump.f2fs ---------- -The dump.f2fs shows the information of specific inode and dumps SSA and SIT to -file. Each file is dump_ssa and dump_sit. - -The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. -It shows on-disk inode information reconized by a given inode number, and is -able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and -./dump_sit respectively. 
- -The options consist of: - -d debug level [default:0] - -i inode no (hex) - -s [SIT dump segno from #1~#2 (decimal), for all 0~-1] - -a [SSA dump segno from #1~#2 (decimal), for all 0~-1] - -Examples: -# dump.f2fs -i [ino] /dev/sdx -# dump.f2fs -s 0~-1 /dev/sdx (SIT dump) -# dump.f2fs -a 0~-1 /dev/sdx (SSA dump) - -================================================================================ -DESIGN -================================================================================ - -On-disk Layout --------------- - -F2FS divides the whole volume into a number of segments, each of which is fixed -to 2MB in size. A section is composed of consecutive segments, and a zone -consists of a set of sections. By default, section and zone sizes are set to one -segment size identically, but users can easily modify the sizes by mkfs. - -F2FS splits the entire volume into six areas, and all the areas except superblock -consists of multiple segments as described below. - - align with the zone size <-| - |-> align with the segment size - _________________________________________________________________________ - | | | Segment | Node | Segment | | - | Superblock | Checkpoint | Info. | Address | Summary | Main | - | (SB) | (CP) | Table (SIT) | Table (NAT) | Area (SSA) | | - |____________|_____2______|______N______|______N______|______N_____|__N___| - . . - . . - . . - ._________________________________________. - |_Segment_|_..._|_Segment_|_..._|_Segment_| - . . - ._________._________ - |_section_|__...__|_ - . . - .________. - |__zone__| - -- Superblock (SB) - : It is located at the beginning of the partition, and there exist two copies - to avoid file system crash. It contains basic partition information and some - default parameters of f2fs. - -- Checkpoint (CP) - : It contains file system information, bitmaps for valid NAT/SIT sets, orphan - inode lists, and summary entries of current active segments. - -- Segment Information Table (SIT) - : It contains segment information such as valid block count and bitmap for the - validity of all the blocks. - -- Node Address Table (NAT) - : It is composed of a block address table for all the node blocks stored in - Main area. - -- Segment Summary Area (SSA) - : It contains summary entries which contains the owner information of all the - data and node blocks stored in Main area. - -- Main Area - : It contains file and directory data including their indices. - -In order to avoid misalignment between file system and flash-based storage, F2FS -aligns the start block address of CP with the segment size. Also, it aligns the -start block address of Main area with the zone size by reserving some segments -in SSA area. - -Reference the following survey for additional technical details. -https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey - -File System Metadata Structure ------------------------------- - -F2FS adopts the checkpointing scheme to maintain file system consistency. At -mount time, F2FS first tries to find the last valid checkpoint data by scanning -CP area. In order to reduce the scanning time, F2FS uses only two copies of CP. -One of them always indicates the last valid data, which is called as shadow copy -mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism. - -For file system consistency, each CP points to which NAT and SIT copies are -valid, as shown as below. - - +--------+----------+---------+ - | CP | SIT | NAT | - +--------+----------+---------+ - . . . . - . . . . - . . . . 
- +-------+-------+--------+--------+--------+--------+ - | CP #0 | CP #1 | SIT #0 | SIT #1 | NAT #0 | NAT #1 | - +-------+-------+--------+--------+--------+--------+ - | ^ ^ - | | | - `----------------------------------------' - -Index Structure ---------------- - -The key data structure to manage the data locations is a "node". Similar to -traditional file structures, F2FS has three types of node: inode, direct node, -indirect node. F2FS assigns 4KB to an inode block which contains 923 data block -indices, two direct node pointers, two indirect node pointers, and one double -indirect node pointer as described below. One direct node block contains 1018 -data blocks, and one indirect node block contains also 1018 node blocks. Thus, -one inode block (i.e., a file) covers: - - 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB. - - Inode block (4KB) - |- data (923) - |- direct node (2) - | `- data (1018) - |- indirect node (2) - | `- direct node (1018) - | `- data (1018) - `- double indirect node (1) - `- indirect node (1018) - `- direct node (1018) - `- data (1018) - -Note that, all the node blocks are mapped by NAT which means the location of -each node is translated by the NAT table. In the consideration of the wandering -tree problem, F2FS is able to cut off the propagation of node updates caused by -leaf data writes. - -Directory Structure -------------------- - -A directory entry occupies 11 bytes, which consists of the following attributes. - -- hash hash value of the file name -- ino inode number -- len the length of file name -- type file type such as directory, symlink, etc - -A dentry block consists of 214 dentry slots and file names. Therein a bitmap is -used to represent whether each dentry is valid or not. A dentry block occupies -4KB with the following composition. - - Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) + - dentries(11 * 214 bytes) + file name (8 * 214 bytes) - - [Bucket] - +--------------------------------+ - |dentry block 1 | dentry block 2 | - +--------------------------------+ - . . - . . - . [Dentry Block Structure: 4KB] . - +--------+----------+----------+------------+ - | bitmap | reserved | dentries | file names | - +--------+----------+----------+------------+ - [Dentry Block: 4KB] . . - . . - . . - +------+------+-----+------+ - | hash | ino | len | type | - +------+------+-----+------+ - [Dentry Structure: 11 bytes] - -F2FS implements multi-level hash tables for directory structure. Each level has -a hash table with dedicated number of hash buckets as shown below. Note that -"A(2B)" means a bucket includes 2 data blocks. - ----------------------- -A : bucket -B : block -N : MAX_DIR_HASH_DEPTH ----------------------- - -level #0 | A(2B) - | -level #1 | A(2B) - A(2B) - | -level #2 | A(2B) - A(2B) - A(2B) - A(2B) - . | . . . . -level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) - . | . . . . -level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) - -The number of blocks and buckets are determined by, - - ,- 2, if n < MAX_DIR_HASH_DEPTH / 2, - # of blocks in level #n = | - `- 4, Otherwise - - ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2, - # of buckets in level #n = | - `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise - -When F2FS finds a file name in a directory, at first a hash value of the file -name is calculated. Then, F2FS scans the hash table in level #0 to find the -dentry consisting of the file name and its inode number. If not found, F2FS -scans the next hash table in level #1. 
In this way, F2FS scans hash tables in -each levels incrementally from 1 to N. In each levels F2FS needs to scan only -one bucket determined by the following equation, which shows O(log(# of files)) -complexity. - - bucket number to scan in level #n = (hash value) % (# of buckets in level #n) - -In the case of file creation, F2FS finds empty consecutive slots that cover the -file name. F2FS searches the empty slots in the hash tables of whole levels from -1 to N in the same way as the lookup operation. - -The following figure shows an example of two cases holding children. - --------------> Dir <-------------- - | | - child child - - child - child [hole] - child - - child - child - child [hole] - [hole] - child - - Case 1: Case 2: - Number of children = 6, Number of children = 3, - File size = 7 File size = 7 - -Default Block Allocation ------------------------- - -At runtime, F2FS manages six active logs inside "Main" area: Hot/Warm/Cold node -and Hot/Warm/Cold data. - -- Hot node contains direct node blocks of directories. -- Warm node contains direct node blocks except hot node blocks. -- Cold node contains indirect node blocks -- Hot data contains dentry blocks -- Warm data contains data blocks except hot and cold data blocks -- Cold data contains multimedia data or migrated data blocks - -LFS has two schemes for free space management: threaded log and copy-and-compac- -tion. The copy-and-compaction scheme which is known as cleaning, is well-suited -for devices showing very good sequential write performance, since free segments -are served all the time for writing new data. However, it suffers from cleaning -overhead under high utilization. Contrarily, the threaded log scheme suffers -from random writes, but no cleaning process is needed. F2FS adopts a hybrid -scheme where the copy-and-compaction scheme is adopted by default, but the -policy is dynamically changed to the threaded log scheme according to the file -system status. - -In order to align F2FS with underlying flash-based storage, F2FS allocates a -segment in a unit of section. F2FS expects that the section size would be the -same as the unit size of garbage collection in FTL. Furthermore, with respect -to the mapping granularity in FTL, F2FS allocates each section of the active -logs from different zones as much as possible, since FTL can write the data in -the active logs into one allocation unit according to its mapping granularity. - -Cleaning process ----------------- - -F2FS does cleaning both on demand and in the background. On-demand cleaning is -triggered when there are not enough free segments to serve VFS calls. Background -cleaner is operated by a kernel thread, and triggers the cleaning job when the -system is idle. - -F2FS supports two victim selection policies: greedy and cost-benefit algorithms. -In the greedy algorithm, F2FS selects a victim segment having the smallest number -of valid blocks. In the cost-benefit algorithm, F2FS selects a victim segment -according to the segment age and the number of valid blocks in order to address -log block thrashing problem in the greedy algorithm. F2FS adopts the greedy -algorithm for on-demand cleaner, while background cleaner adopts cost-benefit -algorithm. - -In order to identify whether the data in the victim segment are valid or not, -F2FS manages a bitmap. Each bit represents the validity of a block, and the -bitmap is composed of a bit stream covering whole blocks in main area. 
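The level and bucket arithmetic described in the directory-structure section above can be written out as a small sketch. It is illustrative only, mirroring the formulas in the text rather than the actual fs/f2fs/dir.c helpers.

	#define MAX_DIR_HASH_DEPTH	63

	/* # of hash buckets in level #n */
	static unsigned int dir_buckets(unsigned int level)
	{
		if (level < MAX_DIR_HASH_DEPTH / 2)
			return 1U << level;
		return 1U << ((MAX_DIR_HASH_DEPTH / 2) - 1);
	}

	/* # of dentry blocks per bucket in level #n */
	static unsigned int bucket_blocks(unsigned int level)
	{
		return (level < MAX_DIR_HASH_DEPTH / 2) ? 2 : 4;
	}

	/* the single bucket that must be scanned in level #n */
	static unsigned int bucket_to_scan(unsigned int level, unsigned int hash)
	{
		return hash % dir_buckets(level);
	}

Because only one bucket is examined per level, a lookup touches a number of buckets proportional to the level depth reached, which is the O(log(# of files)) behaviour noted above.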
\ No newline at end of file diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 7381367c327..241c0b045cd 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -3027,9 +3027,6 @@ CONFIG_PROC_PAGE_MONITOR=y CONFIG_REPORT_PRESENT_CPUS=y CONFIG_SYSFS=y CONFIG_TMPFS=y -CONFIG_F2FS_FS=y -CONFIG_F2FS_FS_XATTR=y -CONFIG_F2FS_FS_SECURITY=y # CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_TMPFS_XATTR is not set # CONFIG_HUGETLB_PAGE is not set diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig index 26f4a860a0e..4b859b32d14 100644 --- a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -482,9 +482,6 @@ CONFIG_FUSE_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y -CONFIG_F2FS_FS=y -CONFIG_F2FS_FS_XATTR=y -CONFIG_F2FS_FS_SECURITY=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_PARTITION_ADVANCED=y diff --git a/fs/Kconfig b/fs/Kconfig index aebcee21e5d..3130a45eafa 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -219,7 +219,6 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" -source "fs/f2fs/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/Makefile b/fs/Makefile index 63e532972b3..cd17b767c56 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -120,7 +120,6 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ -obj-$(CONFIG_F2FS_FS) += f2fs/ obj-$(CONFIG_EXOFS_FS) += exofs/ obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ diff --git a/fs/dcache.c b/fs/dcache.c index 239f5e664aa..8b732a205d5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1438,7 +1438,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode) return alias; } -struct dentry * d_find_any_alias(struct inode *inode) +static struct dentry * d_find_any_alias(struct inode *inode) { struct dentry *de; @@ -1447,7 +1447,7 @@ struct dentry * d_find_any_alias(struct inode *inode) spin_unlock(&inode->i_lock); return de; } -EXPORT_SYMBOL(d_find_any_alias); + /** * d_obtain_alias - find or allocate a dentry for a given inode diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig deleted file mode 100644 index e06e0995e00..00000000000 --- a/fs/f2fs/Kconfig +++ /dev/null @@ -1,65 +0,0 @@ -config F2FS_FS - tristate "F2FS filesystem support (EXPERIMENTAL)" - depends on BLOCK - help - F2FS is based on Log-structured File System (LFS), which supports - versatile "flash-friendly" features. The design has been focused on - addressing the fundamental issues in LFS, which are snowball effect - of wandering tree and high cleaning overhead. - - Since flash-based storages show different characteristics according to - the internal geometry or flash memory management schemes aka FTL, F2FS - and tools support various parameters not only for configuring on-disk - layout, but also for selecting allocation and cleaning algorithms. - - If unsure, say N. - -config F2FS_STAT_FS - bool "F2FS Status Information" - depends on F2FS_FS && DEBUG_FS - default y - help - /sys/kernel/debug/f2fs/ contains information about all the partitions - mounted as f2fs. Each file shows the whole f2fs information. - - /sys/kernel/debug/f2fs/status includes: - - major file system information managed by f2fs currently - - average SIT information about whole segments - - current memory footprint consumed by f2fs. 
- -config F2FS_FS_XATTR - bool "F2FS extended attributes" - depends on F2FS_FS - default y - help - Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - for details). - - If unsure, say N. - -config F2FS_FS_POSIX_ACL - bool "F2FS Access Control Lists" - depends on F2FS_FS_XATTR - select FS_POSIX_ACL - default y - help - Posix Access Control Lists (ACLs) support permissions for users and - gourps beyond the owner/group/world scheme. - - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website . - - If you don't know what Access Control Lists are, say N - -config F2FS_FS_SECURITY - bool "F2FS Security Labels" - depends on F2FS_FS_XATTR - help - Security labels provide an access control facility to support Linux - Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO - Linux. This option enables an extended attribute handler for file - security labels in the f2fs filesystem, so that it requires enabling - the extended attribute support in advance. - - If you are not using a security module, say N. diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile deleted file mode 100644 index 27a0820340b..00000000000 --- a/fs/f2fs/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -obj-$(CONFIG_F2FS_FS) += f2fs.o - -f2fs-y := dir.o file.o inode.o namei.o hash.o super.o -f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o -f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o -f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o -f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c deleted file mode 100644 index b20ced33113..00000000000 --- a/fs/f2fs/acl.c +++ /dev/null @@ -1,423 +0,0 @@ -/* - * fs/f2fs/acl.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * Portions of this code from linux/fs/ext2/acl.c - * - * Copyright (C) 2001-2003 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include "f2fs.h" -#include "xattr.h" -#include "acl.h" - -#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? 
\ - (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) - -static inline size_t f2fs_acl_size(int count) -{ - if (count <= 4) { - return sizeof(struct f2fs_acl_header) + - count * sizeof(struct f2fs_acl_entry_short); - } else { - return sizeof(struct f2fs_acl_header) + - 4 * sizeof(struct f2fs_acl_entry_short) + - (count - 4) * sizeof(struct f2fs_acl_entry); - } -} - -static inline int f2fs_acl_count(size_t size) -{ - ssize_t s; - size -= sizeof(struct f2fs_acl_header); - s = size - 4 * sizeof(struct f2fs_acl_entry_short); - if (s < 0) { - if (size % sizeof(struct f2fs_acl_entry_short)) - return -1; - return size / sizeof(struct f2fs_acl_entry_short); - } else { - if (s % sizeof(struct f2fs_acl_entry)) - return -1; - return s / sizeof(struct f2fs_acl_entry) + 4; - } -} - -static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) -{ - int i, count; - struct posix_acl *acl; - struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value; - struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); - const char *end = value + size; - - if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) - return ERR_PTR(-EINVAL); - - count = f2fs_acl_count(size); - if (count < 0) - return ERR_PTR(-EINVAL); - if (count == 0) - return NULL; - - acl = posix_acl_alloc(count, GFP_KERNEL); - if (!acl) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < count; i++) { - - if ((char *)entry > end) - goto fail; - - acl->a_entries[i].e_tag = le16_to_cpu(entry->e_tag); - acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm); - - switch (acl->a_entries[i].e_tag) { - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - entry = (struct f2fs_acl_entry *)((char *)entry + - sizeof(struct f2fs_acl_entry_short)); - break; - - case ACL_USER: - case ACL_GROUP: - acl->a_entries[i].e_id = le32_to_cpu(entry->e_id); - entry = (struct f2fs_acl_entry *)((char *)entry + - sizeof(struct f2fs_acl_entry)); - break; - default: - goto fail; - } - } - if ((char *)entry != end) - goto fail; - return acl; -fail: - posix_acl_release(acl); - return ERR_PTR(-EINVAL); -} - -static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) -{ - struct f2fs_acl_header *f2fs_acl; - struct f2fs_acl_entry *entry; - int i; - - f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * - sizeof(struct f2fs_acl_entry), GFP_KERNEL); - if (!f2fs_acl) - return ERR_PTR(-ENOMEM); - - f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION); - entry = (struct f2fs_acl_entry *)(f2fs_acl + 1); - - for (i = 0; i < acl->a_count; i++) { - - entry->e_tag = cpu_to_le16(acl->a_entries[i].e_tag); - entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm); - - switch (acl->a_entries[i].e_tag) { - case ACL_USER: - case ACL_GROUP: - entry->e_id = cpu_to_le32(acl->a_entries[i].e_id); - entry = (struct f2fs_acl_entry *)((char *)entry + - sizeof(struct f2fs_acl_entry)); - break; - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - entry = (struct f2fs_acl_entry *)((char *)entry + - sizeof(struct f2fs_acl_entry_short)); - break; - default: - goto fail; - } - } - *size = f2fs_acl_size(acl->a_count); - return (void *)f2fs_acl; - -fail: - kfree(f2fs_acl); - return ERR_PTR(-EINVAL); -} - -struct posix_acl *f2fs_get_acl(struct inode *inode, int type) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; - void *value = NULL; - struct posix_acl *acl; - int retval; - - if (!test_opt(sbi, POSIX_ACL)) - return NULL; - - acl = get_cached_acl(inode, type); - if (acl 
!= ACL_NOT_CACHED) - return acl; - - if (type == ACL_TYPE_ACCESS) - name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - - retval = f2fs_getxattr(inode, name_index, "", NULL, 0); - if (retval > 0) { - value = kmalloc(retval, GFP_KERNEL); - if (!value) - return ERR_PTR(-ENOMEM); - retval = f2fs_getxattr(inode, name_index, "", value, retval); - } - - if (retval > 0) - acl = f2fs_acl_from_disk(value, retval); - else if (retval == -ENODATA) - acl = NULL; - else - acl = ERR_PTR(retval); - kfree(value); - - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - - return acl; -} - -static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct f2fs_inode_info *fi = F2FS_I(inode); - int name_index; - void *value = NULL; - size_t size = 0; - int error; - - if (!test_opt(sbi, POSIX_ACL)) - return 0; - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - switch (type) { - case ACL_TYPE_ACCESS: - name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) - return error; - set_acl_inode(fi, inode->i_mode); - if (error == 0) - acl = NULL; - } - break; - - case ACL_TYPE_DEFAULT: - name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; - if (!S_ISDIR(inode->i_mode)) - return acl ? -EACCES : 0; - break; - - default: - return -EINVAL; - } - - if (acl) { - value = f2fs_acl_to_disk(acl, &size); - if (IS_ERR(value)) { - cond_clear_inode_flag(fi, FI_ACL_MODE); - return (int)PTR_ERR(value); - } - } - - error = f2fs_setxattr(inode, name_index, "", value, size, NULL); - - kfree(value); - if (!error) - set_cached_acl(inode, type, acl); - - cond_clear_inode_flag(fi, FI_ACL_MODE); - return error; -} - -int f2fs_init_acl(struct inode *inode, struct inode *dir) -{ - struct posix_acl *acl = NULL; - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - int error = 0; - - if (!S_ISLNK(inode->i_mode)) { - if (test_opt(sbi, POSIX_ACL)) { - acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return PTR_ERR(acl); - } - if (!acl && !(test_opt(sbi, ANDROID_EMU) && - F2FS_I(inode)->i_advise & FADVISE_ANDROID_EMU)) - inode->i_mode &= ~current_umask(); - } - - if (test_opt(sbi, POSIX_ACL) && acl) { - - if (S_ISDIR(inode->i_mode)) { - error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl); - if (error) - goto cleanup; - } - error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); - if (error < 0) - return error; - if (error > 0) - error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); - } -cleanup: - posix_acl_release(acl); - return error; -} - -int f2fs_acl_chmod(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct posix_acl *acl; - int error; - mode_t mode = get_inode_mode(inode); - - if (!test_opt(sbi, POSIX_ACL)) - return 0; - if (S_ISLNK(mode)) - return -EOPNOTSUPP; - - acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); - - error = posix_acl_chmod(&acl, GFP_KERNEL, mode); - if (error) - return error; - error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); - posix_acl_release(acl); - return error; -} - -int f2fs_android_emu(struct f2fs_sb_info *sbi, struct inode *inode, - u32 *uid, u32 *gid, umode_t *mode) -{ - F2FS_I(inode)->i_advise |= FADVISE_ANDROID_EMU; - - if (uid) - *uid = sbi->android_emu_uid; - if (gid) - *gid = sbi->android_emu_gid; - if (mode) { - *mode = (*mode & ~S_IRWXUGO) | sbi->android_emu_mode; - if (F2FS_I(inode)->i_advise & FADVISE_ANDROID_EMU_ROOT) - *mode &= ~S_IRWXO; - if (S_ISDIR(*mode)) { - 
if (*mode & S_IRUSR) - *mode |= S_IXUSR; - if (*mode & S_IRGRP) - *mode |= S_IXGRP; - if (*mode & S_IROTH) - *mode |= S_IXOTH; - } - } - - return 0; -} - -static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - const char *xname = POSIX_ACL_XATTR_DEFAULT; - size_t size; - - if (!test_opt(sbi, POSIX_ACL)) - return 0; - - if (type == ACL_TYPE_ACCESS) - xname = POSIX_ACL_XATTR_ACCESS; - - size = strlen(xname) + 1; - if (list && size <= list_size) - memcpy(list, xname, size); - return size; -} - -static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - struct posix_acl *acl; - int error; - - if (strcmp(name, "") != 0) - return -EINVAL; - if (!test_opt(sbi, POSIX_ACL)) - return -EOPNOTSUPP; - - acl = f2fs_get_acl(dentry->d_inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (!acl) - return -ENODATA; - error = posix_acl_to_xattr(acl, buffer, size); - posix_acl_release(acl); - - return error; -} - -static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - struct inode *inode = dentry->d_inode; - struct posix_acl *acl = NULL; - int error; - - if (strcmp(name, "") != 0) - return -EINVAL; - if (!test_opt(sbi, POSIX_ACL)) - return -EOPNOTSUPP; - if (!inode_owner_or_capable(inode)) - return -EPERM; - - if (value) { - acl = posix_acl_from_xattr(value, size); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - error = posix_acl_valid(acl); - if (error) - goto release_and_out; - } - } else { - acl = NULL; - } - - error = f2fs_set_acl(inode, type, acl); - -release_and_out: - posix_acl_release(acl); - return error; -} - -const struct xattr_handler f2fs_xattr_acl_default_handler = { - .prefix = POSIX_ACL_XATTR_DEFAULT, - .flags = ACL_TYPE_DEFAULT, - .list = f2fs_xattr_list_acl, - .get = f2fs_xattr_get_acl, - .set = f2fs_xattr_set_acl, -}; - -const struct xattr_handler f2fs_xattr_acl_access_handler = { - .prefix = POSIX_ACL_XATTR_ACCESS, - .flags = ACL_TYPE_ACCESS, - .list = f2fs_xattr_list_acl, - .get = f2fs_xattr_get_acl, - .set = f2fs_xattr_set_acl, -}; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h deleted file mode 100644 index 80f43067441..00000000000 --- a/fs/f2fs/acl.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * fs/f2fs/acl.h - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * Portions of this code from linux/fs/ext2/acl.h - * - * Copyright (C) 2001-2003 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#ifndef __F2FS_ACL_H__ -#define __F2FS_ACL_H__ - -#include - -#define F2FS_ACL_VERSION 0x0001 - -struct f2fs_acl_entry { - __le16 e_tag; - __le16 e_perm; - __le32 e_id; -}; - -struct f2fs_acl_entry_short { - __le16 e_tag; - __le16 e_perm; -}; - -struct f2fs_acl_header { - __le32 a_version; -}; - -#ifdef CONFIG_F2FS_FS_POSIX_ACL - -extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type); -extern int f2fs_acl_chmod(struct inode *inode); -extern int f2fs_init_acl(struct inode *inode, struct inode *dir); -#else -#define f2fs_check_acl NULL -#define f2fs_get_acl NULL -#define f2fs_set_acl NULL - -static inline int f2fs_acl_chmod(struct inode *inode) -{ - return 0; -} - -static inline int f2fs_init_acl(struct inode *inode, struct inode *dir) -{ - return 0; -} -#endif -#endif /* __F2FS_ACL_H__ */ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c deleted file mode 100644 index db6a633362d..00000000000 --- a/fs/f2fs/checkpoint.c +++ /dev/null @@ -1,860 +0,0 @@ -/* - * fs/f2fs/checkpoint.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "node.h" -#include "segment.h" -#include - -static struct kmem_cache *orphan_entry_slab; -static struct kmem_cache *inode_entry_slab; - -/* - * We guarantee no failure on the returned page. - */ -struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) -{ - struct address_space *mapping = sbi->meta_inode->i_mapping; - struct page *page = NULL; -repeat: - page = grab_cache_page(mapping, index); - if (!page) { - cond_resched(); - goto repeat; - } - - /* We wait writeback only inside grab_meta_page() */ - wait_on_page_writeback(page); - SetPageUptodate(page); - return page; -} - -/* - * We guarantee no failure on the returned page. 
- */ -struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) -{ - struct address_space *mapping = sbi->meta_inode->i_mapping; - struct page *page; -repeat: - page = grab_cache_page(mapping, index); - if (!page) { - cond_resched(); - goto repeat; - } - if (PageUptodate(page)) - goto out; - - if (f2fs_readpage(sbi, page, index, READ_SYNC)) - goto repeat; - - lock_page(page); - if (page->mapping != mapping) { - f2fs_put_page(page, 1); - goto repeat; - } -out: - mark_page_accessed(page); - return page; -} - -static int f2fs_write_meta_page(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - - /* Should not write any meta pages, if any IO error was occurred */ - if (wbc->for_reclaim || sbi->por_doing || - is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) { - dec_page_count(sbi, F2FS_DIRTY_META); - wbc->pages_skipped++; - set_page_dirty(page); - return AOP_WRITEPAGE_ACTIVATE; - } - - wait_on_page_writeback(page); - - write_meta_page(sbi, page); - dec_page_count(sbi, F2FS_DIRTY_META); - unlock_page(page); - return 0; -} - -static int f2fs_write_meta_pages(struct address_space *mapping, - struct writeback_control *wbc) -{ - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - struct block_device *bdev = sbi->sb->s_bdev; - long written; - - if (wbc->for_kupdate) - return 0; - - if (get_pages(sbi, F2FS_DIRTY_META) == 0) - return 0; - - /* if mounting is failed, skip writing node pages */ - mutex_lock(&sbi->cp_mutex); - written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev)); - mutex_unlock(&sbi->cp_mutex); - wbc->nr_to_write -= written; - return 0; -} - -long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, - long nr_to_write) -{ - struct address_space *mapping = sbi->meta_inode->i_mapping; - pgoff_t index = 0, end = LONG_MAX; - struct pagevec pvec; - long nwritten = 0; - struct writeback_control wbc = { - .for_reclaim = 0, - }; - - pagevec_init(&pvec, 0); - - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (nr_pages == 0) - break; - - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - lock_page(page); - BUG_ON(page->mapping != mapping); - BUG_ON(!PageDirty(page)); - clear_page_dirty_for_io(page); - if (f2fs_write_meta_page(page, &wbc)) { - unlock_page(page); - break; - } - if (nwritten++ >= nr_to_write) - break; - } - pagevec_release(&pvec); - cond_resched(); - } - - if (nwritten) - f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX); - - return nwritten; -} - -static int f2fs_set_meta_page_dirty(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - - SetPageUptodate(page); - if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); - inc_page_count(sbi, F2FS_DIRTY_META); - return 1; - } - return 0; -} - -const struct address_space_operations f2fs_meta_aops = { - .writepage = f2fs_write_meta_page, - .writepages = f2fs_write_meta_pages, - .set_page_dirty = f2fs_set_meta_page_dirty, -}; - -int acquire_orphan_inode(struct f2fs_sb_info *sbi) -{ - unsigned int max_orphans; - int err = 0; - - /* - * considering 512 blocks in a segment 5 blocks are needed for cp - * and log segment summaries. 
Remaining blocks are used to keep - * orphan entries with the limitation one reserved segment - * for cp pack we can have max 1020*507 orphan entries - */ - max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK; - mutex_lock(&sbi->orphan_inode_mutex); - if (sbi->n_orphans >= max_orphans) - err = -ENOSPC; - else - sbi->n_orphans++; - mutex_unlock(&sbi->orphan_inode_mutex); - return err; -} - -void release_orphan_inode(struct f2fs_sb_info *sbi) -{ - mutex_lock(&sbi->orphan_inode_mutex); - if (sbi->n_orphans == 0) { - f2fs_msg(sbi->sb, KERN_ERR, "releasing " - "unacquired orphan inode"); - f2fs_handle_error(sbi); - } else - sbi->n_orphans--; - mutex_unlock(&sbi->orphan_inode_mutex); -} - -void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) -{ - struct list_head *head, *this; - struct orphan_inode_entry *new = NULL, *orphan = NULL; - - mutex_lock(&sbi->orphan_inode_mutex); - head = &sbi->orphan_inode_list; - list_for_each(this, head) { - orphan = list_entry(this, struct orphan_inode_entry, list); - if (orphan->ino == ino) - goto out; - if (orphan->ino > ino) - break; - orphan = NULL; - } -retry: - new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); - if (!new) { - cond_resched(); - goto retry; - } - new->ino = ino; - - /* add new_oentry into list which is sorted by inode number */ - if (orphan) - list_add(&new->list, this->prev); - else - list_add_tail(&new->list, head); -out: - mutex_unlock(&sbi->orphan_inode_mutex); -} - -void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) -{ - struct list_head *head; - struct orphan_inode_entry *orphan; - - mutex_lock(&sbi->orphan_inode_mutex); - head = &sbi->orphan_inode_list; - list_for_each_entry(orphan, head, list) { - if (orphan->ino == ino) { - list_del(&orphan->list); - kmem_cache_free(orphan_entry_slab, orphan); - if (sbi->n_orphans == 0) { - f2fs_msg(sbi->sb, KERN_ERR, "removing " - "unacquired orphan inode %d", - ino); - f2fs_handle_error(sbi); - } else - sbi->n_orphans--; - break; - } - } - mutex_unlock(&sbi->orphan_inode_mutex); -} - -static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) -{ - struct inode *inode = f2fs_iget(sbi->sb, ino); - if (IS_ERR(inode)) { - f2fs_msg(sbi->sb, KERN_ERR, "unable to recover orphan inode %d", - ino); - f2fs_handle_error(sbi); - return; - } - clear_nlink(inode); - - /* truncate all the data during iput */ - iput(inode); -} - -int recover_orphan_inodes(struct f2fs_sb_info *sbi) -{ - block_t start_blk, orphan_blkaddr, i, j; - - if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) - return 0; - - sbi->por_doing = 1; - start_blk = __start_cp_addr(sbi) + 1; - orphan_blkaddr = __start_sum_addr(sbi) - 1; - - for (i = 0; i < orphan_blkaddr; i++) { - struct page *page = get_meta_page(sbi, start_blk + i); - struct f2fs_orphan_block *orphan_blk; - - orphan_blk = (struct f2fs_orphan_block *)page_address(page); - for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { - nid_t ino = le32_to_cpu(orphan_blk->ino[j]); - recover_orphan_inode(sbi, ino); - } - f2fs_put_page(page, 1); - } - /* clear Orphan Flag */ - clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); - sbi->por_doing = 0; - return 0; -} - -static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) -{ - struct list_head *head, *this, *next; - struct f2fs_orphan_block *orphan_blk = NULL; - struct page *page = NULL; - unsigned int nentries = 0; - unsigned short index = 1; - unsigned short orphan_blocks; - - orphan_blocks = (unsigned short)((sbi->n_orphans + - (F2FS_ORPHANS_PER_BLOCK 
- 1)) / F2FS_ORPHANS_PER_BLOCK); - - mutex_lock(&sbi->orphan_inode_mutex); - head = &sbi->orphan_inode_list; - - /* loop for each orphan inode entry and write them in Jornal block */ - list_for_each_safe(this, next, head) { - struct orphan_inode_entry *orphan; - - orphan = list_entry(this, struct orphan_inode_entry, list); - - if (nentries == F2FS_ORPHANS_PER_BLOCK) { - /* - * an orphan block is full of 1020 entries, - * then we need to flush current orphan blocks - * and bring another one in memory - */ - orphan_blk->blk_addr = cpu_to_le16(index); - orphan_blk->blk_count = cpu_to_le16(orphan_blocks); - orphan_blk->entry_count = cpu_to_le32(nentries); - set_page_dirty(page); - f2fs_put_page(page, 1); - index++; - start_blk++; - nentries = 0; - page = NULL; - } - if (page) - goto page_exist; - - page = grab_meta_page(sbi, start_blk); - orphan_blk = (struct f2fs_orphan_block *)page_address(page); - memset(orphan_blk, 0, sizeof(*orphan_blk)); -page_exist: - orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); - } - if (!page) - goto end; - - orphan_blk->blk_addr = cpu_to_le16(index); - orphan_blk->blk_count = cpu_to_le16(orphan_blocks); - orphan_blk->entry_count = cpu_to_le32(nentries); - set_page_dirty(page); - f2fs_put_page(page, 1); -end: - mutex_unlock(&sbi->orphan_inode_mutex); -} - -static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, - block_t cp_addr, unsigned long long *version) -{ - struct page *cp_page_1, *cp_page_2 = NULL; - unsigned long blk_size = sbi->blocksize; - struct f2fs_checkpoint *cp_block; - unsigned long long cur_version = 0, pre_version = 0; - size_t crc_offset; - __u32 crc = 0; - - /* Read the 1st cp block in this CP pack */ - cp_page_1 = get_meta_page(sbi, cp_addr); - - /* get the version number */ - cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1); - crc_offset = le32_to_cpu(cp_block->checksum_offset); - if (crc_offset >= blk_size) - goto invalid_cp1; - - crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); - if (!f2fs_crc_valid(crc, cp_block, crc_offset)) - goto invalid_cp1; - - pre_version = cur_cp_version(cp_block); - - /* Read the 2nd cp block in this CP pack */ - cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; - cp_page_2 = get_meta_page(sbi, cp_addr); - - cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2); - crc_offset = le32_to_cpu(cp_block->checksum_offset); - if (crc_offset >= blk_size) - goto invalid_cp2; - - crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); - if (!f2fs_crc_valid(crc, cp_block, crc_offset)) - goto invalid_cp2; - - cur_version = cur_cp_version(cp_block); - - if (cur_version == pre_version) { - *version = cur_version; - f2fs_put_page(cp_page_2, 1); - return cp_page_1; - } -invalid_cp2: - f2fs_put_page(cp_page_2, 1); -invalid_cp1: - f2fs_put_page(cp_page_1, 1); - return NULL; -} - -int get_valid_checkpoint(struct f2fs_sb_info *sbi) -{ - struct f2fs_checkpoint *cp_block; - struct f2fs_super_block *fsb = sbi->raw_super; - struct page *cp1, *cp2, *cur_page; - unsigned long blk_size = sbi->blocksize; - unsigned long long cp1_version = 0, cp2_version = 0; - unsigned long long cp_start_blk_no; - - sbi->ckpt = kzalloc(blk_size, GFP_KERNEL); - if (!sbi->ckpt) - return -ENOMEM; - /* - * Finding out valid cp block involves read both - * sets( cp pack1 and cp pack 2) - */ - cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr); - cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); - - /* The second checkpoint pack should start at the next segment */ - 
cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); - cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version); - - if (cp1 && cp2) { - if (ver_after(cp2_version, cp1_version)) - cur_page = cp2; - else - cur_page = cp1; - } else if (cp1) { - cur_page = cp1; - } else if (cp2) { - cur_page = cp2; - } else { - goto fail_no_cp; - } - - cp_block = (struct f2fs_checkpoint *)page_address(cur_page); - memcpy(sbi->ckpt, cp_block, blk_size); - - f2fs_put_page(cp1, 1); - f2fs_put_page(cp2, 1); - return 0; - -fail_no_cp: - kfree(sbi->ckpt); - return -EINVAL; -} - -static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct list_head *head = &sbi->dir_inode_list; - struct list_head *this; - - list_for_each(this, head) { - struct dir_inode_entry *entry; - entry = list_entry(this, struct dir_inode_entry, list); - if (entry->inode == inode) - return -EEXIST; - } - list_add_tail(&new->list, head); -#ifdef CONFIG_F2FS_STAT_FS - sbi->n_dirty_dirs++; -#endif - return 0; -} - -void set_dirty_dir_page(struct inode *inode, struct page *page) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct dir_inode_entry *new; - - if (!S_ISDIR(inode->i_mode)) - return; -retry: - new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); - if (!new) { - cond_resched(); - goto retry; - } - new->inode = inode; - INIT_LIST_HEAD(&new->list); - - spin_lock(&sbi->dir_inode_lock); - if (__add_dirty_inode(inode, new)) - kmem_cache_free(inode_entry_slab, new); - - inc_page_count(sbi, F2FS_DIRTY_DENTS); - inode_inc_dirty_dents(inode); - SetPagePrivate(page); - spin_unlock(&sbi->dir_inode_lock); -} - -void add_dirty_dir_inode(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct dir_inode_entry *new; -retry: - new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); - if (!new) { - cond_resched(); - goto retry; - } - new->inode = inode; - INIT_LIST_HEAD(&new->list); - - spin_lock(&sbi->dir_inode_lock); - if (__add_dirty_inode(inode, new)) - kmem_cache_free(inode_entry_slab, new); - spin_unlock(&sbi->dir_inode_lock); -} - -void remove_dirty_dir_inode(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct list_head *head = &sbi->dir_inode_list; - struct list_head *this; - - if (!S_ISDIR(inode->i_mode)) - return; - - spin_lock(&sbi->dir_inode_lock); - if (atomic_read(&F2FS_I(inode)->dirty_dents)) { - spin_unlock(&sbi->dir_inode_lock); - return; - } - - list_for_each(this, head) { - struct dir_inode_entry *entry; - entry = list_entry(this, struct dir_inode_entry, list); - if (entry->inode == inode) { - list_del(&entry->list); - kmem_cache_free(inode_entry_slab, entry); -#ifdef CONFIG_F2FS_STAT_FS - sbi->n_dirty_dirs--; -#endif - break; - } - } - spin_unlock(&sbi->dir_inode_lock); - - /* Only from the recovery routine */ - if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { - clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); - iput(inode); - } -} - -struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) -{ - struct list_head *head = &sbi->dir_inode_list; - struct list_head *this; - struct inode *inode = NULL; - - spin_lock(&sbi->dir_inode_lock); - list_for_each(this, head) { - struct dir_inode_entry *entry; - entry = list_entry(this, struct dir_inode_entry, list); - if (entry->inode->i_ino == ino) { - inode = entry->inode; - break; - } - } - spin_unlock(&sbi->dir_inode_lock); - return inode; -} - -void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) -{ - struct list_head 
*head = &sbi->dir_inode_list; - struct dir_inode_entry *entry; - struct inode *inode; -retry: - spin_lock(&sbi->dir_inode_lock); - if (list_empty(head)) { - spin_unlock(&sbi->dir_inode_lock); - return; - } - entry = list_entry(head->next, struct dir_inode_entry, list); - inode = igrab(entry->inode); - spin_unlock(&sbi->dir_inode_lock); - if (inode) { - filemap_flush(inode->i_mapping); - iput(inode); - } else { - /* - * We should submit bio, since it exists several - * wribacking dentry pages in the freeing inode. - */ - f2fs_submit_bio(sbi, DATA, true); - } - goto retry; -} - -/* - * Freeze all the FS-operations for checkpoint. - */ -static void block_operations(struct f2fs_sb_info *sbi) -{ - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .for_reclaim = 0, - }; - struct blk_plug plug; - - blk_start_plug(&plug); - -retry_flush_dents: - mutex_lock_all(sbi); - - /* write all the dirty dentry pages */ - if (get_pages(sbi, F2FS_DIRTY_DENTS)) { - mutex_unlock_all(sbi); - sync_dirty_dir_inodes(sbi); - goto retry_flush_dents; - } - - /* - * POR: we should ensure that there is no dirty node pages - * until finishing nat/sit flush. - */ -retry_flush_nodes: - mutex_lock(&sbi->node_write); - - if (get_pages(sbi, F2FS_DIRTY_NODES)) { - mutex_unlock(&sbi->node_write); - sync_node_pages(sbi, 0, &wbc); - goto retry_flush_nodes; - } - blk_finish_plug(&plug); -} - -static void unblock_operations(struct f2fs_sb_info *sbi) -{ - mutex_unlock(&sbi->node_write); - mutex_unlock_all(sbi); -} - -static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) -{ - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - nid_t last_nid = 0; - block_t start_blk; - struct page *cp_page; - unsigned int data_sum_blocks, orphan_blocks; - __u32 crc32 = 0; - void *kaddr; - int i; - - /* Flush all the NAT/SIT pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) - sync_meta_pages(sbi, META, LONG_MAX); - - next_free_nid(sbi, &last_nid); - - /* - * modify checkpoint - * version number is already updated - */ - ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); - ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); - ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); - for (i = 0; i < 3; i++) { - ckpt->cur_node_segno[i] = - cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); - ckpt->cur_node_blkoff[i] = - cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE)); - ckpt->alloc_type[i + CURSEG_HOT_NODE] = - curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); - } - for (i = 0; i < 3; i++) { - ckpt->cur_data_segno[i] = - cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); - ckpt->cur_data_blkoff[i] = - cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA)); - ckpt->alloc_type[i + CURSEG_HOT_DATA] = - curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); - } - - ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); - ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); - ckpt->next_free_nid = cpu_to_le32(last_nid); - - /* 2 cp + n data seg summary + orphan inode blocks */ - data_sum_blocks = npages_for_summary_flush(sbi); - if (data_sum_blocks < 3) - set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - else - clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - - orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) - / F2FS_ORPHANS_PER_BLOCK; - ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); - - if (is_umount) { - set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - ckpt->cp_pack_total_block_count = cpu_to_le32(2 + - data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); - } else { - 
clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - ckpt->cp_pack_total_block_count = cpu_to_le32(2 + - data_sum_blocks + orphan_blocks); - } - - if (sbi->n_orphans) - set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - else - clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - - /* update SIT/NAT bitmap */ - get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); - get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); - - crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); - *((__le32 *)((unsigned char *)ckpt + - le32_to_cpu(ckpt->checksum_offset))) - = cpu_to_le32(crc32); - - start_blk = __start_cp_addr(sbi); - - /* write out checkpoint buffer at block 0 */ - cp_page = grab_meta_page(sbi, start_blk++); - kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); - set_page_dirty(cp_page); - f2fs_put_page(cp_page, 1); - - if (sbi->n_orphans) { - write_orphan_inodes(sbi, start_blk); - start_blk += orphan_blocks; - } - - write_data_summaries(sbi, start_blk); - start_blk += data_sum_blocks; - if (is_umount) { - write_node_summaries(sbi, start_blk); - start_blk += NR_CURSEG_NODE_TYPE; - } - - /* writeout checkpoint block */ - cp_page = grab_meta_page(sbi, start_blk); - kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); - set_page_dirty(cp_page); - f2fs_put_page(cp_page, 1); - - /* wait for previous submitted node/meta pages writeback */ - while (get_pages(sbi, F2FS_WRITEBACK)) - congestion_wait(BLK_RW_ASYNC, HZ / 50); - - filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); - filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); - - /* update user_block_counts */ - sbi->last_valid_block_count = sbi->total_valid_block_count; - sbi->alloc_valid_block_count = 0; - - /* Here, we only have one bio having CP pack */ - sync_meta_pages(sbi, META_FLUSH, LONG_MAX); - - if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { - clear_prefree_segments(sbi); - F2FS_RESET_SB_DIRT(sbi); - } -} - -/* - * We guarantee that this checkpoint procedure should not fail. 
- */ -void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) -{ - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - unsigned long long ckpt_ver; - - trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); - - mutex_lock(&sbi->cp_mutex); - block_operations(sbi); - - trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); - - f2fs_submit_bio(sbi, DATA, true); - f2fs_submit_bio(sbi, NODE, true); - f2fs_submit_bio(sbi, META, true); - - /* - * update checkpoint pack index - * Increase the version number so that - * SIT entries and seg summaries are written at correct place - */ - ckpt_ver = cur_cp_version(ckpt); - ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); - - /* write cached NAT/SIT entries to NAT/SIT area */ - flush_nat_entries(sbi); - flush_sit_entries(sbi); - - /* unlock all the fs_lock[] in do_checkpoint() */ - do_checkpoint(sbi, is_umount); - - unblock_operations(sbi); - mutex_unlock(&sbi->cp_mutex); - - trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); -} - -void init_orphan_info(struct f2fs_sb_info *sbi) -{ - mutex_init(&sbi->orphan_inode_mutex); - INIT_LIST_HEAD(&sbi->orphan_inode_list); - sbi->n_orphans = 0; -} - -int __init create_checkpoint_caches(void) -{ - orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", - sizeof(struct orphan_inode_entry), NULL); - if (unlikely(!orphan_entry_slab)) - return -ENOMEM; - inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", - sizeof(struct dir_inode_entry), NULL); - if (unlikely(!inode_entry_slab)) { - kmem_cache_destroy(orphan_entry_slab); - return -ENOMEM; - } - return 0; -} - -void destroy_checkpoint_caches(void) -{ - kmem_cache_destroy(orphan_entry_slab); - kmem_cache_destroy(inode_entry_slab); -} diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c deleted file mode 100644 index 550adc3cc6b..00000000000 --- a/fs/f2fs/data.c +++ /dev/null @@ -1,790 +0,0 @@ -/* - * fs/f2fs/data.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "node.h" -#include "segment.h" -#include - -/* - * Lock ordering for the change of data block address: - * ->data_page - * ->node_page - * update block addresses in the node page - */ -static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) -{ - struct f2fs_node *rn; - __le32 *addr_array; - struct page *node_page = dn->node_page; - unsigned int ofs_in_node = dn->ofs_in_node; - - f2fs_wait_on_page_writeback(node_page, NODE, false); - - rn = F2FS_NODE(node_page); - - /* Get physical address of data block */ - addr_array = blkaddr_in_node(rn); - addr_array[ofs_in_node] = cpu_to_le32(new_addr); - set_page_dirty(node_page); -} - -int reserve_new_block(struct dnode_of_data *dn) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); - - if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) - return -EPERM; - if (!inc_valid_block_count(sbi, dn->inode, 1)) - return -ENOSPC; - - trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); - - __set_data_blkaddr(dn, NEW_ADDR); - dn->data_blkaddr = NEW_ADDR; - sync_inode_page(dn); - return 0; -} - -static int check_extent_cache(struct inode *inode, pgoff_t pgofs, - struct buffer_head *bh_result) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); -#ifdef CONFIG_F2FS_STAT_FS - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -#endif - pgoff_t start_fofs, end_fofs; - block_t start_blkaddr; - - read_lock(&fi->ext.ext_lock); - if (fi->ext.len == 0) { - read_unlock(&fi->ext.ext_lock); - return 0; - } - -#ifdef CONFIG_F2FS_STAT_FS - sbi->total_hit_ext++; -#endif - start_fofs = fi->ext.fofs; - end_fofs = fi->ext.fofs + fi->ext.len - 1; - start_blkaddr = fi->ext.blk_addr; - - if (pgofs >= start_fofs && pgofs <= end_fofs) { - unsigned int blkbits = inode->i_sb->s_blocksize_bits; - size_t count; - - clear_buffer_new(bh_result); - map_bh(bh_result, inode->i_sb, - start_blkaddr + pgofs - start_fofs); - count = end_fofs - pgofs + 1; - if (count < (UINT_MAX >> blkbits)) - bh_result->b_size = (count << blkbits); - else - bh_result->b_size = UINT_MAX; - -#ifdef CONFIG_F2FS_STAT_FS - sbi->read_hit_ext++; -#endif - read_unlock(&fi->ext.ext_lock); - return 1; - } - read_unlock(&fi->ext.ext_lock); - return 0; -} - -void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) -{ - struct f2fs_inode_info *fi = F2FS_I(dn->inode); - pgoff_t fofs, start_fofs, end_fofs; - block_t start_blkaddr, end_blkaddr; - - BUG_ON(blk_addr == NEW_ADDR); - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + - dn->ofs_in_node; - - /* Update the page address in the parent node */ - __set_data_blkaddr(dn, blk_addr); - - write_lock(&fi->ext.ext_lock); - - start_fofs = fi->ext.fofs; - end_fofs = fi->ext.fofs + fi->ext.len - 1; - start_blkaddr = fi->ext.blk_addr; - end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; - - /* Drop and initialize the matched extent */ - if (fi->ext.len == 1 && fofs == start_fofs) - fi->ext.len = 0; - - /* Initial extent */ - if (fi->ext.len == 0) { - if (blk_addr != NULL_ADDR) { - fi->ext.fofs = fofs; - fi->ext.blk_addr = blk_addr; - fi->ext.len = 1; - } - goto end_update; - } - - /* Front merge */ - if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { - fi->ext.fofs--; - fi->ext.blk_addr--; - fi->ext.len++; - goto end_update; - } - - /* Back merge */ - if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { - fi->ext.len++; - goto end_update; - } - - /* Split the existing extent */ - if 
(fi->ext.len > 1 && - fofs >= start_fofs && fofs <= end_fofs) { - if ((end_fofs - fofs) < (fi->ext.len >> 1)) { - fi->ext.len = fofs - start_fofs; - } else { - fi->ext.fofs = fofs + 1; - fi->ext.blk_addr = start_blkaddr + - fofs - start_fofs + 1; - fi->ext.len -= fofs - start_fofs + 1; - } - goto end_update; - } - write_unlock(&fi->ext.ext_lock); - return; - -end_update: - write_unlock(&fi->ext.ext_lock); - sync_inode_page(dn); -} - -struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct address_space *mapping = inode->i_mapping; - struct dnode_of_data dn; - struct page *page; - int err; - - page = find_get_page(mapping, index); - if (page && PageUptodate(page)) - return page; - f2fs_put_page(page, 0); - - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) - return ERR_PTR(err); - f2fs_put_dnode(&dn); - - if (dn.data_blkaddr == NULL_ADDR) - return ERR_PTR(-ENOENT); - - /* By fallocate(), there is no cached page, but with NEW_ADDR */ - if (dn.data_blkaddr == NEW_ADDR) - return ERR_PTR(-EINVAL); - - page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); - if (!page) - return ERR_PTR(-ENOMEM); - - if (PageUptodate(page)) { - unlock_page(page); - return page; - } - - err = f2fs_readpage(sbi, page, dn.data_blkaddr, - sync ? READ_SYNC : READA); - if (sync) { - wait_on_page_locked(page); - if (!PageUptodate(page)) { - f2fs_put_page(page, 0); - return ERR_PTR(-EIO); - } - } - return page; -} - -/* - * If it tries to access a hole, return an error. - * Because, the callers, functions in dir.c and GC, should be able to know - * whether this page exists or not. - */ -struct page *get_lock_data_page(struct inode *inode, pgoff_t index) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct address_space *mapping = inode->i_mapping; - struct dnode_of_data dn; - struct page *page; - int err; - -repeat: - page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); - if (!page) - return ERR_PTR(-ENOMEM); - - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) { - f2fs_put_page(page, 1); - return ERR_PTR(err); - } - f2fs_put_dnode(&dn); - - if (dn.data_blkaddr == NULL_ADDR) { - f2fs_put_page(page, 1); - return ERR_PTR(-ENOENT); - } - - if (PageUptodate(page)) - return page; - - /* - * A new dentry page is allocated but not able to be written, since its - * new inode page couldn't be allocated due to -ENOSPC. - * In such the case, its blkaddr can be remained as NEW_ADDR. - * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata. - */ - if (dn.data_blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); - SetPageUptodate(page); - return page; - } - - err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - if (err) - return ERR_PTR(err); - - lock_page(page); - if (!PageUptodate(page)) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } - if (page->mapping != mapping) { - f2fs_put_page(page, 1); - goto repeat; - } - return page; -} - -/* - * Caller ensures that this data page is never allocated. - * A new zero-filled data page is allocated in the page cache. - * - * Also, caller should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op(). - * Note that, npage is set only by make_empty_dir. 
- */ -struct page *get_new_data_page(struct inode *inode, - struct page *npage, pgoff_t index, bool new_i_size) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct address_space *mapping = inode->i_mapping; - struct page *page; - struct dnode_of_data dn; - int err; - - set_new_dnode(&dn, inode, npage, npage, 0); - err = get_dnode_of_data(&dn, index, ALLOC_NODE); - if (err) - return ERR_PTR(err); - - if (dn.data_blkaddr == NULL_ADDR) { - if (reserve_new_block(&dn)) { - if (!npage) - f2fs_put_dnode(&dn); - return ERR_PTR(-ENOSPC); - } - } - if (!npage) - f2fs_put_dnode(&dn); -repeat: - page = grab_cache_page(mapping, index); - if (!page) - return ERR_PTR(-ENOMEM); - - if (PageUptodate(page)) - return page; - - if (dn.data_blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); - SetPageUptodate(page); - } else { - err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - if (err) - return ERR_PTR(err); - lock_page(page); - if (!PageUptodate(page)) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } - if (page->mapping != mapping) { - f2fs_put_page(page, 1); - goto repeat; - } - } - - if (new_i_size && - i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { - i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); - /* Only the directory inode sets new_i_size */ - set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); - mark_inode_dirty_sync(inode); - } - return page; -} - -static void read_end_io(struct bio *bio, int err) -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - - do { - struct page *page = bvec->bv_page; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate) { - SetPageUptodate(page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - unlock_page(page); - } while (bvec >= bio->bi_io_vec); - bio_put(bio); -} - -/* - * Fill the locked page with data located in the block address. - * Return unlocked page. - */ -int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, int type) -{ - struct block_device *bdev = sbi->sb->s_bdev; - struct bio *bio; - - trace_f2fs_readpage(page, blk_addr, type); - - down_read(&sbi->bio_sem); - - /* Allocate a new bio */ - bio = f2fs_bio_alloc(bdev, 1); - - /* Initialize the bio */ - bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); - bio->bi_end_io = read_end_io; - - if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { - bio_put(bio); - up_read(&sbi->bio_sem); - f2fs_put_page(page, 1); - return -EFAULT; - } - - submit_bio(type, bio); - up_read(&sbi->bio_sem); - return 0; -} - -/* - * This function should be used by the data read flow only where it - * does not check the "create" flag that indicates block allocation. - * The reason for this special functionality is to exploit VFS readahead - * mechanism. 
- */ -static int get_data_block_ro(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - unsigned int blkbits = inode->i_sb->s_blocksize_bits; - unsigned maxblocks = bh_result->b_size >> blkbits; - struct dnode_of_data dn; - pgoff_t pgofs; - int err; - - /* Get the page offset from the block offset(iblock) */ - pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); - - if (check_extent_cache(inode, pgofs, bh_result)) { - trace_f2fs_get_data_block(inode, iblock, bh_result, 0); - return 0; - } - - /* When reading holes, we need its node page */ - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); - if (err) { - trace_f2fs_get_data_block(inode, iblock, bh_result, err); - return (err == -ENOENT) ? 0 : err; - } - - /* It does not support data allocation */ - BUG_ON(create); - - if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { - int i; - unsigned int end_offset; - - end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE(F2FS_I(inode)) : - ADDRS_PER_BLOCK; - - clear_buffer_new(bh_result); - - /* Give more consecutive addresses for the read ahead */ - for (i = 0; i < end_offset - dn.ofs_in_node; i++) - if (((datablock_addr(dn.node_page, - dn.ofs_in_node + i)) - != (dn.data_blkaddr + i)) || maxblocks == i) - break; - map_bh(bh_result, inode->i_sb, dn.data_blkaddr); - bh_result->b_size = (i << blkbits); - } - f2fs_put_dnode(&dn); - trace_f2fs_get_data_block(inode, iblock, bh_result, 0); - return 0; -} - -static int f2fs_read_data_page(struct file *file, struct page *page) -{ - return mpage_readpage(page, get_data_block_ro); -} - -static int f2fs_read_data_pages(struct file *file, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); -} - -int do_write_data_page(struct page *page) -{ - struct inode *inode = page->mapping->host; - block_t old_blk_addr, new_blk_addr; - struct dnode_of_data dn; - int err = 0; - - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); - if (err) - return err; - - old_blk_addr = dn.data_blkaddr; - - /* This page is already truncated */ - if (old_blk_addr == NULL_ADDR) - goto out_writepage; - - set_page_writeback(page); - - /* - * If current allocation needs SSR, - * it had better in-place writes for updated data. - */ - if (unlikely(old_blk_addr != NEW_ADDR && - !is_cold_data(page) && - need_inplace_update(inode))) { - rewrite_data_page(F2FS_SB(inode->i_sb), page, - old_blk_addr); - } else { - write_data_page(inode, page, &dn, - old_blk_addr, &new_blk_addr); - update_extent_cache(new_blk_addr, &dn); - } -out_writepage: - f2fs_put_dnode(&dn); - return err; -} - -static int f2fs_write_data_page(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - loff_t i_size = i_size_read(inode); - const pgoff_t end_index = ((unsigned long long) i_size) - >> PAGE_CACHE_SHIFT; - unsigned offset; - bool need_balance_fs = false; - int err = 0; - - if (page->index < end_index) - goto write; - - /* - * If the offset is out-of-range of file size, - * this page does not have to be written to disk. 
- */ - offset = i_size & (PAGE_CACHE_SIZE - 1); - if ((page->index >= end_index + 1) || !offset) { - if (S_ISDIR(inode->i_mode)) { - dec_page_count(sbi, F2FS_DIRTY_DENTS); - inode_dec_dirty_dents(inode); - } - goto out; - } - - zero_user_segment(page, offset, PAGE_CACHE_SIZE); -write: - if (sbi->por_doing) { - err = AOP_WRITEPAGE_ACTIVATE; - goto redirty_out; - } - - /* Dentry blocks are controlled by checkpoint */ - if (S_ISDIR(inode->i_mode)) { - dec_page_count(sbi, F2FS_DIRTY_DENTS); - inode_dec_dirty_dents(inode); - err = do_write_data_page(page); - } else { - int ilock = mutex_lock_op(sbi); - err = do_write_data_page(page); - mutex_unlock_op(sbi, ilock); - need_balance_fs = true; - } - if (err == -ENOENT) - goto out; - else if (err) - goto redirty_out; - - if (wbc->for_reclaim) - f2fs_submit_bio(sbi, DATA, true); - - clear_cold_data(page); -out: - unlock_page(page); - if (need_balance_fs) - f2fs_balance_fs(sbi); - return 0; - -redirty_out: - wbc->pages_skipped++; - set_page_dirty(page); - return err; -} - -#define MAX_DESIRED_PAGES_WP 4096 - -static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct address_space *mapping = data; - int ret = mapping->a_ops->writepage(page, wbc); - mapping_set_error(mapping, ret); - return ret; -} - -static int f2fs_write_data_pages(struct address_space *mapping, - struct writeback_control *wbc) -{ - struct inode *inode = mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - bool locked = false; - int ret; - long excess_nrtw = 0, desired_nrtw; - - /* deal with chardevs and other special file */ - if (!mapping->a_ops->writepage) - return 0; - - if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { - desired_nrtw = MAX_DESIRED_PAGES_WP; - excess_nrtw = desired_nrtw - wbc->nr_to_write; - wbc->nr_to_write = desired_nrtw; - } - - if (!S_ISDIR(inode->i_mode)) { - mutex_lock(&sbi->writepages); - locked = true; - } - ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); - if (locked) - mutex_unlock(&sbi->writepages); - f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); - - remove_dirty_dir_inode(inode); - - wbc->nr_to_write -= excess_nrtw; - return ret; -} - -static int f2fs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - struct inode *inode = mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct page *page; - pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; - struct dnode_of_data dn; - int err = 0; - int ilock; - - f2fs_balance_fs(sbi); -repeat: - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) - return -ENOMEM; - *pagep = page; - - ilock = mutex_lock_op(sbi); - - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, ALLOC_NODE); - if (err) - goto err; - - if (dn.data_blkaddr == NULL_ADDR) - err = reserve_new_block(&dn); - - f2fs_put_dnode(&dn); - if (err) - goto err; - - mutex_unlock_op(sbi, ilock); - - if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) - return 0; - - if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { - unsigned start = pos & (PAGE_CACHE_SIZE - 1); - unsigned end = start + len; - - /* Reading beyond i_size is simple: memset to zero */ - zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); - goto out; - } - - if (dn.data_blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); - } else { - err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - if (err) - 
return err; - lock_page(page); - if (!PageUptodate(page)) { - f2fs_put_page(page, 1); - return -EIO; - } - if (page->mapping != mapping) { - f2fs_put_page(page, 1); - goto repeat; - } - } -out: - SetPageUptodate(page); - clear_cold_data(page); - return 0; - -err: - mutex_unlock_op(sbi, ilock); - f2fs_put_page(page, 1); - return err; -} - -static int f2fs_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - struct inode *inode = page->mapping->host; - - SetPageUptodate(page); - set_page_dirty(page); - - if (pos + copied > i_size_read(inode)) { - i_size_write(inode, pos + copied); - mark_inode_dirty(inode); - update_inode_page(inode); - } - - unlock_page(page); - page_cache_release(page); - return copied; -} - -static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - - if (rw == WRITE) - return 0; - - /* Needs synchronization with the cleaner */ - return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - get_data_block_ro); -} - -static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) -{ - struct inode *inode = page->mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - if (S_ISDIR(inode->i_mode) && PageDirty(page)) { - dec_page_count(sbi, F2FS_DIRTY_DENTS); - inode_dec_dirty_dents(inode); - } - ClearPagePrivate(page); -} - -static int f2fs_release_data_page(struct page *page, gfp_t wait) -{ - ClearPagePrivate(page); - return 1; -} - -static int f2fs_set_data_page_dirty(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; - - SetPageUptodate(page); - if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); - set_dirty_dir_page(inode, page); - return 1; - } - return 0; -} - -static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) -{ - return generic_block_bmap(mapping, block, get_data_block_ro); -} - -const struct address_space_operations f2fs_dblock_aops = { - .readpage = f2fs_read_data_page, - .readpages = f2fs_read_data_pages, - .writepage = f2fs_write_data_page, - .writepages = f2fs_write_data_pages, - .write_begin = f2fs_write_begin, - .write_end = f2fs_write_end, - .set_page_dirty = f2fs_set_data_page_dirty, - .invalidatepage = f2fs_invalidate_data_page, - .releasepage = f2fs_release_data_page, - .direct_IO = f2fs_direct_IO, - .bmap = f2fs_bmap, -}; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c deleted file mode 100644 index a84b0a8e685..00000000000 --- a/fs/f2fs/debug.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - * f2fs debugging statistics - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * Copyright (c) 2012 Linux Foundation - * Copyright (c) 2012 Greg Kroah-Hartman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "node.h" -#include "segment.h" -#include "gc.h" - -static LIST_HEAD(f2fs_stat_list); -static struct dentry *debugfs_root; -static DEFINE_MUTEX(f2fs_stat_mutex); - -static void update_general_status(struct f2fs_sb_info *sbi) -{ - struct f2fs_stat_info *si = F2FS_STAT(sbi); - int i; - - /* valid check of the segment numbers */ - si->hit_ext = sbi->read_hit_ext; - si->total_ext = sbi->total_hit_ext; - si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); - si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); - si->ndirty_dirs = sbi->n_dirty_dirs; - si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); - si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; - si->rsvd_segs = reserved_segments(sbi); - si->overp_segs = overprovision_segments(sbi); - si->valid_count = valid_user_blocks(sbi); - si->valid_node_count = valid_node_count(sbi); - si->valid_inode_count = valid_inode_count(sbi); - si->utilization = utilization(sbi); - - si->free_segs = free_segments(sbi); - si->free_secs = free_sections(sbi); - si->prefree_count = prefree_segments(sbi); - si->dirty_count = dirty_segments(sbi); - si->node_pages = sbi->node_inode->i_mapping->nrpages; - si->meta_pages = sbi->meta_inode->i_mapping->nrpages; - si->nats = NM_I(sbi)->nat_cnt; - si->sits = SIT_I(sbi)->dirty_sentries; - si->fnids = NM_I(sbi)->fcnt; - si->bg_gc = sbi->bg_gc; - si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) - * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) - / 2; - si->util_valid = (int)(written_block_count(sbi) >> - sbi->log_blocks_per_seg) - * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) - / 2; - si->util_invalid = 50 - si->util_free - si->util_valid; - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) { - struct curseg_info *curseg = CURSEG_I(sbi, i); - si->curseg[i] = curseg->segno; - si->cursec[i] = curseg->segno / sbi->segs_per_sec; - si->curzone[i] = si->cursec[i] / sbi->secs_per_zone; - } - - for (i = 0; i < 2; i++) { - si->segment_count[i] = sbi->segment_count[i]; - si->block_count[i] = sbi->block_count[i]; - } -} - -/* - * This function calculates BDF of every segments - */ -static void update_sit_info(struct f2fs_sb_info *sbi) -{ - struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; - struct sit_info *sit_i = SIT_I(sbi); - unsigned int segno, vblocks; - int ndirty = 0; - - bimodal = 0; - total_vblocks = 0; - blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); - hblks_per_sec = blks_per_sec / 2; - mutex_lock(&sit_i->sentry_lock); - for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { - vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); - dist = abs(vblocks - hblks_per_sec); - bimodal += dist * dist; - - if (vblocks > 0 && vblocks < blks_per_sec) { - total_vblocks += vblocks; - ndirty++; - } - } - mutex_unlock(&sit_i->sentry_lock); - dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; - si->bimodal = bimodal / dist; - if (si->dirty_count) - si->avg_vblocks = total_vblocks / ndirty; - else - si->avg_vblocks = 0; -} - -/* - * This function calculates memory footprint. 
- */ -static void update_mem_info(struct f2fs_sb_info *sbi) -{ - struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned npages; - - if (si->base_mem) - goto get_cache; - - si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; - si->base_mem += 2 * sizeof(struct f2fs_inode_info); - si->base_mem += sizeof(*sbi->ckpt); - - /* build sm */ - si->base_mem += sizeof(struct f2fs_sm_info); - - /* build sit */ - si->base_mem += sizeof(struct sit_info); - si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry); - si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); - si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); - if (sbi->segs_per_sec > 1) - si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); - si->base_mem += __bitmap_size(sbi, SIT_BITMAP); - - /* build free segmap */ - si->base_mem += sizeof(struct free_segmap_info); - si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); - si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); - - /* build curseg */ - si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; - si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE; - - /* build dirty segmap */ - si->base_mem += sizeof(struct dirty_seglist_info); - si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); - si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); - - /* buld nm */ - si->base_mem += sizeof(struct f2fs_nm_info); - si->base_mem += __bitmap_size(sbi, NAT_BITMAP); - - /* build gc */ - si->base_mem += sizeof(struct f2fs_gc_kthread); - -get_cache: - /* free nids */ - si->cache_mem = NM_I(sbi)->fcnt; - si->cache_mem += NM_I(sbi)->nat_cnt; - npages = sbi->node_inode->i_mapping->nrpages; - si->cache_mem += npages << PAGE_CACHE_SHIFT; - npages = sbi->meta_inode->i_mapping->nrpages; - si->cache_mem += npages << PAGE_CACHE_SHIFT; - si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); - si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); -} - -static int stat_show(struct seq_file *s, void *v) -{ - struct f2fs_stat_info *si; - int i = 0; - int j; - - mutex_lock(&f2fs_stat_mutex); - list_for_each_entry(si, &f2fs_stat_list, stat_list) { - char devname[BDEVNAME_SIZE]; - - update_general_status(si->sbi); - - seq_printf(s, "\n=====[ partition info(%s). 
#%d ]=====\n", - bdevname(si->sbi->sb->s_bdev, devname), i++); - seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", - si->sit_area_segs, si->nat_area_segs); - seq_printf(s, "[SSA: %d] [MAIN: %d", - si->ssa_area_segs, si->main_area_segs); - seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", - si->overp_segs, si->rsvd_segs); - seq_printf(s, "Utilization: %d%% (%d valid blocks)\n", - si->utilization, si->valid_count); - seq_printf(s, " - Node: %u (Inode: %u, ", - si->valid_node_count, si->valid_inode_count); - seq_printf(s, "Other: %u)\n - Data: %u\n", - si->valid_node_count - si->valid_inode_count, - si->valid_count - si->valid_node_count); - seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", - si->main_area_segs, si->main_area_sections, - si->main_area_zones); - seq_printf(s, " - COLD data: %d, %d, %d\n", - si->curseg[CURSEG_COLD_DATA], - si->cursec[CURSEG_COLD_DATA], - si->curzone[CURSEG_COLD_DATA]); - seq_printf(s, " - WARM data: %d, %d, %d\n", - si->curseg[CURSEG_WARM_DATA], - si->cursec[CURSEG_WARM_DATA], - si->curzone[CURSEG_WARM_DATA]); - seq_printf(s, " - HOT data: %d, %d, %d\n", - si->curseg[CURSEG_HOT_DATA], - si->cursec[CURSEG_HOT_DATA], - si->curzone[CURSEG_HOT_DATA]); - seq_printf(s, " - Dir dnode: %d, %d, %d\n", - si->curseg[CURSEG_HOT_NODE], - si->cursec[CURSEG_HOT_NODE], - si->curzone[CURSEG_HOT_NODE]); - seq_printf(s, " - File dnode: %d, %d, %d\n", - si->curseg[CURSEG_WARM_NODE], - si->cursec[CURSEG_WARM_NODE], - si->curzone[CURSEG_WARM_NODE]); - seq_printf(s, " - Indir nodes: %d, %d, %d\n", - si->curseg[CURSEG_COLD_NODE], - si->cursec[CURSEG_COLD_NODE], - si->curzone[CURSEG_COLD_NODE]); - seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n", - si->main_area_segs - si->dirty_count - - si->prefree_count - si->free_segs, - si->dirty_count); - seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", - si->prefree_count, si->free_segs, si->free_secs); - seq_printf(s, "GC calls: %d (BG: %d)\n", - si->call_count, si->bg_gc); - seq_printf(s, " - data segments : %d\n", si->data_segs); - seq_printf(s, " - node segments : %d\n", si->node_segs); - seq_printf(s, "Try to move %d blocks\n", si->tot_blks); - seq_printf(s, " - data blocks : %d\n", si->data_blks); - seq_printf(s, " - node blocks : %d\n", si->node_blks); - seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", - si->hit_ext, si->total_ext); - seq_printf(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - nodes %4d in %4d\n", - si->ndirty_node, si->node_pages); - seq_printf(s, " - dents %4d in dirs:%4d\n", - si->ndirty_dent, si->ndirty_dirs); - seq_printf(s, " - meta %4d in %4d\n", - si->ndirty_meta, si->meta_pages); - seq_printf(s, " - NATs %5d > %lu\n", - si->nats, NM_WOUT_THRESHOLD); - seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", - si->sits, si->fnids); - seq_puts(s, "\nDistribution of User Blocks:"); - seq_puts(s, " [ valid | invalid | free ]\n"); - seq_puts(s, " ["); - - for (j = 0; j < si->util_valid; j++) - seq_putc(s, '-'); - seq_putc(s, '|'); - - for (j = 0; j < si->util_invalid; j++) - seq_putc(s, '-'); - seq_putc(s, '|'); - - for (j = 0; j < si->util_free; j++) - seq_putc(s, '-'); - seq_puts(s, "]\n\n"); - seq_printf(s, "SSR: %u blocks in %u segments\n", - si->block_count[SSR], si->segment_count[SSR]); - seq_printf(s, "LFS: %u blocks in %u segments\n", - si->block_count[LFS], si->segment_count[LFS]); - - /* segment usage info */ - update_sit_info(si->sbi); - seq_printf(s, "\nBDF: %u, avg. 
-					vblocks: %u\n",
-			   si->bimodal, si->avg_vblocks);
-
-		/* memory footprint */
-		update_mem_info(si->sbi);
-		seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n",
-				(si->base_mem + si->cache_mem) >> 10,
-				si->base_mem >> 10, si->cache_mem >> 10);
-	}
-	mutex_unlock(&f2fs_stat_mutex);
-	return 0;
-}
-
-static int stat_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, stat_show, inode->i_private);
-}
-
-static const struct file_operations stat_fops = {
-	.open = stat_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-int f2fs_build_stats(struct f2fs_sb_info *sbi)
-{
-	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
-	struct f2fs_stat_info *si;
-
-	si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL);
-	if (!si)
-		return -ENOMEM;
-
-	si->all_area_segs = le32_to_cpu(raw_super->segment_count);
-	si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
-	si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
-	si->ssa_area_segs = le32_to_cpu(raw_super->segment_count_ssa);
-	si->main_area_segs = le32_to_cpu(raw_super->segment_count_main);
-	si->main_area_sections = le32_to_cpu(raw_super->section_count);
-	si->main_area_zones = si->main_area_sections /
-				le32_to_cpu(raw_super->secs_per_zone);
-	si->sbi = sbi;
-	sbi->stat_info = si;
-
-	mutex_lock(&f2fs_stat_mutex);
-	list_add_tail(&si->stat_list, &f2fs_stat_list);
-	mutex_unlock(&f2fs_stat_mutex);
-
-	return 0;
-}
-
-void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
-{
-	struct f2fs_stat_info *si = F2FS_STAT(sbi);
-
-	mutex_lock(&f2fs_stat_mutex);
-	list_del(&si->stat_list);
-	mutex_unlock(&f2fs_stat_mutex);
-
-	kfree(si);
-}
-
-void __init f2fs_create_root_stats(void)
-{
-	debugfs_root = debugfs_create_dir("f2fs", NULL);
-	if (debugfs_root)
-		debugfs_create_file("status", S_IRUGO, debugfs_root,
-				NULL, &stat_fops);
-}
-
-void f2fs_destroy_root_stats(void)
-{
-	debugfs_remove_recursive(debugfs_root);
-	debugfs_root = NULL;
-}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
deleted file mode 100644
index 11cdb75aa0a..00000000000
--- a/fs/f2fs/dir.c
+++ /dev/null
@@ -1,714 +0,0 @@
-/*
- * fs/f2fs/dir.c
- *
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
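[Editor's aside] The debugfs "status" file removed above is a stock single_open() seq_file consumer: one show() callback, wired through single_open(), served by the generic seq_read/seq_lseek/single_release helpers. A minimal, self-contained module sketch of the same pattern follows; the demo_* names are hypothetical and nothing in this sketch is part of the patch itself.

#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/fs.h>

static struct dentry *demo_root;

/* show() runs on every read of the file; seq_file does the buffering */
static int demo_show(struct seq_file *s, void *v)
{
	seq_printf(s, "hello from debugfs\n");
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	/* single_open() binds demo_show() as a one-shot seq_file producer */
	return single_open(file, demo_show, inode->i_private);
}

static const struct file_operations demo_fops = {
	.open		= demo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init demo_init(void)
{
	demo_root = debugfs_create_dir("demo", NULL);
	if (demo_root)
		debugfs_create_file("status", S_IRUGO, demo_root,
				    NULL, &demo_fops);
	return 0;
}

static void __exit demo_exit(void)
{
	debugfs_remove_recursive(demo_root);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");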
- */ -#include -#include -#include "f2fs.h" -#include "node.h" -#include "acl.h" -#include "xattr.h" - -static unsigned long dir_blocks(struct inode *inode) -{ - return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1)) - >> PAGE_CACHE_SHIFT; -} - -static unsigned int dir_buckets(unsigned int level) -{ - if (level < MAX_DIR_HASH_DEPTH / 2) - return 1 << level; - else - return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); -} - -static unsigned int bucket_blocks(unsigned int level) -{ - if (level < MAX_DIR_HASH_DEPTH / 2) - return 2; - else - return 4; -} - -static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { - [F2FS_FT_UNKNOWN] = DT_UNKNOWN, - [F2FS_FT_REG_FILE] = DT_REG, - [F2FS_FT_DIR] = DT_DIR, - [F2FS_FT_CHRDEV] = DT_CHR, - [F2FS_FT_BLKDEV] = DT_BLK, - [F2FS_FT_FIFO] = DT_FIFO, - [F2FS_FT_SOCK] = DT_SOCK, - [F2FS_FT_SYMLINK] = DT_LNK, -}; - -#define S_SHIFT 12 -static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, - [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, -}; - -static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) -{ - mode_t mode = inode->i_mode; - de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; -} - -static unsigned long dir_block_index(unsigned int level, unsigned int idx) -{ - unsigned long i; - unsigned long bidx = 0; - - for (i = 0; i < level; i++) - bidx += dir_buckets(i) * bucket_blocks(i); - bidx += idx * bucket_blocks(level); - return bidx; -} - -static bool early_match_name(const char *name, size_t namelen, - f2fs_hash_t namehash, struct f2fs_dir_entry *de) -{ - if (le16_to_cpu(de->name_len) != namelen) - return false; - - if (de->hash_code != namehash) - return false; - - return true; -} - -static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - const char *name, size_t namelen, int *max_slots, - f2fs_hash_t namehash, struct page **res_page, - bool nocase) -{ - struct f2fs_dir_entry *de; - unsigned long bit_pos, end_pos, next_pos; - struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); - int slots; - - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, 0); - while (bit_pos < NR_DENTRY_IN_BLOCK) { - de = &dentry_blk->dentry[bit_pos]; - slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - - if (nocase) { - if ((le16_to_cpu(de->name_len) == namelen) && - !strncasecmp(dentry_blk->filename[bit_pos], - name, namelen)) { - *res_page = dentry_page; - goto found; - } - } else if (early_match_name(name, namelen, namehash, de)) { - if (!memcmp(dentry_blk->filename[bit_pos], - name, namelen)) { - *res_page = dentry_page; - goto found; - } - } - next_pos = bit_pos + slots; - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, next_pos); - if (bit_pos >= NR_DENTRY_IN_BLOCK) - end_pos = NR_DENTRY_IN_BLOCK; - else - end_pos = bit_pos; - if (*max_slots < end_pos - next_pos) - *max_slots = end_pos - next_pos; - } - - de = NULL; - kunmap(dentry_page); -found: - return de; -} - -static struct f2fs_dir_entry *find_in_level(struct inode *dir, - unsigned int level, const char *name, size_t namelen, - f2fs_hash_t namehash, struct page **res_page) -{ - int s = GET_DENTRY_SLOTS(namelen); - unsigned int nbucket, nblock; - unsigned int bidx, end_block; - struct page *dentry_page; - struct f2fs_dir_entry *de = NULL; - struct f2fs_sb_info 
*sbi = F2FS_SB(dir->i_sb); - bool room = false; - int max_slots = 0; - - BUG_ON(level > MAX_DIR_HASH_DEPTH); - - nbucket = dir_buckets(level); - nblock = bucket_blocks(level); - - bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); - end_block = bidx + nblock; - - for (; bidx < end_block; bidx++) { - bool nocase = false; - - /* no need to allocate new dentry pages to all the indices */ - dentry_page = find_data_page(dir, bidx, true); - if (IS_ERR(dentry_page)) { - room = true; - continue; - } - - if (test_opt(sbi, ANDROID_EMU) && - (sbi->android_emu_flags & F2FS_ANDROID_EMU_NOCASE) && - F2FS_I(dir)->i_advise & FADVISE_ANDROID_EMU) - nocase = true; - - de = find_in_block(dentry_page, name, namelen, - &max_slots, namehash, res_page, - nocase); - if (de) - break; - - if (max_slots >= s) - room = true; - f2fs_put_page(dentry_page, 0); - } - - if (!de && room && F2FS_I(dir)->chash != namehash) { - F2FS_I(dir)->chash = namehash; - F2FS_I(dir)->clevel = level; - } - - return de; -} - -/* - * Find an entry in the specified directory with the wanted name. - * It returns the page where the entry was found (as a parameter - res_page), - * and the entry itself. Page is returned mapped and unlocked. - * Entry is guaranteed to be valid. - */ -struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, - struct qstr *child, struct page **res_page) -{ - const char *name = child->name; - size_t namelen = child->len; - unsigned long npages = dir_blocks(dir); - struct f2fs_dir_entry *de = NULL; - f2fs_hash_t name_hash; - unsigned int max_depth; - unsigned int level; - - if (namelen > F2FS_NAME_LEN) - return NULL; - - if (npages == 0) - return NULL; - - *res_page = NULL; - - name_hash = f2fs_dentry_hash(name, namelen); - max_depth = F2FS_I(dir)->i_current_depth; - - for (level = 0; level < max_depth; level++) { - de = find_in_level(dir, level, name, - namelen, name_hash, res_page); - if (de) - break; - } - if (!de && F2FS_I(dir)->chash != name_hash) { - F2FS_I(dir)->chash = name_hash; - F2FS_I(dir)->clevel = level - 1; - } - return de; -} - -struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) -{ - struct page *page; - struct f2fs_dir_entry *de; - struct f2fs_dentry_block *dentry_blk; - - page = get_lock_data_page(dir, 0); - if (IS_ERR(page)) - return NULL; - - dentry_blk = kmap(page); - de = &dentry_blk->dentry[1]; - *p = page; - unlock_page(page); - return de; -} - -ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) -{ - ino_t res = 0; - struct f2fs_dir_entry *de; - struct page *page; - - de = f2fs_find_entry(dir, qstr, &page); - if (de) { - res = le32_to_cpu(de->ino); - kunmap(page); - f2fs_put_page(page, 0); - } - - return res; -} - -void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, - struct page *page, struct inode *inode) -{ - lock_page(page); - wait_on_page_writeback(page); - de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); - kunmap(page); - set_page_dirty(page); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - mark_inode_dirty(dir); - - /* update parent inode number before releasing dentry page */ - F2FS_I(inode)->i_pino = dir->i_ino; - - f2fs_put_page(page, 1); -} - -static void init_dent_inode(const struct qstr *name, struct page *ipage) -{ - struct f2fs_node *rn; - - /* copy name info. 
to this inode page */ - rn = F2FS_NODE(ipage); - rn->i.i_namelen = cpu_to_le32(name->len); - memcpy(rn->i.i_name, name->name, name->len); - set_page_dirty(ipage); -} - -int update_dent_inode(struct inode *inode, const struct qstr *name) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct page *page; - - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) - return PTR_ERR(page); - - init_dent_inode(name, page); - f2fs_put_page(page, 1); - - return 0; -} - -static int make_empty_dir(struct inode *inode, - struct inode *parent, struct page *page) -{ - struct page *dentry_page; - struct f2fs_dentry_block *dentry_blk; - struct f2fs_dir_entry *de; - void *kaddr; - - dentry_page = get_new_data_page(inode, page, 0, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); - - kaddr = kmap_atomic(dentry_page); - dentry_blk = (struct f2fs_dentry_block *)kaddr; - - de = &dentry_blk->dentry[0]; - de->name_len = cpu_to_le16(1); - de->hash_code = 0; - de->ino = cpu_to_le32(inode->i_ino); - memcpy(dentry_blk->filename[0], ".", 1); - set_de_type(de, inode); - - de = &dentry_blk->dentry[1]; - de->hash_code = 0; - de->name_len = cpu_to_le16(2); - de->ino = cpu_to_le32(parent->i_ino); - memcpy(dentry_blk->filename[1], "..", 2); - set_de_type(de, inode); - - test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); - test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); - kunmap_atomic(kaddr); - - set_page_dirty(dentry_page); - f2fs_put_page(dentry_page, 1); - return 0; -} - -static struct page *init_inode_metadata(struct inode *inode, - struct inode *dir, const struct qstr *name) -{ - struct page *page; - int err; - - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { - page = new_inode_page(inode, name); - if (IS_ERR(page)) - return page; - - if (S_ISDIR(inode->i_mode)) { - err = make_empty_dir(inode, dir, page); - if (err) - goto error; - } - - err = f2fs_init_acl(inode, dir); - if (err) - goto error; - - err = f2fs_init_security(inode, dir, name, page); - if (err) - goto error; - - wait_on_page_writeback(page); - } else { - page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); - if (IS_ERR(page)) - return page; - - wait_on_page_writeback(page); - set_cold_node(inode, page); - } - - init_dent_inode(name, page); - - /* - * This file should be checkpointed during fsync. - * We lost i_pino from now on. 
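[Editor's aside] The lookup path in dir.c above probes one hash bucket per tree level: dir_buckets() doubles the bucket count each level until it saturates, bucket_blocks() gives 2 blocks per bucket on the shallow levels and 4 below that, and dir_block_index() turns (level, bucket) into a file-relative block number. A small userspace sketch of that geometry follows; the MAX_DIR_HASH_DEPTH value of 64 is an assumption taken from the shared on-disk format header, which is not part of this hunk, and the hash constant is just a stand-in for f2fs_dentry_hash() output.

#include <stdio.h>

#define MAX_DIR_HASH_DEPTH 64	/* assumed; defined in f2fs_fs.h, outside this patch */

/* same shape as dir_buckets() above: buckets double per level, then saturate */
static unsigned int dir_buckets(unsigned int level)
{
	if (level < MAX_DIR_HASH_DEPTH / 2)
		return 1 << level;
	return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
}

static unsigned int bucket_blocks(unsigned int level)
{
	return (level < MAX_DIR_HASH_DEPTH / 2) ? 2 : 4;
}

static unsigned long dir_block_index(unsigned int level, unsigned int idx)
{
	unsigned long bidx = 0;
	unsigned int i;

	/* skip every block belonging to the shallower levels */
	for (i = 0; i < level; i++)
		bidx += dir_buckets(i) * bucket_blocks(i);
	return bidx + (unsigned long)idx * bucket_blocks(level);
}

int main(void)
{
	unsigned int hash = 0xdeadbeef;	/* stand-in for f2fs_dentry_hash() */
	unsigned int level;

	/* mirrors find_in_level(): one bucket of 2 (or 4) blocks probed per level */
	for (level = 0; level < 4; level++) {
		unsigned int bucket = hash % dir_buckets(level);

		printf("level %u: bucket %u -> blocks %lu..%lu\n",
		       level, bucket,
		       dir_block_index(level, bucket),
		       dir_block_index(level, bucket) + bucket_blocks(level) - 1);
	}
	return 0;
}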
- */ - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { - file_lost_pino(inode); - inc_nlink(inode); - } - return page; - -error: - f2fs_put_page(page, 1); - remove_inode_page(inode); - return ERR_PTR(err); -} - -static void update_parent_metadata(struct inode *dir, struct inode *inode, - unsigned int current_depth) -{ - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { - if (S_ISDIR(inode->i_mode)) { - inc_nlink(dir); - set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); - } - clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); - } - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - if (F2FS_I(dir)->i_current_depth != current_depth) { - F2FS_I(dir)->i_current_depth = current_depth; - set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); - } - - if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) - update_inode_page(dir); - else - mark_inode_dirty(dir); - - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); -} - -static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) -{ - int bit_start = 0; - int zero_start, zero_end; -next: - zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_start); - if (zero_start >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; - - zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - zero_start); - if (zero_end - zero_start >= slots) - return zero_start; - - bit_start = zero_end + 1; - - if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; - goto next; -} - -/* - * Caller should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op(). - */ -int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) -{ - unsigned int bit_pos; - unsigned int level; - unsigned int current_depth; - unsigned long bidx, block; - f2fs_hash_t dentry_hash; - struct f2fs_dir_entry *de; - unsigned int nbucket, nblock; - size_t namelen = name->len; - struct page *dentry_page = NULL; - struct f2fs_dentry_block *dentry_blk = NULL; - int slots = GET_DENTRY_SLOTS(namelen); - struct page *page; - int err = 0; - int i; - - dentry_hash = f2fs_dentry_hash(name->name, name->len); - level = 0; - current_depth = F2FS_I(dir)->i_current_depth; - if (F2FS_I(dir)->chash == dentry_hash) { - level = F2FS_I(dir)->clevel; - F2FS_I(dir)->chash = 0; - } - -start: - if (current_depth == MAX_DIR_HASH_DEPTH) - return -ENOSPC; - - /* Increase the depth, if required */ - if (level == current_depth) - ++current_depth; - - nbucket = dir_buckets(level); - nblock = bucket_blocks(level); - - bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); - - for (block = bidx; block <= (bidx + nblock - 1); block++) { - dentry_page = get_new_data_page(dir, NULL, block, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); - - dentry_blk = kmap(dentry_page); - bit_pos = room_for_filename(dentry_blk, slots); - if (bit_pos < NR_DENTRY_IN_BLOCK) - goto add_dentry; - - kunmap(dentry_page); - f2fs_put_page(dentry_page, 1); - } - - /* Move to next level to find the empty slot for new dentry */ - ++level; - goto start; -add_dentry: - wait_on_page_writeback(dentry_page); - - page = init_inode_metadata(inode, dir, name); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; - } - de = &dentry_blk->dentry[bit_pos]; - de->hash_code = dentry_hash; - de->name_len = cpu_to_le16(namelen); - memcpy(dentry_blk->filename[bit_pos], name->name, name->len); - de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); - for (i = 0; i < slots; 
i++) - test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); - set_page_dirty(dentry_page); - - /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); - f2fs_put_page(page, 1); - - update_parent_metadata(dir, inode, current_depth); -fail: - clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); - kunmap(dentry_page); - f2fs_put_page(dentry_page, 1); - return err; -} - -/* - * It only removes the dentry from the dentry page,corresponding name - * entry in name page does not need to be touched during deletion. - */ -void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *inode) -{ - struct f2fs_dentry_block *dentry_blk; - unsigned int bit_pos; - struct address_space *mapping = page->mapping; - struct inode *dir = mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); - void *kaddr = page_address(page); - int i; - - lock_page(page); - wait_on_page_writeback(page); - - dentry_blk = (struct f2fs_dentry_block *)kaddr; - bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; - for (i = 0; i < slots; i++) - test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); - - /* Let's check and deallocate this dentry page */ - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - 0); - kunmap(page); /* kunmap - pair of f2fs_find_entry */ - set_page_dirty(page); - - dir->i_ctime = dir->i_mtime = CURRENT_TIME; - - if (inode && S_ISDIR(inode->i_mode)) { - drop_nlink(dir); - update_inode_page(dir); - } else { - mark_inode_dirty(dir); - } - - if (inode) { - inode->i_ctime = CURRENT_TIME; - drop_nlink(inode); - if (S_ISDIR(inode->i_mode)) { - drop_nlink(inode); - i_size_write(inode, 0); - } - update_inode_page(inode); - - if (inode->i_nlink == 0) - add_orphan_inode(sbi, inode->i_ino); - else - release_orphan_inode(sbi); - } - - if (bit_pos == NR_DENTRY_IN_BLOCK) { - truncate_hole(dir, page->index, page->index + 1); - clear_page_dirty_for_io(page); - ClearPageUptodate(page); - dec_page_count(sbi, F2FS_DIRTY_DENTS); - inode_dec_dirty_dents(dir); - } - f2fs_put_page(page, 1); -} - -bool f2fs_empty_dir(struct inode *dir) -{ - unsigned long bidx; - struct page *dentry_page; - unsigned int bit_pos; - struct f2fs_dentry_block *dentry_blk; - unsigned long nblock = dir_blocks(dir); - - for (bidx = 0; bidx < nblock; bidx++) { - void *kaddr; - dentry_page = get_lock_data_page(dir, bidx); - if (IS_ERR(dentry_page)) { - if (PTR_ERR(dentry_page) == -ENOENT) - continue; - else - return false; - } - - kaddr = kmap_atomic(dentry_page); - dentry_blk = (struct f2fs_dentry_block *)kaddr; - if (bidx == 0) - bit_pos = 2; - else - bit_pos = 0; - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_pos); - kunmap_atomic(kaddr); - - f2fs_put_page(dentry_page, 1); - - if (bit_pos < NR_DENTRY_IN_BLOCK) - return false; - } - return true; -} - -static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) -{ - unsigned long pos = file->f_pos; - struct inode *inode = file->f_dentry->d_inode; - unsigned long npages = dir_blocks(inode); - unsigned char *types = NULL; - unsigned int bit_pos = 0, start_bit_pos = 0; - int over = 0; - struct f2fs_dentry_block *dentry_blk = NULL; - struct f2fs_dir_entry *de = NULL; - struct page *dentry_page = NULL; - unsigned int n = 0; - unsigned char d_type = DT_UNKNOWN; - int slots; - - types = f2fs_filetype_table; - bit_pos = (pos % NR_DENTRY_IN_BLOCK); - n = (pos / 
NR_DENTRY_IN_BLOCK); - - for ( ; n < npages; n++) { - dentry_page = get_lock_data_page(inode, n); - if (IS_ERR(dentry_page)) - continue; - - start_bit_pos = bit_pos; - dentry_blk = kmap(dentry_page); - while (bit_pos < NR_DENTRY_IN_BLOCK) { - d_type = DT_UNKNOWN; - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_pos); - if (bit_pos >= NR_DENTRY_IN_BLOCK) - break; - - de = &dentry_blk->dentry[bit_pos]; - if (types && de->file_type < F2FS_FT_MAX) - d_type = types[de->file_type]; - - over = filldir(dirent, - dentry_blk->filename[bit_pos], - le16_to_cpu(de->name_len), - (n * NR_DENTRY_IN_BLOCK) + bit_pos, - le32_to_cpu(de->ino), d_type); - if (over) { - file->f_pos += bit_pos - start_bit_pos; - goto success; - } - slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - bit_pos += slots; - } - bit_pos = 0; - file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK; - kunmap(dentry_page); - f2fs_put_page(dentry_page, 1); - dentry_page = NULL; - } -success: - if (dentry_page && !IS_ERR(dentry_page)) { - kunmap(dentry_page); - f2fs_put_page(dentry_page, 1); - } - - return 0; -} - -const struct file_operations f2fs_dir_operations = { - .llseek = generic_file_llseek, - .read = generic_read_dir, - .readdir = f2fs_readdir, - .fsync = f2fs_sync_file, - .unlocked_ioctl = f2fs_ioctl, -}; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h deleted file mode 100644 index c6c24756047..00000000000 --- a/fs/f2fs/f2fs.h +++ /dev/null @@ -1,1290 +0,0 @@ -/* - * fs/f2fs/f2fs.h - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef _LINUX_F2FS_H -#define _LINUX_F2FS_H - -#include -#include -#include -#include -#include -#include -#include - -/* - * For mount options - */ -#define F2FS_MOUNT_BG_GC 0x00000001 -#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 -#define F2FS_MOUNT_DISCARD 0x00000004 -#define F2FS_MOUNT_NOHEAP 0x00000008 -#define F2FS_MOUNT_XATTR_USER 0x00000010 -#define F2FS_MOUNT_POSIX_ACL 0x00000020 -#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 -#define F2FS_MOUNT_INLINE_XATTR 0x00000080 -#define F2FS_MOUNT_ANDROID_EMU 0x00001000 -#define F2FS_MOUNT_ERRORS_PANIC 0x00002000 -#define F2FS_MOUNT_ERRORS_RECOVER 0x00004000 - -#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option) - -#define ver_after(a, b) (typecheck(unsigned long long, a) && \ - typecheck(unsigned long long, b) && \ - ((long long)((a) - (b)) > 0)) - -typedef u32 block_t; /* - * should not change u32, since it is the on-disk block - * address format, __le32. - */ -typedef u32 nid_t; - -struct f2fs_mount_info { - unsigned int opt; -}; - -#define CRCPOLY_LE 0xedb88320 - -static inline __u32 f2fs_crc32(void *buf, size_t len) -{ - unsigned char *p = (unsigned char *)buf; - __u32 crc = F2FS_SUPER_MAGIC; - int i; - - while (len--) { - crc ^= *p++; - for (i = 0; i < 8; i++) - crc = (crc >> 1) ^ ((crc & 1) ? 
CRCPOLY_LE : 0); - } - return crc; -} - -static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size) -{ - return f2fs_crc32(buf, buf_size) == blk_crc; -} - -/* - * For checkpoint manager - */ -enum { - NAT_BITMAP, - SIT_BITMAP -}; - -/* for the list of orphan inodes */ -struct orphan_inode_entry { - struct list_head list; /* list head */ - nid_t ino; /* inode number */ -}; - -/* for the list of directory inodes */ -struct dir_inode_entry { - struct list_head list; /* list head */ - struct inode *inode; /* vfs inode pointer */ -}; - -/* for the list of fsync inodes, used only during recovery */ -struct fsync_inode_entry { - struct list_head list; /* list head */ - struct inode *inode; /* vfs inode pointer */ - block_t blkaddr; /* block address locating the last inode */ -}; - -#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) -#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits)) - -#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne) -#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid) -#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) -#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) - -static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) -{ - int before = nats_in_cursum(rs); - rs->n_nats = cpu_to_le16(before + i); - return before; -} - -static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) -{ - int before = sits_in_cursum(rs); - rs->n_sits = cpu_to_le16(before + i); - return before; -} - -/* - * ioctl commands - */ -#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS -#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS - -#if defined(__KERNEL__) && defined(CONFIG_COMPAT) -/* - * ioctl commands in 32 bit emulation - */ -#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS -#endif - -/* - * For INODE and NODE manager - */ -/* - * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 - * as its node offset to distinguish from index node blocks. - * But some bits are used to mark the node block. - */ -#define XATTR_NODE_OFFSET ((((unsigned int)-1) << OFFSET_BIT_SHIFT) \ - >> OFFSET_BIT_SHIFT) -enum { - ALLOC_NODE, /* allocate a new node page if needed */ - LOOKUP_NODE, /* look up a node without readahead */ - LOOKUP_NODE_RA, /* - * look up a node with readahead called - * by get_datablock_ro. - */ -}; - -#define F2FS_LINK_MAX 32000 /* maximum link count per file */ - -/* for in-memory extent cache entry */ -struct extent_info { - rwlock_t ext_lock; /* rwlock for consistency */ - unsigned int fofs; /* start offset in a file */ - u32 blk_addr; /* start block address of the extent */ - unsigned int len; /* length of the extent */ -}; - -/* - * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. 
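[Editor's aside] The f2fs_crc32() helper above is a plain bit-serial CRC-32 with the little-endian polynomial, except that it seeds the register with the filesystem magic instead of the conventional ~0, and f2fs_crc_valid() is simply a recompute-and-compare. A standalone userspace restatement that can be compiled and run follows; the numeric value of F2FS_SUPER_MAGIC is an assumption, since the macro comes from the shared f2fs_fs.h header that is not part of this hunk.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define CRCPOLY_LE	 0xedb88320
#define F2FS_SUPER_MAGIC 0xF2F52010	/* assumed value; macro lives in f2fs_fs.h */

/* same bit-at-a-time loop as the kernel helper, seeded with the magic */
static uint32_t f2fs_crc32(const void *buf, size_t len)
{
	const unsigned char *p = buf;
	uint32_t crc = F2FS_SUPER_MAGIC;
	int i;

	while (len--) {
		crc ^= *p++;
		for (i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
	}
	return crc;
}

int main(void)
{
	const char payload[] = "f2fs checkpoint pack";
	uint32_t crc = f2fs_crc32(payload, strlen(payload));

	/* f2fs_crc_valid() is exactly this recompute-and-compare */
	printf("crc = 0x%08x, valid = %d\n", crc,
	       f2fs_crc32(payload, strlen(payload)) == crc);
	return 0;
}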
- */ -#define FADVISE_COLD_BIT 0x01 -#define FADVISE_LOST_PINO_BIT 0x02 -#define FADVISE_ANDROID_EMU 0x10 -#define FADVISE_ANDROID_EMU_ROOT 0x20 - -struct f2fs_inode_info { - struct inode vfs_inode; /* serve a vfs inode */ - unsigned long i_flags; /* keep an inode flags for ioctl */ - unsigned char i_advise; /* use to give file attribute hints */ - unsigned int i_current_depth; /* use only in directory structure */ - unsigned int i_pino; /* parent inode number */ - umode_t i_acl_mode; /* keep file acl mode temporarily */ - - /* Use below internally in f2fs*/ - unsigned long flags; /* use to pass per-file flags */ - atomic_t dirty_dents; /* # of dirty dentry pages */ - f2fs_hash_t chash; /* hash value of given file name */ - unsigned int clevel; /* maximum level of given file name */ - nid_t i_xattr_nid; /* node id that contains xattrs */ - unsigned long long xattr_ver; /* cp version of xattr modification */ - struct extent_info ext; /* in-memory extent cache entry */ -}; - -static inline void get_extent_info(struct extent_info *ext, - struct f2fs_extent i_ext) -{ - write_lock(&ext->ext_lock); - ext->fofs = le32_to_cpu(i_ext.fofs); - ext->blk_addr = le32_to_cpu(i_ext.blk_addr); - ext->len = le32_to_cpu(i_ext.len); - write_unlock(&ext->ext_lock); -} - -static inline void set_raw_extent(struct extent_info *ext, - struct f2fs_extent *i_ext) -{ - read_lock(&ext->ext_lock); - i_ext->fofs = cpu_to_le32(ext->fofs); - i_ext->blk_addr = cpu_to_le32(ext->blk_addr); - i_ext->len = cpu_to_le32(ext->len); - read_unlock(&ext->ext_lock); -} - -struct f2fs_nm_info { - block_t nat_blkaddr; /* base disk address of NAT */ - nid_t max_nid; /* maximum possible node ids */ - nid_t next_scan_nid; /* the next nid to be scanned */ - - /* NAT cache management */ - struct radix_tree_root nat_root;/* root of the nat entry cache */ - rwlock_t nat_tree_lock; /* protect nat_tree_lock */ - unsigned int nat_cnt; /* the # of cached nat entries */ - struct list_head nat_entries; /* cached nat entry list (clean) */ - struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ - - /* free node ids management */ - struct list_head free_nid_list; /* a list for free nids */ - spinlock_t free_nid_list_lock; /* protect free nid list */ - unsigned int fcnt; /* the number of free node id */ - struct mutex build_lock; /* lock for build free nids */ - - /* for checkpoint */ - char *nat_bitmap; /* NAT bitmap pointer */ - int bitmap_size; /* bitmap size */ -}; - -/* - * this structure is used as one of function parameters. - * all the information are dedicated to a given direct node block determined - * by the data offset in a file. - */ -struct dnode_of_data { - struct inode *inode; /* vfs inode pointer */ - struct page *inode_page; /* its inode page, NULL is possible */ - struct page *node_page; /* cached direct node page */ - nid_t nid; /* node id of the direct node block */ - unsigned int ofs_in_node; /* data offset in the node page */ - bool inode_page_locked; /* inode page is locked or not */ - block_t data_blkaddr; /* block address of the node block */ -}; - -static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, - struct page *ipage, struct page *npage, nid_t nid) -{ - memset(dn, 0, sizeof(*dn)); - dn->inode = inode; - dn->inode_page = ipage; - dn->node_page = npage; - dn->nid = nid; -} - -/* - * For SIT manager - * - * By default, there are 6 active log areas across the whole main area. 
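[Editor's aside] The dnode_of_data wrapper defined above is always primed with set_new_dnode() before walking the node tree, and released with f2fs_put_dnode() afterwards. A condensed, hypothetical kernel-context helper showing that calling convention follows (it mirrors the sequence used by the page-fault path in file.c later in this same patch; lookup_data_block is not a real f2fs function).

/* Hypothetical helper: find the on-disk block backing one page of a file. */
static int lookup_data_block(struct inode *inode, pgoff_t index,
			     block_t *blkaddr)
{
	struct dnode_of_data dn;
	int err;

	/* prime the wrapper: no cached inode/node page yet, nid resolved later */
	set_new_dnode(&dn, inode, NULL, NULL, 0);

	/* LOOKUP_NODE: read-only walk of the node tree, no allocation */
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		return err;

	*blkaddr = dn.data_blkaddr;	/* NULL_ADDR here would mean a hole */

	/* drop the node/inode page references taken by the walk */
	f2fs_put_dnode(&dn);
	return 0;
}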
- * When considering hot and cold data separation to reduce cleaning overhead, - * we split 3 for data logs and 3 for node logs as hot, warm, and cold types, - * respectively. - * In the current design, you should not change the numbers intentionally. - * Instead, as a mount option such as active_logs=x, you can use 2, 4, and 6 - * logs individually according to the underlying devices. (default: 6) - * Just in case, on-disk layout covers maximum 16 logs that consist of 8 for - * data and 8 for node logs. - */ -#define NR_CURSEG_DATA_TYPE (3) -#define NR_CURSEG_NODE_TYPE (3) -#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) - -enum { - CURSEG_HOT_DATA = 0, /* directory entry blocks */ - CURSEG_WARM_DATA, /* data blocks */ - CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ - CURSEG_HOT_NODE, /* direct node blocks of directory files */ - CURSEG_WARM_NODE, /* direct node blocks of normal files */ - CURSEG_COLD_NODE, /* indirect node blocks */ - NO_CHECK_TYPE -}; - -struct f2fs_sm_info { - struct sit_info *sit_info; /* whole segment information */ - struct free_segmap_info *free_info; /* free segment information */ - struct dirty_seglist_info *dirty_info; /* dirty segment information */ - struct curseg_info *curseg_array; /* active segment information */ - - struct list_head wblist_head; /* list of under-writeback pages */ - spinlock_t wblist_lock; /* lock for checkpoint */ - - block_t seg0_blkaddr; /* block address of 0'th segment */ - block_t main_blkaddr; /* start block address of main area */ - block_t ssa_blkaddr; /* start block address of SSA area */ - - unsigned int segment_count; /* total # of segments */ - unsigned int main_segments; /* # of segments in main area */ - unsigned int reserved_segments; /* # of reserved segments */ - unsigned int ovp_segments; /* # of overprovision segments */ -}; - -/* - * For superblock - */ -/* - * COUNT_TYPE for monitoring - * - * f2fs monitors the number of several block types such as on-writeback, - * dirty dentry blocks, dirty node blocks, and dirty meta blocks. - */ -enum count_type { - F2FS_WRITEBACK, - F2FS_DIRTY_DENTS, - F2FS_DIRTY_NODES, - F2FS_DIRTY_META, - NR_COUNT_TYPE, -}; - -/* - * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS]. - * The checkpoint procedure blocks all the locks in this fs_lock array. - * Some FS operations grab free locks, and if there is no free lock, - * then wait to grab a lock in a round-robin manner. - */ -#define NR_GLOBAL_LOCKS 8 - -/* - * The below are the page types of bios used in submti_bio(). - * The available types are: - * DATA User data pages. It operates as async mode. - * NODE Node pages. It operates as async mode. - * META FS metadata pages such as SIT, NAT, CP. - * NR_PAGE_TYPE The number of page types. - * META_FLUSH Make sure the previous pages are written - * with waiting the bio's completion - * ... Only can be used with META. 
- */ -enum page_type { - DATA, - NODE, - META, - NR_PAGE_TYPE, - META_FLUSH, -}; - -/* - * Android sdcard emulation flags - */ -#define F2FS_ANDROID_EMU_NOCASE 0x00000001 - -struct f2fs_sb_info { - struct super_block *sb; /* pointer to VFS super block */ - struct proc_dir_entry *s_proc; /* proc entry */ - struct buffer_head *raw_super_buf; /* buffer head of raw sb */ - struct f2fs_super_block *raw_super; /* raw super block pointer */ - int s_dirty; /* dirty flag for checkpoint */ - - /* for node-related operations */ - struct f2fs_nm_info *nm_info; /* node manager */ - struct inode *node_inode; /* cache node blocks */ - - /* for segment-related operations */ - struct f2fs_sm_info *sm_info; /* segment manager */ - struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ - sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ - struct rw_semaphore bio_sem; /* IO semaphore */ - - /* for checkpoint */ - struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ - struct inode *meta_inode; /* cache meta blocks */ - struct mutex cp_mutex; /* checkpoint procedure lock */ - struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */ - struct mutex node_write; /* locking node writes */ - struct mutex writepages; /* mutex for writepages() */ - unsigned char next_lock_num; /* round-robin global locks */ - int por_doing; /* recovery is doing or not */ - int on_build_free_nids; /* build_free_nids is doing */ - - /* for orphan inode management */ - struct list_head orphan_inode_list; /* orphan inode list */ - struct mutex orphan_inode_mutex; /* for orphan inode list */ - unsigned int n_orphans; /* # of orphan inodes */ - - /* for directory inode management */ - struct list_head dir_inode_list; /* dir inode list */ - spinlock_t dir_inode_lock; /* for dir inode list lock */ - - /* basic file system units */ - unsigned int log_sectors_per_block; /* log2 sectors per block */ - unsigned int log_blocksize; /* log2 block size */ - unsigned int blocksize; /* block size */ - unsigned int root_ino_num; /* root inode number*/ - unsigned int node_ino_num; /* node inode number*/ - unsigned int meta_ino_num; /* meta inode number*/ - unsigned int log_blocks_per_seg; /* log2 blocks per segment */ - unsigned int blocks_per_seg; /* blocks per segment */ - unsigned int segs_per_sec; /* segments per section */ - unsigned int secs_per_zone; /* sections per zone */ - unsigned int total_sections; /* total section count */ - unsigned int total_node_count; /* total node block count */ - unsigned int total_valid_node_count; /* valid node block count */ - unsigned int total_valid_inode_count; /* valid inode count */ - int active_logs; /* # of active logs */ - - block_t user_block_count; /* # of user blocks */ - block_t total_valid_block_count; /* # of valid blocks */ - block_t alloc_valid_block_count; /* # of allocated blocks */ - block_t last_valid_block_count; /* for recovery */ - u32 s_next_generation; /* for NFS support */ - atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ - - struct f2fs_mount_info mount_opt; /* mount options */ - - /* for cleaning operations */ - struct mutex gc_mutex; /* mutex for GC */ - struct f2fs_gc_kthread *gc_thread; /* GC thread */ - unsigned int cur_victim_sec; /* current victim section num */ - - /* - * for stat information. - * one is for the LFS mode, and the other is for the SSR mode. 
- */ -#ifdef CONFIG_F2FS_STAT_FS - struct f2fs_stat_info *stat_info; /* FS status information */ - unsigned int segment_count[2]; /* # of allocated segments */ - unsigned int block_count[2]; /* # of allocated blocks */ - int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ - int bg_gc; /* background gc calls */ - unsigned int n_dirty_dirs; /* # of dir inodes */ -#endif - unsigned int last_victim[2]; /* last victim segment # */ - spinlock_t stat_lock; /* lock for stat operations */ - - /* For sysfs suppport */ - struct kobject s_kobj; - struct completion s_kobj_unregister; - - /* For Android sdcard emulation */ - u32 android_emu_uid; - u32 android_emu_gid; - umode_t android_emu_mode; - int android_emu_flags; -}; - -/* - * Inline functions - */ -static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) -{ - return container_of(inode, struct f2fs_inode_info, vfs_inode); -} - -static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) -{ - return (struct f2fs_super_block *)(sbi->raw_super); -} - -static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) -{ - return (struct f2fs_checkpoint *)(sbi->ckpt); -} - -static inline struct f2fs_node *F2FS_NODE(struct page *page) -{ - return (struct f2fs_node *)page_address(page); -} - -static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) -{ - return (struct f2fs_nm_info *)(sbi->nm_info); -} - -static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi) -{ - return (struct f2fs_sm_info *)(sbi->sm_info); -} - -static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi) -{ - return (struct sit_info *)(SM_I(sbi)->sit_info); -} - -static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi) -{ - return (struct free_segmap_info *)(SM_I(sbi)->free_info); -} - -static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi) -{ - return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); -} - -static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) -{ - sbi->s_dirty = 1; -} - -static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) -{ - sbi->s_dirty = 0; -} - -static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) -{ - return le64_to_cpu(cp->checkpoint_ver); -} - -static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) -{ - unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); - return ckpt_flags & f; -} - -static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) -{ - unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); - ckpt_flags |= f; - cp->ckpt_flags = cpu_to_le32(ckpt_flags); -} - -static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) -{ - unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); - ckpt_flags &= (~f); - cp->ckpt_flags = cpu_to_le32(ckpt_flags); -} - -static inline void mutex_lock_all(struct f2fs_sb_info *sbi) -{ - int i; - - for (i = 0; i < NR_GLOBAL_LOCKS; i++) { - /* - * This is the only time we take multiple fs_lock[] - * instances; the order is immaterial since we - * always hold cp_mutex, which serializes multiple - * such operations. 
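[Editor's aside] The fs_lock[] scheme described above lets an ordinary FS operation take any one of the eight slots (trying them all without blocking first, then blocking on a round-robin cursor), while the checkpoint path takes all eight to exclude everyone. A userspace pthread analogue of the per-operation side follows, under the assumption that a plain mutex array is an acceptable stand-in for the kernel mutexes; build with -pthread. The checkpoint-side lock-all step is not shown.

#include <pthread.h>
#include <stdio.h>

#define NR_GLOBAL_LOCKS 8

static pthread_mutex_t fs_lock[NR_GLOBAL_LOCKS];
static unsigned char next_lock_num;

/* same policy as mutex_lock_op(): try every slot without blocking,
 * and only when all are busy block on the round-robin cursor */
static int lock_op(void)
{
	int next = next_lock_num % NR_GLOBAL_LOCKS;
	int i;

	for (i = 0; i < NR_GLOBAL_LOCKS; i++)
		if (pthread_mutex_trylock(&fs_lock[i]) == 0)
			return i;

	pthread_mutex_lock(&fs_lock[next]);
	next_lock_num++;
	return next;
}

static void unlock_op(int ilock)
{
	pthread_mutex_unlock(&fs_lock[ilock]);
}

int main(void)
{
	int i, ilock;

	for (i = 0; i < NR_GLOBAL_LOCKS; i++)
		pthread_mutex_init(&fs_lock[i], NULL);

	ilock = lock_op();		/* an "FS operation" grabs one free slot */
	printf("holding lock slot %d\n", ilock);
	unlock_op(ilock);
	return 0;
}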
- */ - mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); - } -} - -static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) -{ - int i = 0; - for (; i < NR_GLOBAL_LOCKS; i++) - mutex_unlock(&sbi->fs_lock[i]); -} - -static inline int mutex_lock_op(struct f2fs_sb_info *sbi) -{ - unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS; - int i = 0; - - for (; i < NR_GLOBAL_LOCKS; i++) - if (mutex_trylock(&sbi->fs_lock[i])) - return i; - - mutex_lock(&sbi->fs_lock[next_lock]); - sbi->next_lock_num++; - return next_lock; -} - -static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock) -{ - if (ilock < 0) - return; - BUG_ON(ilock >= NR_GLOBAL_LOCKS); - mutex_unlock(&sbi->fs_lock[ilock]); -} - -/* - * Check whether the given nid is within node id range. - */ -static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) -{ - WARN_ON((nid >= NM_I(sbi)->max_nid)); - if (nid >= NM_I(sbi)->max_nid) - return -EINVAL; - return 0; -} - -#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 - -/* - * Check whether the inode has blocks or not - */ -static inline int F2FS_HAS_BLOCKS(struct inode *inode) -{ - if (F2FS_I(inode)->i_xattr_nid) - return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); - else - return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); -} - -static inline int f2fs_handle_error(struct f2fs_sb_info *sbi) -{ - if (test_opt(sbi, ERRORS_PANIC)) - BUG(); - if (test_opt(sbi, ERRORS_RECOVER)) - return 1; - return 0; -} - -static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, - struct inode *inode, blkcnt_t count) -{ - block_t valid_block_count; - - spin_lock(&sbi->stat_lock); - valid_block_count = - sbi->total_valid_block_count + (block_t)count; - if (valid_block_count > sbi->user_block_count) { - spin_unlock(&sbi->stat_lock); - return false; - } - inode->i_blocks += count; - sbi->total_valid_block_count = valid_block_count; - sbi->alloc_valid_block_count += (block_t)count; - spin_unlock(&sbi->stat_lock); - return true; -} - -static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, - struct inode *inode, - blkcnt_t count) -{ - spin_lock(&sbi->stat_lock); - - if (sbi->total_valid_block_count < (block_t)count) { - pr_crit("F2FS-fs (%s): block accounting error: %u < %llu\n", - sbi->sb->s_id, sbi->total_valid_block_count, count); - f2fs_handle_error(sbi); - sbi->total_valid_block_count = count; - } - if (inode->i_blocks < count) { - pr_crit("F2FS-fs (%s): inode accounting error: %llu < %llu\n", - sbi->sb->s_id, inode->i_blocks, count); - f2fs_handle_error(sbi); - inode->i_blocks = count; - } - - inode->i_blocks -= count; - sbi->total_valid_block_count -= (block_t)count; - spin_unlock(&sbi->stat_lock); - return 0; -} - -static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) -{ - atomic_inc(&sbi->nr_pages[count_type]); - F2FS_SET_SB_DIRT(sbi); -} - -static inline void inode_inc_dirty_dents(struct inode *inode) -{ - atomic_inc(&F2FS_I(inode)->dirty_dents); -} - -static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) -{ - atomic_dec(&sbi->nr_pages[count_type]); -} - -static inline void inode_dec_dirty_dents(struct inode *inode) -{ - atomic_dec(&F2FS_I(inode)->dirty_dents); -} - -static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) -{ - return atomic_read(&sbi->nr_pages[count_type]); -} - -static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) -{ - unsigned int pages_per_sec = sbi->segs_per_sec * - (1 << sbi->log_blocks_per_seg); - return ((get_pages(sbi, 
block_type) + pages_per_sec - 1) - >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; -} - -static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) -{ - block_t ret; - spin_lock(&sbi->stat_lock); - ret = sbi->total_valid_block_count; - spin_unlock(&sbi->stat_lock); - return ret; -} - -static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) -{ - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - - /* return NAT or SIT bitmap */ - if (flag == NAT_BITMAP) - return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); - else if (flag == SIT_BITMAP) - return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); - - return 0; -} - -static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) -{ - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - int offset = (flag == NAT_BITMAP) ? - le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; - return &ckpt->sit_nat_version_bitmap + offset; -} - -static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) -{ - block_t start_addr; - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - unsigned long long ckpt_version = cur_cp_version(ckpt); - - start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); - - /* - * odd numbered checkpoint should at cp segment 0 - * and even segent must be at cp segment 1 - */ - if (!(ckpt_version & 1)) - start_addr += sbi->blocks_per_seg; - - return start_addr; -} - -static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) -{ - return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); -} - -static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, - struct inode *inode, - unsigned int count) -{ - block_t valid_block_count; - unsigned int valid_node_count; - - spin_lock(&sbi->stat_lock); - - valid_block_count = sbi->total_valid_block_count + (block_t)count; - valid_node_count = sbi->total_valid_node_count + count; - - if (valid_block_count > sbi->user_block_count) { - spin_unlock(&sbi->stat_lock); - return false; - } - - if (valid_node_count > sbi->total_node_count) { - spin_unlock(&sbi->stat_lock); - return false; - } - - if (inode) - inode->i_blocks += count; - sbi->alloc_valid_block_count += (block_t)count; - sbi->total_valid_node_count = valid_node_count; - sbi->total_valid_block_count = valid_block_count; - spin_unlock(&sbi->stat_lock); - - return true; -} - -static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, - struct inode *inode, - unsigned int count) -{ - spin_lock(&sbi->stat_lock); - - if (sbi->total_valid_block_count < count) { - pr_crit("F2FS-fs (%s): block accounting error: %u < %u\n", - sbi->sb->s_id, sbi->total_valid_block_count, count); - f2fs_handle_error(sbi); - sbi->total_valid_block_count = count; - } - if (sbi->total_valid_node_count < count) { - pr_crit("F2FS-fs (%s): node accounting error: %u < %u\n", - sbi->sb->s_id, sbi->total_valid_node_count, count); - f2fs_handle_error(sbi); - sbi->total_valid_node_count = count; - } - if (inode->i_blocks < count) { - pr_crit("F2FS-fs (%s): inode accounting error: %llu < %u\n", - sbi->sb->s_id, inode->i_blocks, count); - f2fs_handle_error(sbi); - inode->i_blocks = count; - } - - inode->i_blocks -= count; - sbi->total_valid_node_count -= count; - sbi->total_valid_block_count -= (block_t)count; - - spin_unlock(&sbi->stat_lock); -} - -static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) -{ - unsigned int ret; - spin_lock(&sbi->stat_lock); - ret = sbi->total_valid_node_count; - spin_unlock(&sbi->stat_lock); - return ret; -} - -static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) -{ - 
spin_lock(&sbi->stat_lock); - BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); - sbi->total_valid_inode_count++; - spin_unlock(&sbi->stat_lock); -} - -static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) -{ - spin_lock(&sbi->stat_lock); - BUG_ON(!sbi->total_valid_inode_count); - sbi->total_valid_inode_count--; - spin_unlock(&sbi->stat_lock); - return 0; -} - -static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) -{ - unsigned int ret; - spin_lock(&sbi->stat_lock); - ret = sbi->total_valid_inode_count; - spin_unlock(&sbi->stat_lock); - return ret; -} - -static inline void f2fs_put_page(struct page *page, int unlock) -{ - if (!page || IS_ERR(page)) - return; - - if (unlock) { - BUG_ON(!PageLocked(page)); - unlock_page(page); - } - page_cache_release(page); -} - -static inline void f2fs_put_dnode(struct dnode_of_data *dn) -{ - if (dn->node_page) - f2fs_put_page(dn->node_page, 1); - if (dn->inode_page && dn->node_page != dn->inode_page) - f2fs_put_page(dn->inode_page, 0); - dn->node_page = NULL; - dn->inode_page = NULL; -} - -static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, - size_t size, void (*ctor)(void *)) -{ - return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); -} - -#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) - -static inline bool IS_INODE(struct page *page) -{ - struct f2fs_node *p = F2FS_NODE(page); - return RAW_IS_INODE(p); -} - -static inline __le32 *blkaddr_in_node(struct f2fs_node *node) -{ - return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr; -} - -static inline block_t datablock_addr(struct page *node_page, - unsigned int offset) -{ - struct f2fs_node *raw_node; - __le32 *addr_array; - raw_node = F2FS_NODE(node_page); - addr_array = blkaddr_in_node(raw_node); - return le32_to_cpu(addr_array[offset]); -} - -static inline int f2fs_test_bit(unsigned int nr, char *addr) -{ - int mask; - - addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); - return mask & *addr; -} - -static inline int f2fs_set_bit(unsigned int nr, char *addr) -{ - int mask; - int ret; - - addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); - ret = mask & *addr; - *addr |= mask; - return ret; -} - -static inline int f2fs_clear_bit(unsigned int nr, char *addr) -{ - int mask; - int ret; - - addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); - ret = mask & *addr; - *addr &= ~mask; - return ret; -} - -/* used for f2fs_inode_info->flags */ -enum { - FI_NEW_INODE, /* indicate newly allocated inode */ - FI_DIRTY_INODE, /* indicate inode is dirty or not */ - FI_INC_LINK, /* need to increment i_nlink */ - FI_ACL_MODE, /* indicate acl mode */ - FI_NO_ALLOC, /* should not allocate any blocks */ - FI_UPDATE_DIR, /* should update inode block for consistency */ - FI_DELAY_IPUT, /* used for the recovery */ - FI_INLINE_XATTR, /* used for inline xattr */ -}; - -static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) -{ - set_bit(flag, &fi->flags); -} - -static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) -{ - return test_bit(flag, &fi->flags); -} - -static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) -{ - clear_bit(flag, &fi->flags); -} - -static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) -{ - fi->i_acl_mode = mode; - set_inode_flag(fi, FI_ACL_MODE); -} - -static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) -{ - if (is_inode_flag_set(fi, FI_ACL_MODE)) { - clear_inode_flag(fi, FI_ACL_MODE); - return 1; - } - return 
0; -} - -int f2fs_android_emu(struct f2fs_sb_info *, struct inode *, u32 *, u32 *, - umode_t *); - -#define IS_ANDROID_EMU(sbi, fi, pfi) \ - (test_opt((sbi), ANDROID_EMU) && \ - (((fi)->i_advise & FADVISE_ANDROID_EMU) || \ - ((pfi)->i_advise & FADVISE_ANDROID_EMU))) - -static inline void get_inline_info(struct f2fs_inode_info *fi, - struct f2fs_inode *ri) -{ - if (ri->i_inline & F2FS_INLINE_XATTR) - set_inode_flag(fi, FI_INLINE_XATTR); -} - -static inline void set_raw_inline(struct f2fs_inode_info *fi, - struct f2fs_inode *ri) -{ - ri->i_inline = 0; - - if (is_inode_flag_set(fi, FI_INLINE_XATTR)) - ri->i_inline |= F2FS_INLINE_XATTR; -} - -static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) -{ - if (is_inode_flag_set(fi, FI_INLINE_XATTR)) - return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; - return DEF_ADDRS_PER_INODE; -} - -static inline void *inline_xattr_addr(struct page *page) -{ - struct f2fs_inode *ri; - ri = (struct f2fs_inode *)page_address(page); - return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - - F2FS_INLINE_XATTR_ADDRS]); -} - -static inline int inline_xattr_size(struct inode *inode) -{ - if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) - return F2FS_INLINE_XATTR_ADDRS << 2; - else - return 0; -} - -static inline int f2fs_readonly(struct super_block *sb) -{ - return sb->s_flags & MS_RDONLY; -} - -/* - * file.c - */ -int f2fs_sync_file(struct file *, loff_t, loff_t, int); -void truncate_data_blocks(struct dnode_of_data *); -void f2fs_truncate(struct inode *); -int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); -int f2fs_setattr(struct dentry *, struct iattr *); -int truncate_hole(struct inode *, pgoff_t, pgoff_t); -int truncate_data_blocks_range(struct dnode_of_data *, int); -long f2fs_ioctl(struct file *, unsigned int, unsigned long); -long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); - -/* - * inode.c - */ -void f2fs_set_inode_flags(struct inode *); -struct inode *f2fs_iget(struct super_block *, unsigned long); -void update_inode(struct inode *, struct page *); -int update_inode_page(struct inode *); -int f2fs_write_inode(struct inode *, struct writeback_control *); -void f2fs_evict_inode(struct inode *); - -/* - * namei.c - */ -struct dentry *f2fs_get_parent(struct dentry *child); - -/* - * dir.c - */ -struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, - struct page **); -struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); -ino_t f2fs_inode_by_name(struct inode *, struct qstr *); -void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, - struct page *, struct inode *); -int update_dent_inode(struct inode *, const struct qstr *); -int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); -void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); -int f2fs_make_empty(struct inode *, struct inode *); -bool f2fs_empty_dir(struct inode *); - -static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) -{ - return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, - inode); -} - -/* - * super.c - */ -int f2fs_sync_fs(struct super_block *, int); -extern __printf(3, 4) -void f2fs_msg(struct super_block *, const char *, const char *, ...); - -/* - * hash.c - */ -f2fs_hash_t f2fs_dentry_hash(const char *, size_t); - -/* - * node.c - */ -struct dnode_of_data; -struct node_info; - -int is_checkpointed_node(struct f2fs_sb_info *, nid_t); -void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); -int 
get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); -int truncate_inode_blocks(struct inode *, pgoff_t); -int truncate_xattr_node(struct inode *, struct page *); -int remove_inode_page(struct inode *); -struct page *new_inode_page(struct inode *, const struct qstr *); -struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); -void ra_node_page(struct f2fs_sb_info *, nid_t); -struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_node_page_ra(struct page *, int); -void sync_inode_page(struct dnode_of_data *); -int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); -bool alloc_nid(struct f2fs_sb_info *, nid_t *); -void alloc_nid_done(struct f2fs_sb_info *, nid_t); -void alloc_nid_failed(struct f2fs_sb_info *, nid_t); -void recover_node_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, struct node_info *, block_t); -int recover_inode_page(struct f2fs_sb_info *, struct page *); -int restore_node_summary(struct f2fs_sb_info *, unsigned int, - struct f2fs_summary_block *); -void flush_nat_entries(struct f2fs_sb_info *); -int build_node_manager(struct f2fs_sb_info *); -void destroy_node_manager(struct f2fs_sb_info *); -int __init create_node_manager_caches(void); -void destroy_node_manager_caches(void); - -/* - * segment.c - */ -void f2fs_balance_fs(struct f2fs_sb_info *); -void invalidate_blocks(struct f2fs_sb_info *, block_t); -void clear_prefree_segments(struct f2fs_sb_info *); -int npages_for_summary_flush(struct f2fs_sb_info *); -void allocate_new_segments(struct f2fs_sb_info *); -struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); -struct bio *f2fs_bio_alloc(struct block_device *, int); -void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool); -void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); -void write_meta_page(struct f2fs_sb_info *, struct page *); -void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, - block_t, block_t *); -void write_data_page(struct inode *, struct page *, struct dnode_of_data*, - block_t, block_t *); -void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); -void recover_data_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, block_t, block_t); -void rewrite_node_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, block_t, block_t); -void write_data_summaries(struct f2fs_sb_info *, block_t); -void write_node_summaries(struct f2fs_sb_info *, block_t); -int lookup_journal_in_cursum(struct f2fs_summary_block *, - int, unsigned int, int); -void flush_sit_entries(struct f2fs_sb_info *); -int build_segment_manager(struct f2fs_sb_info *); -void destroy_segment_manager(struct f2fs_sb_info *); - -/* - * checkpoint.c - */ -struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); -long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); -int acquire_orphan_inode(struct f2fs_sb_info *); -void release_orphan_inode(struct f2fs_sb_info *); -void add_orphan_inode(struct f2fs_sb_info *, nid_t); -void remove_orphan_inode(struct f2fs_sb_info *, nid_t); -int recover_orphan_inodes(struct f2fs_sb_info *); -int get_valid_checkpoint(struct f2fs_sb_info *); -void set_dirty_dir_page(struct inode *, struct page *); -void add_dirty_dir_inode(struct inode *); -void remove_dirty_dir_inode(struct inode *); -struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); -void sync_dirty_dir_inodes(struct f2fs_sb_info 
*); -void write_checkpoint(struct f2fs_sb_info *, bool); -void init_orphan_info(struct f2fs_sb_info *); -int __init create_checkpoint_caches(void); -void destroy_checkpoint_caches(void); - -/* - * data.c - */ -int reserve_new_block(struct dnode_of_data *); -void update_extent_cache(block_t, struct dnode_of_data *); -struct page *find_data_page(struct inode *, pgoff_t, bool); -struct page *get_lock_data_page(struct inode *, pgoff_t); -struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); -int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); -int do_write_data_page(struct page *); - -/* - * gc.c - */ -int start_gc_thread(struct f2fs_sb_info *); -void stop_gc_thread(struct f2fs_sb_info *); -block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); -int f2fs_gc(struct f2fs_sb_info *); -void build_gc_manager(struct f2fs_sb_info *); -int __init create_gc_caches(void); -void destroy_gc_caches(void); - -/* - * recovery.c - */ -int recover_fsync_data(struct f2fs_sb_info *); -bool space_for_roll_forward(struct f2fs_sb_info *); - -/* - * debug.c - */ -#ifdef CONFIG_F2FS_STAT_FS -struct f2fs_stat_info { - struct list_head stat_list; - struct f2fs_sb_info *sbi; - struct mutex stat_lock; - int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; - int main_area_segs, main_area_sections, main_area_zones; - int hit_ext, total_ext; - int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; - int nats, sits, fnids; - int total_count, utilization; - int bg_gc; - unsigned int valid_count, valid_node_count, valid_inode_count; - unsigned int bimodal, avg_vblocks; - int util_free, util_valid, util_invalid; - int rsvd_segs, overp_segs; - int dirty_count, node_pages, meta_pages; - int prefree_count, call_count; - int tot_segs, node_segs, data_segs, free_segs, free_secs; - int tot_blks, data_blks, node_blks; - int curseg[NR_CURSEG_TYPE]; - int cursec[NR_CURSEG_TYPE]; - int curzone[NR_CURSEG_TYPE]; - - unsigned int segment_count[2]; - unsigned int block_count[2]; - unsigned base_mem, cache_mem; -}; - -static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) -{ - return (struct f2fs_stat_info*)sbi->stat_info; -} - -#define stat_inc_call_count(si) ((si)->call_count++) - -#define stat_inc_seg_count(sbi, type) \ - do { \ - struct f2fs_stat_info *si = F2FS_STAT(sbi); \ - (si)->tot_segs++; \ - if (type == SUM_TYPE_DATA) \ - si->data_segs++; \ - else \ - si->node_segs++; \ - } while (0) - -#define stat_inc_tot_blk_count(si, blks) \ - (si->tot_blks += (blks)) - -#define stat_inc_data_blk_count(sbi, blks) \ - do { \ - struct f2fs_stat_info *si = F2FS_STAT(sbi); \ - stat_inc_tot_blk_count(si, blks); \ - si->data_blks += (blks); \ - } while (0) - -#define stat_inc_node_blk_count(sbi, blks) \ - do { \ - struct f2fs_stat_info *si = F2FS_STAT(sbi); \ - stat_inc_tot_blk_count(si, blks); \ - si->node_blks += (blks); \ - } while (0) - -int f2fs_build_stats(struct f2fs_sb_info *); -void f2fs_destroy_stats(struct f2fs_sb_info *); -void __init f2fs_create_root_stats(void); -void f2fs_destroy_root_stats(void); -#else -#define stat_inc_call_count(si) -#define stat_inc_seg_count(si, type) -#define stat_inc_tot_blk_count(si, blks) -#define stat_inc_data_blk_count(si, blks) -#define stat_inc_node_blk_count(sbi, blks) - -static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } -static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } -static inline void __init f2fs_create_root_stats(void) { } -static inline void 
f2fs_destroy_root_stats(void) { } -#endif - -extern const struct file_operations f2fs_dir_operations; -extern const struct file_operations f2fs_file_operations; -extern const struct inode_operations f2fs_file_inode_operations; -extern const struct address_space_operations f2fs_dblock_aops; -extern const struct address_space_operations f2fs_node_aops; -extern const struct address_space_operations f2fs_meta_aops; -extern const struct inode_operations f2fs_dir_inode_operations; -extern const struct inode_operations f2fs_symlink_inode_operations; -extern const struct inode_operations f2fs_special_inode_operations; -#endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c deleted file mode 100644 index cf4b51c628d..00000000000 --- a/fs/f2fs/file.c +++ /dev/null @@ -1,725 +0,0 @@ -/* - * fs/f2fs/file.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "node.h" -#include "segment.h" -#include "xattr.h" -#include "acl.h" -#include - -static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - block_t old_blk_addr; - struct dnode_of_data dn; - int err, ilock; - - f2fs_balance_fs(sbi); - - /* Wait if fs is frozen. This is racy so we check again later on - * and retry if the fs has been frozen after the page lock has - * been acquired - */ - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); - - /* block allocation */ - ilock = mutex_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); - if (err) { - mutex_unlock_op(sbi, ilock); - goto out; - } - - old_blk_addr = dn.data_blkaddr; - - if (old_blk_addr == NULL_ADDR) { - err = reserve_new_block(&dn); - if (err) { - f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, ilock); - goto out; - } - } - f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, ilock); - - file_update_time(vma->vm_file); - lock_page(page); - if (page->mapping != inode->i_mapping || - page_offset(page) > i_size_read(inode) || - !PageUptodate(page)) { - unlock_page(page); - err = -EFAULT; - goto out; - } - - /* - * check to see if the page is mapped already (no holes) - */ - if (PageMappedToDisk(page)) - goto mapped; - - /* page is wholly or partially inside EOF */ - if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { - unsigned offset; - offset = i_size_read(inode) & ~PAGE_CACHE_MASK; - zero_user_segment(page, offset, PAGE_CACHE_SIZE); - } - set_page_dirty(page); - SetPageUptodate(page); - -mapped: - /* fill the page */ - wait_on_page_writeback(page); -out: - return block_page_mkwrite_return(err); -} - -static const struct vm_operations_struct f2fs_file_vm_ops = { - .fault = filemap_fault, - .page_mkwrite = f2fs_vm_page_mkwrite, -}; - -static int get_parent_ino(struct inode *inode, nid_t *pino) -{ - struct dentry *dentry; - - inode = igrab(inode); - - /* Alex - the following is equivalent to: dentry = d_find_any_alias(inode); */ - dentry = NULL; - spin_lock(&inode->i_lock); - if (!list_empty(&inode->i_dentry)) { - dentry = list_first_entry(&inode->i_dentry, - struct dentry, d_alias); - 
dget(dentry); - } - spin_unlock(&inode->i_lock); - - iput(inode); - if (!dentry) - return 0; - - if (update_dent_inode(inode, &dentry->d_name)) { - dput(dentry); - return 0; - } - - *pino = parent_ino(dentry); - dput(dentry); - return 1; -} - -int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) -{ - struct inode *inode = file->f_mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - int ret = 0; - bool need_cp = false; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .for_reclaim = 0, - }; - - if (f2fs_readonly(inode->i_sb)) - return 0; - - trace_f2fs_sync_file_enter(inode); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret) { - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); - return ret; - } - - /* guarantee free sections for fsync */ - f2fs_balance_fs(sbi); - - mutex_lock(&inode->i_mutex); - - /* - * Both of fdatasync() and fsync() are able to be recovered from - * sudden-power-off. - */ - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) - need_cp = true; - else if (file_wrong_pino(inode)) - need_cp = true; - else if (!space_for_roll_forward(sbi)) - need_cp = true; - else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) - need_cp = true; - else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) - need_cp = true; - - if (need_cp) { - nid_t pino; - - F2FS_I(inode)->xattr_ver = 0; - - /* all the dirty node pages should be flushed for POR */ - ret = f2fs_sync_fs(inode->i_sb, 1); - if (file_wrong_pino(inode) && inode->i_nlink == 1 && - get_parent_ino(inode, &pino)) { - F2FS_I(inode)->i_pino = pino; - file_got_pino(inode); - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - } - } else { - /* if there is no written node page, write its inode page */ - while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - } - filemap_fdatawait_range(sbi->node_inode->i_mapping, - 0, LONG_MAX); - ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); - } -out: - mutex_unlock(&inode->i_mutex); - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); - return ret; -} - -static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) -{ - file_accessed(file); - vma->vm_ops = &f2fs_file_vm_ops; - return 0; -} - -int truncate_data_blocks_range(struct dnode_of_data *dn, int count) -{ - int nr_free = 0, ofs = dn->ofs_in_node; - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); - struct f2fs_node *raw_node; - __le32 *addr; - - raw_node = F2FS_NODE(dn->node_page); - addr = blkaddr_in_node(raw_node) + ofs; - - for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { - block_t blkaddr = le32_to_cpu(*addr); - if (blkaddr == NULL_ADDR) - continue; - - update_extent_cache(NULL_ADDR, dn); - invalidate_blocks(sbi, blkaddr); - nr_free++; - } - if (nr_free) { - dec_valid_block_count(sbi, dn->inode, nr_free); - set_page_dirty(dn->node_page); - sync_inode_page(dn); - } - dn->ofs_in_node = ofs; - - trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, - dn->ofs_in_node, nr_free); - return nr_free; -} - -void truncate_data_blocks(struct dnode_of_data *dn) -{ - truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); -} - -static void truncate_partial_data_page(struct inode *inode, u64 from) -{ - unsigned offset = from & (PAGE_CACHE_SIZE - 1); - struct page *page; - - if (!offset) - return; - - page = find_data_page(inode, from >> 
PAGE_CACHE_SHIFT, false); - if (IS_ERR(page)) - return; - - lock_page(page); - if (page->mapping != inode->i_mapping) { - f2fs_put_page(page, 1); - return; - } - wait_on_page_writeback(page); - zero_user(page, offset, PAGE_CACHE_SIZE - offset); - set_page_dirty(page); - f2fs_put_page(page, 1); -} - -static int truncate_blocks(struct inode *inode, u64 from) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - unsigned int blocksize = inode->i_sb->s_blocksize; - struct dnode_of_data dn; - pgoff_t free_from; - int count = 0, ilock = -1; - int err; - - trace_f2fs_truncate_blocks_enter(inode, from); - - free_from = (pgoff_t) - ((from + blocksize - 1) >> (sbi->log_blocksize)); - - ilock = mutex_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); - if (err) { - if (err == -ENOENT) - goto free_next; - mutex_unlock_op(sbi, ilock); - trace_f2fs_truncate_blocks_exit(inode, err); - return err; - } - - if (IS_INODE(dn.node_page)) - count = ADDRS_PER_INODE(F2FS_I(inode)); - else - count = ADDRS_PER_BLOCK; - - count -= dn.ofs_in_node; - BUG_ON(count < 0); - - if (dn.ofs_in_node || IS_INODE(dn.node_page)) { - truncate_data_blocks_range(&dn, count); - free_from += count; - } - - f2fs_put_dnode(&dn); -free_next: - err = truncate_inode_blocks(inode, free_from); - mutex_unlock_op(sbi, ilock); - - /* lastly zero out the first data page */ - truncate_partial_data_page(inode, from); - - trace_f2fs_truncate_blocks_exit(inode, err); - return err; -} - -void f2fs_truncate(struct inode *inode) -{ - int err; - - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - - trace_f2fs_truncate(inode); - - err = truncate_blocks(inode, i_size_read(inode)); - if (err) { - f2fs_msg(inode->i_sb, KERN_ERR, "truncate failed with %d", - err); - f2fs_handle_error(F2FS_SB(inode->i_sb)); - } else { - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); - } -} - -int f2fs_getattr(struct vfsmount *mnt, - struct dentry *dentry, struct kstat *stat) -{ - struct inode *inode = dentry->d_inode; - generic_fillattr(inode, stat); - stat->blocks <<= 3; - return 0; -} - -#ifdef CONFIG_F2FS_FS_POSIX_ACL -static void __setattr_copy(struct inode *inode, const struct iattr *attr) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int ia_valid = attr->ia_valid; - - if (ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - if (ia_valid & ATTR_ATIME) - inode->i_atime = timespec_trunc(attr->ia_atime, - inode->i_sb->s_time_gran); - if (ia_valid & ATTR_MTIME) - inode->i_mtime = timespec_trunc(attr->ia_mtime, - inode->i_sb->s_time_gran); - if (ia_valid & ATTR_CTIME) - inode->i_ctime = timespec_trunc(attr->ia_ctime, - inode->i_sb->s_time_gran); - if (ia_valid & ATTR_MODE) { - umode_t mode = attr->ia_mode; - - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - mode &= ~S_ISGID; - set_acl_inode(fi, mode); - } -} -#else -#define __setattr_copy setattr_copy -#endif - -int f2fs_setattr(struct dentry *dentry, struct iattr *attr) -{ - struct inode *inode = dentry->d_inode; - struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_inode_info *pfi = F2FS_I(dentry->d_parent->d_inode); - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - int err; - - err = inode_change_ok(inode, attr); - if (err) - return err; - - if (IS_ANDROID_EMU(sbi, fi, pfi)) - f2fs_android_emu(sbi, inode, &attr->ia_uid, &attr->ia_gid, - &attr->ia_mode); - - if ((attr->ia_valid & ATTR_SIZE) && - 
attr->ia_size != i_size_read(inode)) { - truncate_setsize(inode, attr->ia_size); - f2fs_truncate(inode); - f2fs_balance_fs(F2FS_SB(inode->i_sb)); - } - - __setattr_copy(inode, attr); - - if (attr->ia_valid & ATTR_MODE) { - err = f2fs_acl_chmod(inode); - if (err || is_inode_flag_set(fi, FI_ACL_MODE)) { - inode->i_mode = fi->i_acl_mode; - clear_inode_flag(fi, FI_ACL_MODE); - } - } - - mark_inode_dirty(inode); - return err; -} - -const struct inode_operations f2fs_file_inode_operations = { - .getattr = f2fs_getattr, - .setattr = f2fs_setattr, - .get_acl = f2fs_get_acl, -#ifdef CONFIG_F2FS_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = f2fs_listxattr, - .removexattr = generic_removexattr, -#endif -}; - -static void fill_zero(struct inode *inode, pgoff_t index, - loff_t start, loff_t len) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct page *page; - int ilock; - - if (!len) - return; - - f2fs_balance_fs(sbi); - - ilock = mutex_lock_op(sbi); - page = get_new_data_page(inode, NULL, index, false); - mutex_unlock_op(sbi, ilock); - - if (!IS_ERR(page)) { - wait_on_page_writeback(page); - zero_user(page, start, len); - set_page_dirty(page); - f2fs_put_page(page, 1); - } -} - -int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) -{ - pgoff_t index; - int err; - - for (index = pg_start; index < pg_end; index++) { - struct dnode_of_data dn; - - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) { - if (err == -ENOENT) - continue; - return err; - } - - if (dn.data_blkaddr != NULL_ADDR) - truncate_data_blocks_range(&dn, 1); - f2fs_put_dnode(&dn); - } - return 0; -} - -static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) -{ - pgoff_t pg_start, pg_end; - loff_t off_start, off_end; - int ret = 0; - - pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; - pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; - - off_start = offset & (PAGE_CACHE_SIZE - 1); - off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); - - if (pg_start == pg_end) { - fill_zero(inode, pg_start, off_start, - off_end - off_start); - } else { - if (off_start) - fill_zero(inode, pg_start++, off_start, - PAGE_CACHE_SIZE - off_start); - if (off_end) - fill_zero(inode, pg_end, 0, off_end); - - if (pg_start < pg_end) { - struct address_space *mapping = inode->i_mapping; - loff_t blk_start, blk_end; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - int ilock; - - f2fs_balance_fs(sbi); - - blk_start = pg_start << PAGE_CACHE_SHIFT; - blk_end = pg_end << PAGE_CACHE_SHIFT; - truncate_inode_pages_range(mapping, blk_start, - blk_end - 1); - - ilock = mutex_lock_op(sbi); - ret = truncate_hole(inode, pg_start, pg_end); - mutex_unlock_op(sbi, ilock); - } - } - - if (!(mode & FALLOC_FL_KEEP_SIZE) && - i_size_read(inode) <= (offset + len)) { - i_size_write(inode, offset); - mark_inode_dirty(inode); - } - - return ret; -} - -static int expand_inode_data(struct inode *inode, loff_t offset, - loff_t len, int mode) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - pgoff_t index, pg_start, pg_end; - loff_t new_size = i_size_read(inode); - loff_t off_start, off_end; - int ret = 0; - - ret = inode_newsize_ok(inode, (len + offset)); - if (ret) - return ret; - - pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; - pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; - - off_start = offset & (PAGE_CACHE_SIZE - 1); - off_end = (offset + len) & (PAGE_CACHE_SIZE - 
1); - - for (index = pg_start; index <= pg_end; index++) { - struct dnode_of_data dn; - int ilock; - - ilock = mutex_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, index, ALLOC_NODE); - if (ret) { - mutex_unlock_op(sbi, ilock); - break; - } - - if (dn.data_blkaddr == NULL_ADDR) { - ret = reserve_new_block(&dn); - if (ret) { - f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, ilock); - break; - } - } - f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, ilock); - - if (pg_start == pg_end) - new_size = offset + len; - else if (index == pg_start && off_start) - new_size = (index + 1) << PAGE_CACHE_SHIFT; - else if (index == pg_end) - new_size = (index << PAGE_CACHE_SHIFT) + off_end; - else - new_size += PAGE_CACHE_SIZE; - } - - if (!(mode & FALLOC_FL_KEEP_SIZE) && - i_size_read(inode) < new_size) { - i_size_write(inode, new_size); - mark_inode_dirty(inode); - } - - return ret; -} - -static long f2fs_fallocate(struct file *file, int mode, - loff_t offset, loff_t len) -{ - struct inode *inode = file->f_path.dentry->d_inode; - long ret; - - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) - return -EOPNOTSUPP; - - if (mode & FALLOC_FL_PUNCH_HOLE) - ret = punch_hole(inode, offset, len, mode); - else - ret = expand_inode_data(inode, offset, len, mode); - - if (!ret) { - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); - } - trace_f2fs_fallocate(inode, mode, offset, len, ret); - return ret; -} - -#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) -#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) - -static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) -{ - if (S_ISDIR(mode)) - return flags; - else if (S_ISREG(mode)) - return flags & F2FS_REG_FLMASK; - else - return flags & F2FS_OTHER_FLMASK; -} - -long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = filp->f_dentry->d_inode; - struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags; - int ret; - - switch (cmd) { - case F2FS_IOC_GETFLAGS: - flags = fi->i_flags & FS_FL_USER_VISIBLE; - return put_user(flags, (int __user *) arg); - case F2FS_IOC_SETFLAGS: - { - unsigned int oldflags; - - ret = mnt_want_write(filp->f_path.mnt); - if (ret) - return ret; - - if (!inode_owner_or_capable(inode)) { - ret = -EACCES; - goto out; - } - - if (get_user(flags, (int __user *) arg)) { - ret = -EFAULT; - goto out; - } - - flags = f2fs_mask_flags(inode->i_mode, flags); - - mutex_lock(&inode->i_mutex); - - oldflags = fi->i_flags; - - if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - mutex_unlock(&inode->i_mutex); - ret = -EPERM; - goto out; - } - } - - flags = flags & FS_FL_USER_MODIFIABLE; - flags |= oldflags & ~FS_FL_USER_MODIFIABLE; - fi->i_flags = flags; - mutex_unlock(&inode->i_mutex); - - f2fs_set_inode_flags(inode); - inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); -out: - mnt_drop_write(filp->f_path.mnt); - return ret; - } - default: - return -ENOTTY; - } -} - -#ifdef CONFIG_COMPAT -long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case F2FS_IOC32_GETFLAGS: - cmd = F2FS_IOC_GETFLAGS; - break; - case F2FS_IOC32_SETFLAGS: - cmd = F2FS_IOC_SETFLAGS; - break; - default: - return -ENOIOCTLCMD; - } - return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); -} -#endif - -const struct file_operations f2fs_file_operations = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - 
.aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, - .open = generic_file_open, - .mmap = f2fs_file_mmap, - .fsync = f2fs_sync_file, - .fallocate = f2fs_fallocate, - .unlocked_ioctl = f2fs_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = f2fs_compat_ioctl, -#endif - .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, -}; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c deleted file mode 100644 index e51c1b06b16..00000000000 --- a/fs/f2fs/gc.c +++ /dev/null @@ -1,738 +0,0 @@ -/* - * fs/f2fs/gc.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "node.h" -#include "segment.h" -#include "gc.h" -#include - -static struct kmem_cache *winode_slab; - -static int gc_thread_func(void *data) -{ - struct f2fs_sb_info *sbi = data; - struct f2fs_gc_kthread *gc_th = sbi->gc_thread; - wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; - long wait_ms; - - wait_ms = gc_th->min_sleep_time; - - do { - if (try_to_freeze()) - continue; - else - wait_event_interruptible_timeout(*wq, - kthread_should_stop(), - msecs_to_jiffies(wait_ms)); - if (kthread_should_stop()) - break; - - /* - * [GC triggering condition] - * 0. GC is not conducted currently. - * 1. There are enough dirty segments. - * 2. IO subsystem is idle by checking the # of writeback pages. - * 3. IO subsystem is idle by checking the # of requests in - * bdev's request list. - * - * Note) We have to avoid triggering GCs too much frequently. - * Because it is possible that some segments can be - * invalidated soon after by user update or deletion. - * So, I'd like to wait some time to collect dirty segments. 
- */ - if (!mutex_trylock(&sbi->gc_mutex)) - continue; - - if (!is_idle(sbi)) { - wait_ms = increase_sleep_time(gc_th, wait_ms); - mutex_unlock(&sbi->gc_mutex); - continue; - } - - if (has_enough_invalid_blocks(sbi)) - wait_ms = decrease_sleep_time(gc_th, wait_ms); - else - wait_ms = increase_sleep_time(gc_th, wait_ms); - -#ifdef CONFIG_F2FS_STAT_FS - sbi->bg_gc++; -#endif - - /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi)) - wait_ms = gc_th->no_gc_sleep_time; - } while (!kthread_should_stop()); - return 0; -} - -int start_gc_thread(struct f2fs_sb_info *sbi) -{ - struct f2fs_gc_kthread *gc_th; - dev_t dev = sbi->sb->s_bdev->bd_dev; - int err = 0; - - if (!test_opt(sbi, BG_GC)) - goto out; - gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); - if (!gc_th) { - err = -ENOMEM; - goto out; - } - - gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; - gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; - gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; - - gc_th->gc_idle = 0; - - sbi->gc_thread = gc_th; - init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); - sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, - "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(gc_th->f2fs_gc_task)) { - err = PTR_ERR(gc_th->f2fs_gc_task); - kfree(gc_th); - sbi->gc_thread = NULL; - } - -out: - return err; -} - -void stop_gc_thread(struct f2fs_sb_info *sbi) -{ - struct f2fs_gc_kthread *gc_th = sbi->gc_thread; - if (!gc_th) - return; - kthread_stop(gc_th->f2fs_gc_task); - kfree(gc_th); - sbi->gc_thread = NULL; -} - -static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) -{ - int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; - - if (gc_th && gc_th->gc_idle) { - if (gc_th->gc_idle == 1) - gc_mode = GC_CB; - else if (gc_th->gc_idle == 2) - gc_mode = GC_GREEDY; - } - return gc_mode; -} - -static void select_policy(struct f2fs_sb_info *sbi, int gc_type, - int type, struct victim_sel_policy *p) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - - if (p->alloc_mode == SSR) { - p->gc_mode = GC_GREEDY; - p->dirty_segmap = dirty_i->dirty_segmap[type]; - p->ofs_unit = 1; - } else { - p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); - p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; - p->ofs_unit = sbi->segs_per_sec; - } - p->offset = sbi->last_victim[p->gc_mode]; -} - -static unsigned int get_max_cost(struct f2fs_sb_info *sbi, - struct victim_sel_policy *p) -{ - /* SSR allocates in a segment unit */ - if (p->alloc_mode == SSR) - return 1 << sbi->log_blocks_per_seg; - if (p->gc_mode == GC_GREEDY) - return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; - else if (p->gc_mode == GC_CB) - return UINT_MAX; - else /* No other gc_mode */ - return 0; -} - -static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int hint = 0; - unsigned int secno; - - /* - * If the gc_type is FG_GC, we can select victim segments - * selected by background GC before. - * Those segments guarantee they have small valid blocks. 
- */ -next: - secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++); - if (secno < TOTAL_SECS(sbi)) { - if (sec_usage_check(sbi, secno)) - goto next; - clear_bit(secno, dirty_i->victim_secmap); - return secno * sbi->segs_per_sec; - } - return NULL_SEGNO; -} - -static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) -{ - struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SECNO(sbi, segno); - unsigned int start = secno * sbi->segs_per_sec; - unsigned long long mtime = 0; - unsigned int vblocks; - unsigned char age = 0; - unsigned char u; - unsigned int i; - - for (i = 0; i < sbi->segs_per_sec; i++) - mtime += get_seg_entry(sbi, start + i)->mtime; - vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); - - mtime = div_u64(mtime, sbi->segs_per_sec); - vblocks = div_u64(vblocks, sbi->segs_per_sec); - - u = (vblocks * 100) >> sbi->log_blocks_per_seg; - - /* Handle if the system time is changed by user */ - if (mtime < sit_i->min_mtime) - sit_i->min_mtime = mtime; - if (mtime > sit_i->max_mtime) - sit_i->max_mtime = mtime; - if (sit_i->max_mtime != sit_i->min_mtime) - age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime), - sit_i->max_mtime - sit_i->min_mtime); - - return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); -} - -static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, - struct victim_sel_policy *p) -{ - if (p->alloc_mode == SSR) - return get_seg_entry(sbi, segno)->ckpt_valid_blocks; - - /* alloc_mode == LFS */ - if (p->gc_mode == GC_GREEDY) - return get_valid_blocks(sbi, segno, sbi->segs_per_sec); - else - return get_cb_cost(sbi, segno); -} - -/* - * This function is called from two paths. - * One is garbage collection and the other is SSR segment selection. - * When it is called during GC, it just gets a victim segment - * and it does not remove it from dirty seglist. - * When it is called from SSR segment selection, it finds a segment - * which has minimum valid blocks and removes it from dirty seglist. 
- */ -static int get_victim_by_default(struct f2fs_sb_info *sbi, - unsigned int *result, int gc_type, int type, char alloc_mode) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - struct victim_sel_policy p; - unsigned int secno, max_cost; - int nsearched = 0; - - p.alloc_mode = alloc_mode; - select_policy(sbi, gc_type, type, &p); - - p.min_segno = NULL_SEGNO; - p.min_cost = max_cost = get_max_cost(sbi, &p); - - mutex_lock(&dirty_i->seglist_lock); - - if (p.alloc_mode == LFS && gc_type == FG_GC) { - p.min_segno = check_bg_victims(sbi); - if (p.min_segno != NULL_SEGNO) - goto got_it; - } - - while (1) { - unsigned long cost; - unsigned int segno; - - segno = find_next_bit(p.dirty_segmap, - TOTAL_SEGS(sbi), p.offset); - if (segno >= TOTAL_SEGS(sbi)) { - if (sbi->last_victim[p.gc_mode]) { - sbi->last_victim[p.gc_mode] = 0; - p.offset = 0; - continue; - } - break; - } - p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; - secno = GET_SECNO(sbi, segno); - - if (sec_usage_check(sbi, secno)) - continue; - if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) - continue; - - cost = get_gc_cost(sbi, segno, &p); - - if (p.min_cost > cost) { - p.min_segno = segno; - p.min_cost = cost; - } - - if (cost == max_cost) - continue; - - if (nsearched++ >= MAX_VICTIM_SEARCH) { - sbi->last_victim[p.gc_mode] = segno; - break; - } - } - if (p.min_segno != NULL_SEGNO) { -got_it: - if (p.alloc_mode == LFS) { - secno = GET_SECNO(sbi, p.min_segno); - if (gc_type == FG_GC) - sbi->cur_victim_sec = secno; - else - set_bit(secno, dirty_i->victim_secmap); - } - *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; - - trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, - sbi->cur_victim_sec, - prefree_segments(sbi), free_segments(sbi)); - } - mutex_unlock(&dirty_i->seglist_lock); - - return (p.min_segno == NULL_SEGNO) ? 0 : 1; -} - -static const struct victim_selection default_v_ops = { - .get_victim = get_victim_by_default, -}; - -static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) -{ - struct inode_entry *ie; - - list_for_each_entry(ie, ilist, list) - if (ie->inode->i_ino == ino) - return ie->inode; - return NULL; -} - -static void add_gc_inode(struct inode *inode, struct list_head *ilist) -{ - struct inode_entry *new_ie; - - if (inode == find_gc_inode(inode->i_ino, ilist)) { - iput(inode); - return; - } -repeat: - new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); - if (!new_ie) { - cond_resched(); - goto repeat; - } - new_ie->inode = inode; - list_add_tail(&new_ie->list, ilist); -} - -static void put_gc_inode(struct list_head *ilist) -{ - struct inode_entry *ie, *next_ie; - list_for_each_entry_safe(ie, next_ie, ilist, list) { - iput(ie->inode); - list_del(&ie->list); - kmem_cache_free(winode_slab, ie); - } -} - -static int check_valid_map(struct f2fs_sb_info *sbi, - unsigned int segno, int offset) -{ - struct sit_info *sit_i = SIT_I(sbi); - struct seg_entry *sentry; - int ret; - - mutex_lock(&sit_i->sentry_lock); - sentry = get_seg_entry(sbi, segno); - ret = f2fs_test_bit(offset, sentry->cur_valid_map); - mutex_unlock(&sit_i->sentry_lock); - return ret; -} - -/* - * This function compares node address got in summary with that in NAT. - * On validity, copy that node with cold status, otherwise (invalid node) - * ignore that. 
- */ -static void gc_node_segment(struct f2fs_sb_info *sbi, - struct f2fs_summary *sum, unsigned int segno, int gc_type) -{ - bool initial = true; - struct f2fs_summary *entry; - int off; - -next_step: - entry = sum; - - for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { - nid_t nid = le32_to_cpu(entry->nid); - struct page *node_page; - - /* stop BG_GC if there is not enough free sections. */ - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) - return; - - if (check_valid_map(sbi, segno, off) == 0) - continue; - - if (initial) { - ra_node_page(sbi, nid); - continue; - } - node_page = get_node_page(sbi, nid); - if (IS_ERR(node_page)) - continue; - - /* set page dirty and write it */ - if (gc_type == FG_GC) { - f2fs_wait_on_page_writeback(node_page, NODE, true); - set_page_dirty(node_page); - } else { - if (!PageWriteback(node_page)) - set_page_dirty(node_page); - } - f2fs_put_page(node_page, 1); - stat_inc_node_blk_count(sbi, 1); - } - - if (initial) { - initial = false; - goto next_step; - } - - if (gc_type == FG_GC) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .for_reclaim = 0, - }; - sync_node_pages(sbi, 0, &wbc); - - /* - * In the case of FG_GC, it'd be better to reclaim this victim - * completely. - */ - if (get_valid_blocks(sbi, segno, 1) != 0) - goto next_step; - } -} - -/* - * Calculate start block index indicating the given node offset. - * Be careful, caller should give this node offset only indicating direct node - * blocks. If any node offsets, which point the other types of node blocks such - * as indirect or double indirect node blocks, are given, it must be a caller's - * bug. - */ -block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi) -{ - unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; - unsigned int bidx; - - if (node_ofs == 0) - return 0; - - if (node_ofs <= 2) { - bidx = node_ofs - 1; - } else if (node_ofs <= indirect_blks) { - int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); - bidx = node_ofs - 2 - dec; - } else { - int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); - bidx = node_ofs - 5 - dec; - } - return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); -} - -static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, - struct node_info *dni, block_t blkaddr, unsigned int *nofs) -{ - struct page *node_page; - nid_t nid; - unsigned int ofs_in_node; - block_t source_blkaddr; - - nid = le32_to_cpu(sum->nid); - ofs_in_node = le16_to_cpu(sum->ofs_in_node); - - node_page = get_node_page(sbi, nid); - if (IS_ERR(node_page)) - return 0; - - get_node_info(sbi, nid, dni); - - if (sum->version != dni->version) { - f2fs_put_page(node_page, 1); - return 0; - } - - *nofs = ofs_of_node(node_page); - source_blkaddr = datablock_addr(node_page, ofs_in_node); - f2fs_put_page(node_page, 1); - - if (source_blkaddr != blkaddr) - return 0; - return 1; -} - -static void move_data_page(struct inode *inode, struct page *page, int gc_type) -{ - if (gc_type == BG_GC) { - if (PageWriteback(page)) - goto out; - set_page_dirty(page); - set_cold_data(page); - } else { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - - f2fs_wait_on_page_writeback(page, DATA, true); - - if (clear_page_dirty_for_io(page) && - S_ISDIR(inode->i_mode)) { - dec_page_count(sbi, F2FS_DIRTY_DENTS); - inode_dec_dirty_dents(inode); - } - set_cold_data(page); - do_write_data_page(page); - clear_cold_data(page); - } -out: - f2fs_put_page(page, 1); -} - -/* - * This function tries to get parent node of victim 
data block, and identifies - * data block validity. If the block is valid, copy that with cold status and - * modify parent node. - * If the parent node is not valid or the data block address is different, - * the victim data block is ignored. - */ -static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, - struct list_head *ilist, unsigned int segno, int gc_type) -{ - struct super_block *sb = sbi->sb; - struct f2fs_summary *entry; - block_t start_addr; - int off; - int phase = 0; - - start_addr = START_BLOCK(sbi, segno); - -next_step: - entry = sum; - - for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { - struct page *data_page; - struct inode *inode; - struct node_info dni; /* dnode info for the data */ - unsigned int ofs_in_node, nofs; - block_t start_bidx; - - /* stop BG_GC if there is not enough free sections. */ - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) - return; - - if (check_valid_map(sbi, segno, off) == 0) - continue; - - if (phase == 0) { - ra_node_page(sbi, le32_to_cpu(entry->nid)); - continue; - } - - /* Get an inode by ino with checking validity */ - if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0) - continue; - - if (phase == 1) { - ra_node_page(sbi, dni.ino); - continue; - } - - ofs_in_node = le16_to_cpu(entry->ofs_in_node); - - if (phase == 2) { - inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode)) - continue; - - start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); - - data_page = find_data_page(inode, - start_bidx + ofs_in_node, false); - if (IS_ERR(data_page)) - goto next_iput; - - f2fs_put_page(data_page, 0); - add_gc_inode(inode, ilist); - } else { - inode = find_gc_inode(dni.ino, ilist); - if (inode) { - start_bidx = start_bidx_of_node(nofs, - F2FS_I(inode)); - data_page = get_lock_data_page(inode, - start_bidx + ofs_in_node); - if (IS_ERR(data_page)) - continue; - move_data_page(inode, data_page, gc_type); - stat_inc_data_blk_count(sbi, 1); - } - } - continue; -next_iput: - iput(inode); - } - - if (++phase < 4) - goto next_step; - - if (gc_type == FG_GC) { - f2fs_submit_bio(sbi, DATA, true); - - /* - * In the case of FG_GC, it'd be better to reclaim this victim - * completely. 
- */ - if (get_valid_blocks(sbi, segno, 1) != 0) { - phase = 2; - goto next_step; - } - } -} - -static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type, int type) -{ - struct sit_info *sit_i = SIT_I(sbi); - int ret; - mutex_lock(&sit_i->sentry_lock); - ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); - mutex_unlock(&sit_i->sentry_lock); - return ret; -} - -static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, - struct list_head *ilist, int gc_type) -{ - struct page *sum_page; - struct f2fs_summary_block *sum; - struct blk_plug plug; - - /* read segment summary of victim */ - sum_page = get_sum_page(sbi, segno); - if (IS_ERR(sum_page)) - return; - - blk_start_plug(&plug); - - sum = page_address(sum_page); - - switch (GET_SUM_TYPE((&sum->footer))) { - case SUM_TYPE_NODE: - gc_node_segment(sbi, sum->entries, segno, gc_type); - break; - case SUM_TYPE_DATA: - gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); - break; - } - blk_finish_plug(&plug); - - stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); - stat_inc_call_count(sbi->stat_info); - - f2fs_put_page(sum_page, 1); -} - -int f2fs_gc(struct f2fs_sb_info *sbi) -{ - struct list_head ilist; - unsigned int segno, i; - int gc_type = BG_GC; - int nfree = 0; - int ret = -1; - - INIT_LIST_HEAD(&ilist); -gc_more: - if (!(sbi->sb->s_flags & MS_ACTIVE)) - goto stop; - - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { - gc_type = FG_GC; - write_checkpoint(sbi, false); - } - - if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) - goto stop; - ret = 0; - - for (i = 0; i < sbi->segs_per_sec; i++) - do_garbage_collect(sbi, segno + i, &ilist, gc_type); - - if (gc_type == FG_GC) { - sbi->cur_victim_sec = NULL_SEGNO; - nfree++; - WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec)); - } - - if (has_not_enough_free_secs(sbi, nfree)) - goto gc_more; - - if (gc_type == FG_GC) - write_checkpoint(sbi, false); -stop: - mutex_unlock(&sbi->gc_mutex); - - put_gc_inode(&ilist); - return ret; -} - -void build_gc_manager(struct f2fs_sb_info *sbi) -{ - DIRTY_I(sbi)->v_ops = &default_v_ops; -} - -int __init create_gc_caches(void) -{ - winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", - sizeof(struct inode_entry), NULL); - if (!winode_slab) - return -ENOMEM; - return 0; -} - -void destroy_gc_caches(void) -{ - kmem_cache_destroy(winode_slab); -} diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h deleted file mode 100644 index f2a50cb2487..00000000000 --- a/fs/f2fs/gc.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * fs/f2fs/gc.h - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define GC_THREAD_MIN_WB_PAGES 1 /* - * a threshold to determine - * whether IO subsystem is idle - * or not - */ -#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ -#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 -#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ -#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ -#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ - -/* Search max. 
number of dirty segments to select a victim segment */ -#define MAX_VICTIM_SEARCH 20 - -struct f2fs_gc_kthread { - struct task_struct *f2fs_gc_task; - wait_queue_head_t gc_wait_queue_head; - - /* for gc sleep time */ - unsigned int min_sleep_time; - unsigned int max_sleep_time; - unsigned int no_gc_sleep_time; - - /* for changing gc mode */ - unsigned int gc_idle; -}; - -struct inode_entry { - struct list_head list; - struct inode *inode; -}; - -/* - * inline functions - */ -static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) -{ - if (free_segments(sbi) < overprovision_segments(sbi)) - return 0; - else - return (free_segments(sbi) - overprovision_segments(sbi)) - << sbi->log_blocks_per_seg; -} - -static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi) -{ - return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100; -} - -static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) -{ - block_t reclaimable_user_blocks = sbi->user_block_count - - written_block_count(sbi); - return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; -} - -static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) -{ - if (wait == gc_th->no_gc_sleep_time) - return wait; - - wait += gc_th->min_sleep_time; - if (wait > gc_th->max_sleep_time) - wait = gc_th->max_sleep_time; - return wait; -} - -static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) -{ - if (wait == gc_th->no_gc_sleep_time) - wait = gc_th->max_sleep_time; - - wait -= gc_th->min_sleep_time; - if (wait <= gc_th->min_sleep_time) - wait = gc_th->min_sleep_time; - return wait; -} - -static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) -{ - block_t invalid_user_blocks = sbi->user_block_count - - written_block_count(sbi); - /* - * Background GC is triggered with the following condition. - * 1. There are a number of invalid blocks. - * 2. There is not enough free space. - */ - if (invalid_user_blocks > limit_invalid_user_blocks(sbi) && - free_user_blocks(sbi) < limit_free_user_blocks(sbi)) - return true; - return false; -} - -static inline int is_idle(struct f2fs_sb_info *sbi) -{ - struct block_device *bdev = sbi->sb->s_bdev; - struct request_queue *q = bdev_get_queue(bdev); - struct request_list *rl = &q->rq; - return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); -} diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c deleted file mode 100644 index 6eb8d269b53..00000000000 --- a/fs/f2fs/hash.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * fs/f2fs/hash.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * Portions of this code from linux/fs/ext3/hash.c - * - * Copyright (C) 2002 by Theodore Ts'o - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include -#include -#include -#include -#include - -#include "f2fs.h" - -/* - * Hashing code copied from ext3 - */ -#define DELTA 0x9E3779B9 - -static void TEA_transform(unsigned int buf[4], unsigned int const in[]) -{ - __u32 sum = 0; - __u32 b0 = buf[0], b1 = buf[1]; - __u32 a = in[0], b = in[1], c = in[2], d = in[3]; - int n = 16; - - do { - sum += DELTA; - b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); - b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); - } while (--n); - - buf[0] += b0; - buf[1] += b1; -} - -static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) -{ - unsigned pad, val; - int i; - - pad = (__u32)len | ((__u32)len << 8); - pad |= pad << 16; - - val = pad; - if (len > num * 4) - len = num * 4; - for (i = 0; i < len; i++) { - if ((i % 4) == 0) - val = pad; - val = msg[i] + (val << 8); - if ((i % 4) == 3) { - *buf++ = val; - val = pad; - num--; - } - } - if (--num >= 0) - *buf++ = val; - while (--num >= 0) - *buf++ = pad; -} - -f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) -{ - __u32 hash; - f2fs_hash_t f2fs_hash; - const char *p; - __u32 in[8], buf[4]; - - if ((len <= 2) && (name[0] == '.') && - (name[1] == '.' || name[1] == '\0')) - return 0; - - /* Initialize the default seed for the hash checksum functions */ - buf[0] = 0x67452301; - buf[1] = 0xefcdab89; - buf[2] = 0x98badcfe; - buf[3] = 0x10325476; - - p = name; - while (1) { - str2hashbuf(p, len, in, 4); - TEA_transform(buf, in); - p += 16; - if (len <= 16) - break; - len -= 16; - } - hash = buf[0]; - f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); - return f2fs_hash; -} diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c deleted file mode 100644 index b65e8f22f71..00000000000 --- a/fs/f2fs/inode.c +++ /dev/null @@ -1,273 +0,0 @@ -/* - * fs/f2fs/inode.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include -#include -#include -#include - -#include "f2fs.h" -#include "node.h" - -#include - -void f2fs_set_inode_flags(struct inode *inode) -{ - unsigned int flags = F2FS_I(inode)->i_flags; - - inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | - S_NOATIME | S_DIRSYNC); - - if (flags & FS_SYNC_FL) - inode->i_flags |= S_SYNC; - if (flags & FS_APPEND_FL) - inode->i_flags |= S_APPEND; - if (flags & FS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; - if (flags & FS_NOATIME_FL) - inode->i_flags |= S_NOATIME; - if (flags & FS_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; -} - -static int do_read_inode(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct f2fs_inode_info *fi = F2FS_I(inode); - struct page *node_page; - struct f2fs_node *rn; - struct f2fs_inode *ri; - - /* Check if ino is within scope */ - if (check_nid_range(sbi, inode->i_ino)) { - f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", - (unsigned long) inode->i_ino); - return -EINVAL; - } - - node_page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(node_page)) - return PTR_ERR(node_page); - - rn = F2FS_NODE(node_page); - ri = &(rn->i); - - inode->i_mode = le16_to_cpu(ri->i_mode); - inode->i_uid = le32_to_cpu(ri->i_uid); - inode->i_gid = le32_to_cpu(ri->i_gid); - set_nlink(inode, le32_to_cpu(ri->i_links)); - inode->i_size = le64_to_cpu(ri->i_size); - inode->i_blocks = le64_to_cpu(ri->i_blocks); - - inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); - inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); - inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime); - inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec); - inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); - inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); - inode->i_generation = le32_to_cpu(ri->i_generation); - if (ri->i_addr[0]) - inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); - else - inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); - - fi->i_current_depth = le32_to_cpu(ri->i_current_depth); - fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); - fi->i_flags = le32_to_cpu(ri->i_flags); - fi->flags = 0; - fi->i_advise = ri->i_advise; - fi->i_pino = le32_to_cpu(ri->i_pino); - get_extent_info(&fi->ext, ri->i_ext); - get_inline_info(fi, ri); - f2fs_put_page(node_page, 1); - return 0; -} - -struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) -{ - struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct inode *inode; - int ret = 0; - - inode = iget_locked(sb, ino); - if (!inode) - return ERR_PTR(-ENOMEM); - - if (!(inode->i_state & I_NEW)) { - trace_f2fs_iget(inode); - return inode; - } - if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) - goto make_now; - - ret = do_read_inode(inode); - if (ret) - goto bad_inode; -make_now: - if (ino == F2FS_NODE_INO(sbi)) { - inode->i_mapping->a_ops = &f2fs_node_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); - } else if (ino == F2FS_META_INO(sbi)) { - inode->i_mapping->a_ops = &f2fs_meta_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); - } else if (S_ISREG(inode->i_mode)) { - inode->i_op = &f2fs_file_inode_operations; - inode->i_fop = &f2fs_file_operations; - inode->i_mapping->a_ops = &f2fs_dblock_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &f2fs_dir_inode_operations; - inode->i_fop = &f2fs_dir_operations; - inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &f2fs_symlink_inode_operations; - 
inode->i_mapping->a_ops = &f2fs_dblock_aops;
-	} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
-			S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
-		inode->i_op = &f2fs_special_inode_operations;
-		init_special_inode(inode, inode->i_mode, inode->i_rdev);
-	} else {
-		ret = -EIO;
-		goto bad_inode;
-	}
-	unlock_new_inode(inode);
-	trace_f2fs_iget(inode);
-	return inode;
-
-bad_inode:
-	iget_failed(inode);
-	trace_f2fs_iget_exit(inode, ret);
-	return ERR_PTR(ret);
-}
-
-void update_inode(struct inode *inode, struct page *node_page)
-{
-	struct f2fs_node *rn;
-	struct f2fs_inode *ri;
-
-	f2fs_wait_on_page_writeback(node_page, NODE, false);
-
-	rn = F2FS_NODE(node_page);
-	ri = &(rn->i);
-
-	ri->i_mode = cpu_to_le16(inode->i_mode);
-	ri->i_advise = F2FS_I(inode)->i_advise;
-	ri->i_uid = cpu_to_le32(inode->i_uid);
-	ri->i_gid = cpu_to_le32(inode->i_gid);
-	ri->i_links = cpu_to_le32(inode->i_nlink);
-	ri->i_size = cpu_to_le64(i_size_read(inode));
-	ri->i_blocks = cpu_to_le64(inode->i_blocks);
-	set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
-	set_raw_inline(F2FS_I(inode), ri);
-
-	ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
-	ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-	ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
-	ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
-	ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
-	ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
-	ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth);
-	ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid);
-	ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
-	ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
-	ri->i_generation = cpu_to_le32(inode->i_generation);
-
-	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
-		if (old_valid_dev(inode->i_rdev)) {
-			ri->i_addr[0] =
-				cpu_to_le32(old_encode_dev(inode->i_rdev));
-			ri->i_addr[1] = 0;
-		} else {
-			ri->i_addr[0] = 0;
-			ri->i_addr[1] =
-				cpu_to_le32(new_encode_dev(inode->i_rdev));
-			ri->i_addr[2] = 0;
-		}
-	}
-
-	set_cold_node(inode, node_page);
-	set_page_dirty(node_page);
-	clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
-}
-
-int update_inode_page(struct inode *inode)
-{
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-	struct page *node_page;
-
-	node_page = get_node_page(sbi, inode->i_ino);
-	if (IS_ERR(node_page))
-		return PTR_ERR(node_page);
-
-	update_inode(inode, node_page);
-	f2fs_put_page(node_page, 1);
-	return 0;
-}
-
-int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-	int ret, ilock;
-
-	if (inode->i_ino == F2FS_NODE_INO(sbi) ||
-			inode->i_ino == F2FS_META_INO(sbi))
-		return 0;
-
-	if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE))
-		return 0;
-
-	/*
-	 * We need to lock here to prevent producing dirty node pages
-	 * during the urgent cleaning time when running out of free sections.
- */ - ilock = mutex_lock_op(sbi); - ret = update_inode_page(inode); - mutex_unlock_op(sbi, ilock); - - if (wbc) - f2fs_balance_fs(sbi); - - return ret; -} - -/* - * Called at the last iput() if i_nlink is zero - */ -void f2fs_evict_inode(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - int ilock; - - trace_f2fs_evict_inode(inode); - truncate_inode_pages(&inode->i_data, 0); - - if (inode->i_ino == F2FS_NODE_INO(sbi) || - inode->i_ino == F2FS_META_INO(sbi)) - goto no_delete; - - BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents)); - remove_dirty_dir_inode(inode); - - if (inode->i_nlink || is_bad_inode(inode)) - goto no_delete; - - set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); - i_size_write(inode, 0); - - if (F2FS_HAS_BLOCKS(inode)) - f2fs_truncate(inode); - - ilock = mutex_lock_op(sbi); - remove_inode_page(inode); - mutex_unlock_op(sbi, ilock); - -no_delete: - end_writeback(inode); -} diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c deleted file mode 100644 index aa0c4539ab0..00000000000 --- a/fs/f2fs/namei.c +++ /dev/null @@ -1,557 +0,0 @@ -/* - * fs/f2fs/namei.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "node.h" -#include "xattr.h" -#include "acl.h" -#include - -static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) -{ - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - nid_t ino; - struct inode *inode; - bool nid_free = false; - int err, ilock; - - inode = new_inode(sb); - if (!inode) - return ERR_PTR(-ENOMEM); - - ilock = mutex_lock_op(sbi); - if (!alloc_nid(sbi, &ino)) { - mutex_unlock_op(sbi, ilock); - err = -ENOSPC; - goto fail; - } - mutex_unlock_op(sbi, ilock); - - if (IS_ANDROID_EMU(sbi, F2FS_I(dir), F2FS_I(dir))) - f2fs_android_emu(sbi, inode, &inode->i_uid, - &inode->i_gid, &mode); - else { - inode->i_uid = current_fsuid(); - - if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else { - inode->i_gid = current_fsgid(); - } - } - - inode->i_ino = ino; - inode->i_mode = mode; - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_generation = sbi->s_next_generation++; - - err = insert_inode_locked(inode); - if (err) { - err = -EINVAL; - nid_free = true; - goto out; - } - trace_f2fs_new_inode(inode, 0); - mark_inode_dirty(inode); - return inode; - -out: - clear_nlink(inode); - unlock_new_inode(inode); -fail: - trace_f2fs_new_inode(inode, err); - make_bad_inode(inode); - iput(inode); - if (nid_free) - alloc_nid_failed(sbi, ino); - return ERR_PTR(err); -} - -static int is_multimedia_file(const unsigned char *s, const char *sub) -{ - size_t slen = strlen(s); - size_t sublen = strlen(sub); - - if (sublen > slen) - return 0; - - return !strncasecmp(s + slen - sublen, sub, sublen); -} - -/* - * Set multimedia files as cold files for hot/cold data separation - */ -static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, - const unsigned char *name) -{ - int i; - __u8 (*extlist)[8] = sbi->raw_super->extension_list; - - int count = le32_to_cpu(sbi->raw_super->extension_count); - for (i = 0; i < count; i++) { - if (is_multimedia_file(name, extlist[i])) { - file_set_cold(inode); - break; - } - 
} -} - -static int f2fs_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) -{ - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct inode *inode; - nid_t ino = 0; - int err, ilock; - - f2fs_balance_fs(sbi); - - inode = f2fs_new_inode(dir, mode); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) - set_cold_files(sbi, inode, dentry->d_name.name); - - inode->i_op = &f2fs_file_inode_operations; - inode->i_fop = &f2fs_file_operations; - inode->i_mapping->a_ops = &f2fs_dblock_aops; - ino = inode->i_ino; - - ilock = mutex_lock_op(sbi); - err = f2fs_add_link(dentry, inode); - mutex_unlock_op(sbi, ilock); - if (err) - goto out; - - alloc_nid_done(sbi, ino); - - d_instantiate(dentry, inode); - unlock_new_inode(inode); - return 0; -out: - clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); - alloc_nid_failed(sbi, ino); - return err; -} - -static int f2fs_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *dentry) -{ - struct inode *inode = old_dentry->d_inode; - struct super_block *sb; - struct f2fs_sb_info *sbi; - int err, ilock; - - if (inode->i_nlink >= F2FS_LINK_MAX) - return -EMLINK; - - sb = dir->i_sb; - sbi = F2FS_SB(sb); - - f2fs_balance_fs(sbi); - - inode->i_ctime = CURRENT_TIME; - ihold(inode); - - set_inode_flag(F2FS_I(inode), FI_INC_LINK); - ilock = mutex_lock_op(sbi); - err = f2fs_add_link(dentry, inode); - mutex_unlock_op(sbi, ilock); - if (err) - goto out; - - d_instantiate(dentry, inode); - return 0; -out: - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); - iput(inode); - return err; -} - -struct dentry *f2fs_get_parent(struct dentry *child) -{ - struct qstr dotdot = {.name = "..", .len = 2}; - unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot); - if (!ino) - return ERR_PTR(-ENOENT); - return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); -} - -static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct inode *inode = NULL; - struct f2fs_dir_entry *de; - struct page *page; - - if (dentry->d_name.len > F2FS_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - - de = f2fs_find_entry(dir, &dentry->d_name, &page); - if (de) { - nid_t ino = le32_to_cpu(de->ino); - kunmap(page); - f2fs_put_page(page, 0); - - inode = f2fs_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - } - - return d_splice_alias(inode, dentry); -} - -static int f2fs_unlink(struct inode *dir, struct dentry *dentry) -{ - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct inode *inode = dentry->d_inode; - struct f2fs_dir_entry *de; - struct page *page; - int err = -ENOENT; - int ilock; - - trace_f2fs_unlink_enter(dir, dentry); - f2fs_balance_fs(sbi); - - de = f2fs_find_entry(dir, &dentry->d_name, &page); - if (!de) - goto fail; - - err = acquire_orphan_inode(sbi); - if (err) { - kunmap(page); - f2fs_put_page(page, 0); - goto fail; - } - - ilock = mutex_lock_op(sbi); - f2fs_delete_entry(de, page, inode); - mutex_unlock_op(sbi, ilock); - - /* In order to evict this inode, we set it dirty */ - mark_inode_dirty(inode); -fail: - trace_f2fs_unlink_exit(inode, err); - return err; -} - -static int f2fs_symlink(struct inode *dir, struct dentry *dentry, - const char *symname) -{ - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct inode *inode; - size_t symlen = strlen(symname) + 1; - int err, ilock; - - f2fs_balance_fs(sbi); 
- - inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - inode->i_op = &f2fs_symlink_inode_operations; - inode->i_mapping->a_ops = &f2fs_dblock_aops; - - ilock = mutex_lock_op(sbi); - err = f2fs_add_link(dentry, inode); - mutex_unlock_op(sbi, ilock); - if (err) - goto out; - - err = page_symlink(inode, symname, symlen); - alloc_nid_done(sbi, inode->i_ino); - - d_instantiate(dentry, inode); - unlock_new_inode(inode); - return err; -out: - clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); - alloc_nid_failed(sbi, inode->i_ino); - return err; -} - -static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - struct f2fs_sb_info *sbi; - struct inode *inode; - int err, ilock; - - if (dir->i_nlink >= F2FS_LINK_MAX) - return -EMLINK; - - sbi = F2FS_SB(dir->i_sb); - f2fs_balance_fs(sbi); - - inode = f2fs_new_inode(dir, S_IFDIR | mode); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - inode->i_op = &f2fs_dir_inode_operations; - inode->i_fop = &f2fs_dir_operations; - inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); - - set_inode_flag(F2FS_I(inode), FI_INC_LINK); - ilock = mutex_lock_op(sbi); - err = f2fs_add_link(dentry, inode); - mutex_unlock_op(sbi, ilock); - if (err) - goto out_fail; - - alloc_nid_done(sbi, inode->i_ino); - - d_instantiate(dentry, inode); - unlock_new_inode(inode); - - return 0; - -out_fail: - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); - clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); - alloc_nid_failed(sbi, inode->i_ino); - return err; -} - -static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - if (f2fs_empty_dir(inode)) - return f2fs_unlink(dir, dentry); - return -ENOTEMPTY; -} - -static int f2fs_mknod(struct inode *dir, struct dentry *dentry, - int mode, dev_t rdev) -{ - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct inode *inode; - int err = 0; - int ilock; - - if (!new_valid_dev(rdev)) - return -EINVAL; - - f2fs_balance_fs(sbi); - - inode = f2fs_new_inode(dir, mode); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - init_special_inode(inode, inode->i_mode, rdev); - inode->i_op = &f2fs_special_inode_operations; - - ilock = mutex_lock_op(sbi); - err = f2fs_add_link(dentry, inode); - mutex_unlock_op(sbi, ilock); - if (err) - goto out; - - alloc_nid_done(sbi, inode->i_ino); - d_instantiate(dentry, inode); - unlock_new_inode(inode); - return 0; -out: - clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); - alloc_nid_failed(sbi, inode->i_ino); - return err; -} - -static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -{ - struct super_block *sb = old_dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct inode *old_inode = old_dentry->d_inode; - struct inode *new_inode = new_dentry->d_inode; - struct page *old_dir_page; - struct page *old_page, *new_page; - struct f2fs_dir_entry *old_dir_entry = NULL; - struct f2fs_dir_entry *old_entry; - struct f2fs_dir_entry *new_entry; - int err = -ENOENT, ilock = -1; - - f2fs_balance_fs(sbi); - - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); - if (!old_entry) - goto out; - - if (S_ISDIR(old_inode->i_mode)) { - err = -EIO; - old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); - if (!old_dir_entry) - goto out_old; - } 
- - ilock = mutex_lock_op(sbi); - - if (new_inode) { - - err = -ENOTEMPTY; - if (old_dir_entry && !f2fs_empty_dir(new_inode)) - goto out_dir; - - err = -ENOENT; - new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, - &new_page); - if (!new_entry) - goto out_dir; - - err = acquire_orphan_inode(sbi); - if (err) - goto put_out_dir; - - if (update_dent_inode(old_inode, &new_dentry->d_name)) { - release_orphan_inode(sbi); - goto put_out_dir; - } - - f2fs_set_link(new_dir, new_entry, new_page, old_inode); - - new_inode->i_ctime = CURRENT_TIME; - if (old_dir_entry) - drop_nlink(new_inode); - drop_nlink(new_inode); - - if (!new_inode->i_nlink) - add_orphan_inode(sbi, new_inode->i_ino); - else - release_orphan_inode(sbi); - - update_inode_page(old_inode); - update_inode_page(new_inode); - } else { - if (old_dir_entry) { - err = -EMLINK; - if (new_dir->i_nlink >= F2FS_LINK_MAX) - goto out_dir; - } - - err = f2fs_add_link(new_dentry, old_inode); - if (err) - goto out_dir; - - if (old_dir_entry) { - inc_nlink(new_dir); - update_inode_page(new_dir); - } - } - - old_inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(old_inode); - - f2fs_delete_entry(old_entry, old_page, NULL); - - if (old_dir_entry) { - if (old_dir != new_dir) { - f2fs_set_link(old_inode, old_dir_entry, - old_dir_page, new_dir); - } else { - kunmap(old_dir_page); - f2fs_put_page(old_dir_page, 0); - } - drop_nlink(old_dir); - update_inode_page(old_dir); - } - - mutex_unlock_op(sbi, ilock); - return 0; - -put_out_dir: - if (PageLocked(new_page)) - f2fs_put_page(new_page, 1); - else - f2fs_put_page(new_page, 0); -out_dir: - if (old_dir_entry) { - kunmap(old_dir_page); - f2fs_put_page(old_dir_page, 0); - } - mutex_unlock_op(sbi, ilock); -out_old: - kunmap(old_page); - f2fs_put_page(old_page, 0); -out: - return err; -} - -const struct inode_operations f2fs_dir_inode_operations = { - .create = f2fs_create, - .lookup = f2fs_lookup, - .link = f2fs_link, - .unlink = f2fs_unlink, - .symlink = f2fs_symlink, - .mkdir = f2fs_mkdir, - .rmdir = f2fs_rmdir, - .mknod = f2fs_mknod, - .rename = f2fs_rename, - .getattr = f2fs_getattr, - .setattr = f2fs_setattr, - .get_acl = f2fs_get_acl, -#ifdef CONFIG_F2FS_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = f2fs_listxattr, - .removexattr = generic_removexattr, -#endif -}; - -const struct inode_operations f2fs_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = page_follow_link_light, - .put_link = page_put_link, - .getattr = f2fs_getattr, - .setattr = f2fs_setattr, -#ifdef CONFIG_F2FS_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = f2fs_listxattr, - .removexattr = generic_removexattr, -#endif -}; - -const struct inode_operations f2fs_special_inode_operations = { - .getattr = f2fs_getattr, - .setattr = f2fs_setattr, - .get_acl = f2fs_get_acl, -#ifdef CONFIG_F2FS_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = f2fs_listxattr, - .removexattr = generic_removexattr, -#endif -}; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c deleted file mode 100644 index eac51226293..00000000000 --- a/fs/f2fs/node.c +++ /dev/null @@ -1,1859 +0,0 @@ -/* - * fs/f2fs/node.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
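When the rename above overwrites an existing target, the victim loses one link (a second one as well when directories are involved, mirroring how removing a directory also accounts for its self-reference), and only a victim whose link count reaches zero keeps the orphan slot reserved with acquire_orphan_inode(); otherwise the slot is released again. A small stand-alone model of that decision follows; the function names and the link counts fed to it are chosen purely for illustration.

#include <stdio.h>
#include <stdbool.h>

/* Model of the overwrite path in the rename above. */
static void overwrite_target(int victim_nlink, bool directories_involved)
{
    bool orphan_slot_reserved = true;   /* acquire_orphan_inode() */

    if (directories_involved)
        victim_nlink--;                 /* directory victims lose their self-reference too */
    victim_nlink--;                     /* the dentry in the new parent goes away */

    if (victim_nlink == 0) {
        printf("victim kept as orphan (slot used)\n");
    } else {
        orphan_slot_reserved = false;   /* release_orphan_inode() */
        printf("victim still has %d links, orphan slot released (%d)\n",
               victim_nlink, orphan_slot_reserved);
    }
}

int main(void)
{
    overwrite_target(1, false);  /* plain file, single link: becomes an orphan */
    overwrite_target(2, false);  /* hard-linked file: slot given back */
    overwrite_target(2, true);   /* empty directory being replaced: orphaned */
    return 0;
}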
- */ -#include -#include -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "node.h" -#include "segment.h" -#include - -static struct kmem_cache *nat_entry_slab; -static struct kmem_cache *free_nid_slab; - -static void clear_node_page_dirty(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - unsigned int long flags; - - if (PageDirty(page)) { - spin_lock_irqsave(&mapping->tree_lock, flags); - radix_tree_tag_clear(&mapping->page_tree, - page_index(page), - PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); - - clear_page_dirty_for_io(page); - dec_page_count(sbi, F2FS_DIRTY_NODES); - } - ClearPageUptodate(page); -} - -static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) -{ - pgoff_t index = current_nat_addr(sbi, nid); - return get_meta_page(sbi, index); -} - -static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) -{ - struct page *src_page; - struct page *dst_page; - pgoff_t src_off; - pgoff_t dst_off; - void *src_addr; - void *dst_addr; - struct f2fs_nm_info *nm_i = NM_I(sbi); - - src_off = current_nat_addr(sbi, nid); - dst_off = next_nat_addr(sbi, src_off); - - /* get current nat block page with lock */ - src_page = get_meta_page(sbi, src_off); - - /* Dirty src_page means that it is already the new target NAT page. */ - if (PageDirty(src_page)) - return src_page; - - dst_page = grab_meta_page(sbi, dst_off); - - src_addr = page_address(src_page); - dst_addr = page_address(dst_page); - memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); - set_page_dirty(dst_page); - f2fs_put_page(src_page, 1); - - set_to_next_nat(nm_i, nid); - - return dst_page; -} - -/* - * Readahead NAT pages - */ -static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) -{ - struct address_space *mapping = sbi->meta_inode->i_mapping; - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct blk_plug plug; - struct page *page; - pgoff_t index; - int i; - - blk_start_plug(&plug); - - for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { - if (nid >= nm_i->max_nid) - nid = 0; - index = current_nat_addr(sbi, nid); - - page = grab_cache_page(mapping, index); - if (!page) - continue; - if (PageUptodate(page)) { - f2fs_put_page(page, 1); - continue; - } - if (f2fs_readpage(sbi, page, index, READ)) - continue; - - f2fs_put_page(page, 0); - } - blk_finish_plug(&plug); -} - -static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) -{ - return radix_tree_lookup(&nm_i->nat_root, n); -} - -static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, - nid_t start, unsigned int nr, struct nat_entry **ep) -{ - return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr); -} - -static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) -{ - list_del(&e->list); - radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); - nm_i->nat_cnt--; - kmem_cache_free(nat_entry_slab, e); -} - -int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct nat_entry *e; - int is_cp = 1; - - read_lock(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); - if (e && !e->checkpointed) - is_cp = 0; - read_unlock(&nm_i->nat_tree_lock); - return is_cp; -} - -static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) -{ - struct nat_entry *new; - - new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); - if (!new) - return NULL; - if 
(radix_tree_insert(&nm_i->nat_root, nid, new)) { - kmem_cache_free(nat_entry_slab, new); - return NULL; - } - memset(new, 0, sizeof(struct nat_entry)); - nat_set_nid(new, nid); - list_add_tail(&new->list, &nm_i->nat_entries); - nm_i->nat_cnt++; - return new; -} - -static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, - struct f2fs_nat_entry *ne) -{ - struct nat_entry *e; -retry: - write_lock(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); - if (!e) { - e = grab_nat_entry(nm_i, nid); - if (!e) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; - } - nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); - nat_set_ino(e, le32_to_cpu(ne->ino)); - nat_set_version(e, ne->version); - e->checkpointed = true; - } - write_unlock(&nm_i->nat_tree_lock); -} - -static int set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, - block_t new_blkaddr) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct nat_entry *e; -retry: - write_lock(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ni->nid); - if (!e) { - e = grab_nat_entry(nm_i, ni->nid); - if (!e) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; - } - e->ni = *ni; - e->checkpointed = true; - BUG_ON(ni->blk_addr == NEW_ADDR); - } else if (new_blkaddr == NEW_ADDR) { - /* - * when nid is reallocated, - * previous nat entry can be remained in nat cache. - * So, reinitialize it with new information. - */ - e->ni = *ni; - if (ni->blk_addr != NULL_ADDR) { - f2fs_msg(sbi->sb, KERN_ERR, "node block address is " - "already set: %u", ni->blk_addr); - f2fs_handle_error(sbi); - /* just give up on this node */ - write_unlock(&nm_i->nat_tree_lock); - return -EIO; - } - } - - if (new_blkaddr == NEW_ADDR) - e->checkpointed = false; - - /* sanity check */ - BUG_ON(nat_get_blkaddr(e) != ni->blk_addr); - BUG_ON(nat_get_blkaddr(e) == NULL_ADDR && - new_blkaddr == NULL_ADDR); - BUG_ON(nat_get_blkaddr(e) == NEW_ADDR && - new_blkaddr == NEW_ADDR); - BUG_ON(nat_get_blkaddr(e) != NEW_ADDR && - nat_get_blkaddr(e) != NULL_ADDR && - new_blkaddr == NEW_ADDR); - - /* increament version no as node is removed */ - if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { - unsigned char version = nat_get_version(e); - nat_set_version(e, inc_node_version(version)); - } - - /* change address */ - nat_set_blkaddr(e, new_blkaddr); - __set_nat_cache_dirty(nm_i, e); - write_unlock(&nm_i->nat_tree_lock); - return 0; -} - -static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - - if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) - return 0; - - write_lock(&nm_i->nat_tree_lock); - while (nr_shrink && !list_empty(&nm_i->nat_entries)) { - struct nat_entry *ne; - ne = list_first_entry(&nm_i->nat_entries, - struct nat_entry, list); - __del_from_nat_cache(nm_i, ne); - nr_shrink--; - } - write_unlock(&nm_i->nat_tree_lock); - return nr_shrink; -} - -/* - * This function returns always success - */ -void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; - nid_t start_nid = START_NID(nid); - struct f2fs_nat_block *nat_blk; - struct page *page = NULL; - struct f2fs_nat_entry ne; - struct nat_entry *e; - int i; - - memset(&ne, 0, sizeof(struct f2fs_nat_entry)); - ni->nid = nid; - - /* Check nat cache */ - read_lock(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); - if (e) { - ni->ino = nat_get_ino(e); - ni->blk_addr = 
nat_get_blkaddr(e); - ni->version = nat_get_version(e); - } - read_unlock(&nm_i->nat_tree_lock); - if (e) - return; - - /* Check current segment summary */ - mutex_lock(&curseg->curseg_mutex); - i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); - if (i >= 0) { - ne = nat_in_journal(sum, i); - node_info_from_raw_nat(ni, &ne); - } - mutex_unlock(&curseg->curseg_mutex); - if (i >= 0) - goto cache; - - /* Fill node_info from nat page */ - page = get_current_nat_page(sbi, start_nid); - nat_blk = (struct f2fs_nat_block *)page_address(page); - ne = nat_blk->entries[nid - start_nid]; - node_info_from_raw_nat(ni, &ne); - f2fs_put_page(page, 1); -cache: - /* cache nat entry */ - cache_nat_entry(NM_I(sbi), nid, &ne); -} - -/* - * The maximum depth is four. - * Offset[0] will have raw inode offset. - */ -static int get_node_path(struct f2fs_inode_info *fi, long block, - int offset[4], unsigned int noffset[4]) -{ - const long direct_index = ADDRS_PER_INODE(fi); - const long direct_blks = ADDRS_PER_BLOCK; - const long dptrs_per_blk = NIDS_PER_BLOCK; - const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; - const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK; - int n = 0; - int level = 0; - - noffset[0] = 0; - - if (block < direct_index) { - offset[n] = block; - goto got; - } - block -= direct_index; - if (block < direct_blks) { - offset[n++] = NODE_DIR1_BLOCK; - noffset[n] = 1; - offset[n] = block; - level = 1; - goto got; - } - block -= direct_blks; - if (block < direct_blks) { - offset[n++] = NODE_DIR2_BLOCK; - noffset[n] = 2; - offset[n] = block; - level = 1; - goto got; - } - block -= direct_blks; - if (block < indirect_blks) { - offset[n++] = NODE_IND1_BLOCK; - noffset[n] = 3; - offset[n++] = block / direct_blks; - noffset[n] = 4 + offset[n - 1]; - offset[n] = block % direct_blks; - level = 2; - goto got; - } - block -= indirect_blks; - if (block < indirect_blks) { - offset[n++] = NODE_IND2_BLOCK; - noffset[n] = 4 + dptrs_per_blk; - offset[n++] = block / direct_blks; - noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; - offset[n] = block % direct_blks; - level = 2; - goto got; - } - block -= indirect_blks; - if (block < dindirect_blks) { - offset[n++] = NODE_DIND_BLOCK; - noffset[n] = 5 + (dptrs_per_blk * 2); - offset[n++] = block / indirect_blks; - noffset[n] = 6 + (dptrs_per_blk * 2) + - offset[n - 1] * (dptrs_per_blk + 1); - offset[n++] = (block / direct_blks) % dptrs_per_blk; - noffset[n] = 7 + (dptrs_per_blk * 2) + - offset[n - 2] * (dptrs_per_blk + 1) + - offset[n - 1]; - offset[n] = block % direct_blks; - level = 3; - goto got; - } else { - BUG(); - } -got: - return level; -} - -/* - * Caller should call f2fs_put_dnode(dn). - * Also, it should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op() only if ro is not set RDONLY_NODE. - * In the case of RDONLY_NODE, we don't need to care about mutex. 
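get_node_info() above resolves a node id in three steps: first the in-memory NAT cache, then the NAT journal kept in the current hot-data summary block, and only then the NAT block itself, after which the result is cached. A compact user-space sketch of that fall-through order; the lookup helpers and the addresses they return are invented for the example.

#include <stdio.h>

/* Stand-in lookups; return -1 for "not found". */
static int lookup_nat_cache(int nid)   { return nid == 1 ? 100 : -1; }
static int lookup_nat_journal(int nid) { return nid == 2 ? 200 : -1; }
static int lookup_nat_block(int nid)   { return 300 + nid; }

/* Mirrors the fall-through in get_node_info(): cache, then the journal
 * in the current summary block, then the on-disk NAT block. */
static int resolve_blkaddr(int nid)
{
    int addr;

    if ((addr = lookup_nat_cache(nid)) >= 0)
        return addr;               /* hot path, no I/O */
    if ((addr = lookup_nat_journal(nid)) >= 0)
        return addr;               /* updated but not yet flushed to the NAT area */
    return lookup_nat_block(nid);  /* read the NAT page */
}

int main(void)
{
    for (int nid = 1; nid <= 3; nid++)
        printf("nid %d -> blkaddr %d\n", nid, resolve_blkaddr(nid));
    return 0;
}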
- */ -int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); - struct page *npage[4]; - struct page *parent; - int offset[4]; - unsigned int noffset[4]; - nid_t nids[4]; - int level, i; - int err = 0; - - level = get_node_path(F2FS_I(dn->inode), index, offset, noffset); - - nids[0] = dn->inode->i_ino; - npage[0] = dn->inode_page; - - if (!npage[0]) { - npage[0] = get_node_page(sbi, nids[0]); - if (IS_ERR(npage[0])) - return PTR_ERR(npage[0]); - } - parent = npage[0]; - if (level != 0) - nids[1] = get_nid(parent, offset[0], true); - dn->inode_page = npage[0]; - dn->inode_page_locked = true; - - /* get indirect or direct nodes */ - for (i = 1; i <= level; i++) { - bool done = false; - - if (!nids[i] && mode == ALLOC_NODE) { - /* alloc new node */ - if (!alloc_nid(sbi, &(nids[i]))) { - err = -ENOSPC; - goto release_pages; - } - - dn->nid = nids[i]; - npage[i] = new_node_page(dn, noffset[i], NULL); - if (IS_ERR(npage[i])) { - alloc_nid_failed(sbi, nids[i]); - err = PTR_ERR(npage[i]); - goto release_pages; - } - - set_nid(parent, offset[i - 1], nids[i], i == 1); - alloc_nid_done(sbi, nids[i]); - done = true; - } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { - npage[i] = get_node_page_ra(parent, offset[i - 1]); - if (IS_ERR(npage[i])) { - err = PTR_ERR(npage[i]); - goto release_pages; - } - done = true; - } - if (i == 1) { - dn->inode_page_locked = false; - unlock_page(parent); - } else { - f2fs_put_page(parent, 1); - } - - if (!done) { - npage[i] = get_node_page(sbi, nids[i]); - if (IS_ERR(npage[i])) { - err = PTR_ERR(npage[i]); - f2fs_put_page(npage[0], 0); - goto release_out; - } - } - if (i < level) { - parent = npage[i]; - nids[i + 1] = get_nid(parent, offset[i], false); - } - } - dn->nid = nids[level]; - dn->ofs_in_node = offset[level]; - dn->node_page = npage[level]; - dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); - return 0; - -release_pages: - f2fs_put_page(parent, 1); - if (i > 1) - f2fs_put_page(npage[0], 0); -release_out: - dn->inode_page = NULL; - dn->node_page = NULL; - return err; -} - -static void truncate_node(struct dnode_of_data *dn) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); - struct node_info ni; - - get_node_info(sbi, dn->nid, &ni); - if (dn->inode->i_blocks == 0) { - if (ni.blk_addr != NULL_ADDR) { - f2fs_msg(sbi->sb, KERN_ERR, - "empty node still has block address %u ", - ni.blk_addr); - f2fs_handle_error(sbi); - } - goto invalidate; - } - BUG_ON(ni.blk_addr == NULL_ADDR); - - /* Deallocate node address */ - invalidate_blocks(sbi, ni.blk_addr); - dec_valid_node_count(sbi, dn->inode, 1); - set_node_addr(sbi, &ni, NULL_ADDR); - - if (dn->nid == dn->inode->i_ino) { - remove_orphan_inode(sbi, dn->nid); - dec_valid_inode_count(sbi); - } else { - sync_inode_page(dn); - } -invalidate: - clear_node_page_dirty(dn->node_page); - F2FS_SET_SB_DIRT(sbi); - - f2fs_put_page(dn->node_page, 1); - dn->node_page = NULL; - trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); -} - -static int truncate_dnode(struct dnode_of_data *dn) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); - struct page *page; - - if (dn->nid == 0) - return 1; - - /* get direct node */ - page = get_node_page(sbi, dn->nid); - if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) - return 1; - else if (IS_ERR(page)) - return PTR_ERR(page); - - /* Make dnode_of_data for parameter */ - dn->node_page = page; - dn->ofs_in_node = 0; - truncate_data_blocks(dn); - truncate_node(dn); - return 1; 
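The file-offset-to-node-path mapping used by get_dnode_of_data() above is pure arithmetic over the inode's direct pointers, two direct nodes, two indirect nodes and one double-indirect node. The stand-alone program below reproduces that ladder with illustrative per-block counts (923 pointers in the inode, 1018 per node block, roughly what a 4KB block gives); the slot numbers 1..5 stand in for the kernel's NODE_DIR1_BLOCK..NODE_DIND_BLOCK constants, and the node-offset bookkeeping (noffset[]) is left out.

#include <stdio.h>

#define DIRECT_IN_INODE 923   /* illustrative ADDRS_PER_INODE */
#define ADDRS_PER_BLK   1018  /* illustrative ADDRS_PER_BLOCK */
#define NIDS_PER_BLK    1018  /* illustrative NIDS_PER_BLOCK */

/* Same decision ladder as get_node_path(): returns the tree depth and
 * fills offset[] with the slot to follow at each level. */
static int node_path(long block, int offset[4])
{
    const long direct = ADDRS_PER_BLK;
    const long indirect = (long)ADDRS_PER_BLK * NIDS_PER_BLK;
    int n = 0;

    if (block < DIRECT_IN_INODE) {                /* pointer right in the inode */
        offset[n] = block;
        return 0;
    }
    block -= DIRECT_IN_INODE;
    if (block < 2 * direct) {                     /* one of the two direct nodes */
        offset[n++] = (block < direct) ? 1 : 2;   /* DIR1 / DIR2 slot */
        offset[n] = block % direct;
        return 1;
    }
    block -= 2 * direct;
    if (block < 2 * indirect) {                   /* one of the two indirect nodes */
        offset[n++] = (block < indirect) ? 3 : 4; /* IND1 / IND2 slot */
        block %= indirect;
        offset[n++] = block / direct;
        offset[n] = block % direct;
        return 2;
    }
    block -= 2 * indirect;                        /* double-indirect node */
    offset[n++] = 5;                              /* DIND slot */
    offset[n++] = block / indirect;
    offset[n++] = (block / direct) % NIDS_PER_BLK;
    offset[n] = block % direct;
    return 3;
}

int main(void)
{
    long samples[] = { 0, 922, 923, 2000, 5000000 };
    for (int i = 0; i < 5; i++) {
        int off[4] = { 0, 0, 0, 0 };
        int level = node_path(samples[i], off);
        printf("block %8ld -> level %d, offsets %d/%d/%d/%d\n",
               samples[i], level, off[0], off[1], off[2], off[3]);
    }
    return 0;
}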
-} - -static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, - int ofs, int depth) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); - struct dnode_of_data rdn = *dn; - struct page *page; - struct f2fs_node *rn; - nid_t child_nid; - unsigned int child_nofs; - int freed = 0; - int i, ret; - - if (dn->nid == 0) - return NIDS_PER_BLOCK + 1; - - trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); - - page = get_node_page(sbi, dn->nid); - if (IS_ERR(page)) { - trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); - return PTR_ERR(page); - } - - rn = F2FS_NODE(page); - if (depth < 3) { - for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { - child_nid = le32_to_cpu(rn->in.nid[i]); - if (child_nid == 0) - continue; - rdn.nid = child_nid; - ret = truncate_dnode(&rdn); - if (ret < 0) - goto out_err; - set_nid(page, i, 0, false); - } - } else { - child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; - for (i = ofs; i < NIDS_PER_BLOCK; i++) { - child_nid = le32_to_cpu(rn->in.nid[i]); - if (child_nid == 0) { - child_nofs += NIDS_PER_BLOCK + 1; - continue; - } - rdn.nid = child_nid; - ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); - if (ret == (NIDS_PER_BLOCK + 1)) { - set_nid(page, i, 0, false); - child_nofs += ret; - } else if (ret < 0 && ret != -ENOENT) { - goto out_err; - } - } - freed = child_nofs; - } - - if (!ofs) { - /* remove current indirect node */ - dn->node_page = page; - truncate_node(dn); - freed++; - } else { - f2fs_put_page(page, 1); - } - trace_f2fs_truncate_nodes_exit(dn->inode, freed); - return freed; - -out_err: - f2fs_put_page(page, 1); - trace_f2fs_truncate_nodes_exit(dn->inode, ret); - return ret; -} - -static int truncate_partial_nodes(struct dnode_of_data *dn, - struct f2fs_inode *ri, int *offset, int depth) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); - struct page *pages[2]; - nid_t nid[3]; - nid_t child_nid; - int err = 0; - int i; - int idx = depth - 2; - - nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); - if (!nid[0]) - return 0; - - /* get indirect nodes in the path */ - for (i = 0; i < depth - 1; i++) { - /* refernece count'll be increased */ - pages[i] = get_node_page(sbi, nid[i]); - if (IS_ERR(pages[i])) { - depth = i + 1; - err = PTR_ERR(pages[i]); - goto fail; - } - nid[i + 1] = get_nid(pages[i], offset[i + 1], false); - } - - /* free direct nodes linked to a partial indirect node */ - for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) { - child_nid = get_nid(pages[idx], i, false); - if (!child_nid) - continue; - dn->nid = child_nid; - err = truncate_dnode(dn); - if (err < 0) - goto fail; - set_nid(pages[idx], i, 0, false); - } - - if (offset[depth - 1] == 0) { - dn->node_page = pages[idx]; - dn->nid = nid[idx]; - truncate_node(dn); - } else { - f2fs_put_page(pages[idx], 1); - } - offset[idx]++; - offset[depth - 1] = 0; -fail: - for (i = depth - 3; i >= 0; i--) - f2fs_put_page(pages[i], 1); - - trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); - - return err; -} - -/* - * All the block addresses of data and nodes should be nullified. 
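truncate_nodes() above reports how many node blocks a call released, counting freed children plus the node itself, so the caller can advance its position in the node-offset space; that is why a fully emptied indirect node accounts for NIDS_PER_BLOCK + 1 blocks. The toy recursion below shows the same counting convention on an in-memory tree; the fan-out, node layout and names are made up for the example.

#include <stdio.h>
#include <stdlib.h>

#define FANOUT 4   /* stands in for NIDS_PER_BLOCK */

/* interior == 1 models an indirect node, 0 a direct node. */
struct tnode { int interior; struct tnode *child[FANOUT]; };

static struct tnode *make(int interior)
{
    struct tnode *n = calloc(1, sizeof(*n));
    if (n)
        n->interior = interior;
    return n;
}

/* Free a subtree and return how many node blocks were released,
 * counting the node itself last, a simplified version of the
 * convention truncate_dnode()/truncate_nodes() follow. */
static int free_subtree(struct tnode *n)
{
    int freed = 0;

    if (!n)
        return 0;
    if (n->interior)
        for (int i = 0; i < FANOUT; i++)
            freed += free_subtree(n->child[i]);
    free(n);
    return freed + 1;
}

int main(void)
{
    struct tnode *ind = make(1);            /* one indirect node ...  */
    for (int i = 0; i < FANOUT; i++)
        ind->child[i] = make(0);            /* ... with FANOUT leaves */

    printf("freed %d node blocks\n", free_subtree(ind));  /* FANOUT + 1 */
    return 0;
}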
- */ -int truncate_inode_blocks(struct inode *inode, pgoff_t from) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct address_space *node_mapping = sbi->node_inode->i_mapping; - int err = 0, cont = 1; - int level, offset[4], noffset[4]; - unsigned int nofs = 0; - struct f2fs_node *rn; - struct dnode_of_data dn; - struct page *page; - - trace_f2fs_truncate_inode_blocks_enter(inode, from); - - level = get_node_path(F2FS_I(inode), from, offset, noffset); -restart: - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { - trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); - return PTR_ERR(page); - } - - set_new_dnode(&dn, inode, page, NULL, 0); - unlock_page(page); - - rn = F2FS_NODE(page); - switch (level) { - case 0: - case 1: - nofs = noffset[1]; - break; - case 2: - nofs = noffset[1]; - if (!offset[level - 1]) - goto skip_partial; - err = truncate_partial_nodes(&dn, &rn->i, offset, level); - if (err < 0 && err != -ENOENT) - goto fail; - nofs += 1 + NIDS_PER_BLOCK; - break; - case 3: - nofs = 5 + 2 * NIDS_PER_BLOCK; - if (!offset[level - 1]) - goto skip_partial; - err = truncate_partial_nodes(&dn, &rn->i, offset, level); - if (err < 0 && err != -ENOENT) - goto fail; - break; - default: - BUG(); - } - -skip_partial: - while (cont) { - dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]); - switch (offset[0]) { - case NODE_DIR1_BLOCK: - case NODE_DIR2_BLOCK: - err = truncate_dnode(&dn); - break; - - case NODE_IND1_BLOCK: - case NODE_IND2_BLOCK: - err = truncate_nodes(&dn, nofs, offset[1], 2); - break; - - case NODE_DIND_BLOCK: - err = truncate_nodes(&dn, nofs, offset[1], 3); - cont = 0; - break; - - default: - BUG(); - } - if (err < 0 && err != -ENOENT) - goto fail; - if (offset[1] == 0 && - rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { - lock_page(page); - if (page->mapping != node_mapping) { - f2fs_put_page(page, 1); - goto restart; - } - wait_on_page_writeback(page); - rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; - set_page_dirty(page); - unlock_page(page); - } - offset[1] = 0; - offset[0]++; - nofs += err; - } -fail: - f2fs_put_page(page, 0); - trace_f2fs_truncate_inode_blocks_exit(inode, err); - return err > 0 ? 0 : err; -} - -int truncate_xattr_node(struct inode *inode, struct page *page) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - nid_t nid = F2FS_I(inode)->i_xattr_nid; - struct dnode_of_data dn; - struct page *npage; - - if (!nid) - return 0; - - npage = get_node_page(sbi, nid); - if (IS_ERR(npage)) - return PTR_ERR(npage); - - F2FS_I(inode)->i_xattr_nid = 0; - - /* need to do checkpoint during fsync */ - F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); - - set_new_dnode(&dn, inode, page, npage, nid); - - if (page) - dn.inode_page_locked = 1; - truncate_node(&dn); - return 0; -} - -/* - * Caller should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op(). 
- */ -int remove_inode_page(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct page *page; - nid_t ino = inode->i_ino; - struct dnode_of_data dn; - int err; - - page = get_node_page(sbi, ino); - if (IS_ERR(page)) - return PTR_ERR(page); - - err = truncate_xattr_node(inode, page); - if (err) { - f2fs_put_page(page, 1); - return err; - } - - /* 0 is possible, after f2fs_new_inode() is failed */ - if (inode->i_blocks != 0 && inode->i_blocks != 1) { - f2fs_msg(sbi->sb, KERN_ERR, "inode %u still has %llu blocks", - ino, inode->i_blocks); - f2fs_handle_error(sbi); - } - set_new_dnode(&dn, inode, page, page, ino); - truncate_node(&dn); - return 0; -} - -struct page *new_inode_page(struct inode *inode, const struct qstr *name) -{ - struct dnode_of_data dn; - - /* allocate inode page for new inode */ - set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); - - /* caller should f2fs_put_page(page, 1); */ - return new_node_page(&dn, 0, NULL); -} - -struct page *new_node_page(struct dnode_of_data *dn, - unsigned int ofs, struct page *ipage) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); - struct address_space *mapping = sbi->node_inode->i_mapping; - struct node_info old_ni, new_ni; - struct page *page; - int err; - - if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) - return ERR_PTR(-EPERM); - - page = grab_cache_page(mapping, dn->nid); - if (!page) - return ERR_PTR(-ENOMEM); - - if (!inc_valid_node_count(sbi, dn->inode, 1)) { - err = -ENOSPC; - goto fail; - } - - get_node_info(sbi, dn->nid, &old_ni); - - /* Reinitialize old_ni with new node page */ - BUG_ON(old_ni.blk_addr != NULL_ADDR); - new_ni = old_ni; - new_ni.ino = dn->inode->i_ino; - set_node_addr(sbi, &new_ni, NEW_ADDR); - - fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); - set_cold_node(dn->inode, page); - SetPageUptodate(page); - set_page_dirty(page); - - if (ofs == XATTR_NODE_OFFSET) - F2FS_I(dn->inode)->i_xattr_nid = dn->nid; - - dn->node_page = page; - if (ipage) - update_inode(dn->inode, ipage); - else - sync_inode_page(dn); - if (ofs == 0) - inc_valid_inode_count(sbi); - - return page; - -fail: - clear_node_page_dirty(page); - f2fs_put_page(page, 1); - return ERR_PTR(err); -} - -/* - * Caller should do after getting the following values. 
- * 0: f2fs_put_page(page, 0) - * LOCKED_PAGE: f2fs_put_page(page, 1) - * error: nothing - */ -static int read_node_page(struct page *page, int type) -{ - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); - struct node_info ni; - - get_node_info(sbi, page->index, &ni); - - if (ni.blk_addr == NULL_ADDR) { - f2fs_put_page(page, 1); - return -ENOENT; - } - - if (PageUptodate(page)) - return LOCKED_PAGE; - - return f2fs_readpage(sbi, page, ni.blk_addr, type); -} - -/* - * Readahead a node page - */ -void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) -{ - struct address_space *mapping = sbi->node_inode->i_mapping; - struct page *apage; - int err; - - apage = find_get_page(mapping, nid); - if (apage && PageUptodate(apage)) { - f2fs_put_page(apage, 0); - return; - } - f2fs_put_page(apage, 0); - - apage = grab_cache_page(mapping, nid); - if (!apage) - return; - - err = read_node_page(apage, READA); - if (err == 0) - f2fs_put_page(apage, 0); - else if (err == LOCKED_PAGE) - f2fs_put_page(apage, 1); -} - -struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) -{ - struct address_space *mapping = sbi->node_inode->i_mapping; - struct page *page; - int err; -repeat: - page = grab_cache_page(mapping, nid); - if (!page) - return ERR_PTR(-ENOMEM); - - err = read_node_page(page, READ_SYNC); - if (err < 0) - return ERR_PTR(err); - else if (err == LOCKED_PAGE) - goto got_it; - - lock_page(page); - if (!PageUptodate(page)) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } - if (page->mapping != mapping) { - f2fs_put_page(page, 1); - goto repeat; - } -got_it: - if (nid != nid_of_node(page)) { - f2fs_msg(sbi->sb, KERN_ERR, "page node id does not match " - "request: %lu", nid); - f2fs_handle_error(sbi); - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } - mark_page_accessed(page); - return page; -} - -/* - * Return a locked page for the desired node page. - * And, readahead MAX_RA_NODE number of node pages. - */ -struct page *get_node_page_ra(struct page *parent, int start) -{ - struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); - struct address_space *mapping = sbi->node_inode->i_mapping; - struct blk_plug plug; - struct page *page; - int err, i, end; - nid_t nid; - - /* First, try getting the desired direct node. 
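read_node_page() above deliberately leaves the page in different lock states depending on its return value, as the comment before it spells out, and callers such as ra_node_page() and get_node_page() pick the matching f2fs_put_page() form. Below is a tiny model of a caller handling that tri-state result; the helper and its argument are stand-ins, only the three-way branching reflects the code above.

#include <stdio.h>
#include <errno.h>

#define LOCKED_PAGE 1   /* page was already uptodate and is still locked */

/* Stand-in for read_node_page(): <0 error (page already released),
 * 0 read submitted (page unlocked elsewhere), LOCKED_PAGE no read needed. */
static int read_node_page_model(int state)
{
    return state;
}

static void readahead_one(int state)
{
    int err = read_node_page_model(state);

    /* Mirrors ra_node_page(): how the page is put depends on who
     * still holds the page lock, which is what the result encodes. */
    if (err == 0)
        printf("read submitted: put page without unlocking\n");
    else if (err == LOCKED_PAGE)
        printf("already uptodate: unlock and put\n");
    else
        printf("error %d: page was released by the callee\n", err);
}

int main(void)
{
    readahead_one(0);
    readahead_one(LOCKED_PAGE);
    readahead_one(-ENOENT);
    return 0;
}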
*/ - nid = get_nid(parent, start, false); - if (!nid) - return ERR_PTR(-ENOENT); -repeat: - page = grab_cache_page(mapping, nid); - if (!page) - return ERR_PTR(-ENOMEM); - - err = read_node_page(page, READ_SYNC); - if (err < 0) - return ERR_PTR(err); - else if (err == LOCKED_PAGE) - goto page_hit; - - blk_start_plug(&plug); - - /* Then, try readahead for siblings of the desired node */ - end = start + MAX_RA_NODE; - end = min(end, NIDS_PER_BLOCK); - for (i = start + 1; i < end; i++) { - nid = get_nid(parent, i, false); - if (!nid) - continue; - ra_node_page(sbi, nid); - } - - blk_finish_plug(&plug); - - lock_page(page); - if (page->mapping != mapping) { - f2fs_put_page(page, 1); - goto repeat; - } -page_hit: - if (!PageUptodate(page)) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } - mark_page_accessed(page); - return page; -} - -void sync_inode_page(struct dnode_of_data *dn) -{ - if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { - update_inode(dn->inode, dn->node_page); - } else if (dn->inode_page) { - if (!dn->inode_page_locked) - lock_page(dn->inode_page); - update_inode(dn->inode, dn->inode_page); - if (!dn->inode_page_locked) - unlock_page(dn->inode_page); - } else { - update_inode_page(dn->inode); - } -} - -int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, - struct writeback_control *wbc) -{ - struct address_space *mapping = sbi->node_inode->i_mapping; - pgoff_t index, end; - struct pagevec pvec; - int step = ino ? 2 : 0; - int nwritten = 0, wrote = 0; - - pagevec_init(&pvec, 0); - -next_step: - index = 0; - end = LONG_MAX; - - while (index <= end) { - int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (nr_pages == 0) - break; - - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - /* - * flushing sequence with step: - * 0. indirect nodes - * 1. dentry dnodes - * 2. file dnodes - */ - if (step == 0 && IS_DNODE(page)) - continue; - if (step == 1 && (!IS_DNODE(page) || - is_cold_node(page))) - continue; - if (step == 2 && (!IS_DNODE(page) || - !is_cold_node(page))) - continue; - - /* - * If an fsync mode, - * we should not skip writing node pages. 
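sync_node_pages() above flushes dirty node pages in three passes, as its inline comment says: indirect nodes first, then directory dnodes, then file dnodes, using the cold-node mark to tell the last two apart (directory node blocks are not marked cold). The snippet below captures just that classification; it is a model of the skip tests, not the real writeback loop.

#include <stdio.h>
#include <stdbool.h>

/* Which pass of the flush writes a given node page.  Mirrors the skip
 * tests in sync_node_pages(): pass 0 takes non-dnodes (indirect nodes),
 * pass 1 directory dnodes (not cold), pass 2 file dnodes (cold). */
static int flush_pass(bool is_dnode, bool is_cold)
{
    if (!is_dnode)
        return 0;
    return is_cold ? 2 : 1;
}

int main(void)
{
    printf("indirect node   -> pass %d\n", flush_pass(false, false));
    printf("directory dnode -> pass %d\n", flush_pass(true,  false));
    printf("file dnode      -> pass %d\n", flush_pass(true,  true));
    return 0;
}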
- */ - if (ino && ino_of_node(page) == ino) - lock_page(page); - else if (!trylock_page(page)) - continue; - - if (unlikely(page->mapping != mapping)) { -continue_unlock: - unlock_page(page); - continue; - } - if (ino && ino_of_node(page) != ino) - goto continue_unlock; - - if (!PageDirty(page)) { - /* someone wrote it for us */ - goto continue_unlock; - } - - if (!clear_page_dirty_for_io(page)) - goto continue_unlock; - - /* called by fsync() */ - if (ino && IS_DNODE(page)) { - int mark = !is_checkpointed_node(sbi, ino); - set_fsync_mark(page, 1); - if (IS_INODE(page)) - set_dentry_mark(page, mark); - nwritten++; - } else { - set_fsync_mark(page, 0); - set_dentry_mark(page, 0); - } - mapping->a_ops->writepage(page, wbc); - wrote++; - - if (--wbc->nr_to_write == 0) - break; - } - pagevec_release(&pvec); - cond_resched(); - - if (wbc->nr_to_write == 0) { - step = 2; - break; - } - } - - if (step < 2) { - step++; - goto next_step; - } - - if (wrote) - f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL); - - return nwritten; -} - -static int f2fs_write_node_page(struct page *page, - struct writeback_control *wbc) -{ - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); - nid_t nid; - block_t new_addr; - struct node_info ni; - - if (sbi->por_doing) - goto redirty_out; - - wait_on_page_writeback(page); - - /* get old block addr of this node page */ - nid = nid_of_node(page); - BUG_ON(page->index != nid); - - get_node_info(sbi, nid, &ni); - - /* This page is already truncated */ - if (ni.blk_addr == NULL_ADDR) { - dec_page_count(sbi, F2FS_DIRTY_NODES); - unlock_page(page); - return 0; - } - - if (wbc->for_reclaim) - goto redirty_out; - - mutex_lock(&sbi->node_write); - set_page_writeback(page); - write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); - set_node_addr(sbi, &ni, new_addr); - dec_page_count(sbi, F2FS_DIRTY_NODES); - mutex_unlock(&sbi->node_write); - unlock_page(page); - return 0; - -redirty_out: - dec_page_count(sbi, F2FS_DIRTY_NODES); - wbc->pages_skipped++; - set_page_dirty(page); - return AOP_WRITEPAGE_ACTIVATE; -} - -/* - * It is very important to gather dirty pages and write at once, so that we can - * submit a big bio without interfering other data writes. - * Be default, 512 pages (2MB), a segment size, is quite reasonable. 
- */ -#define COLLECT_DIRTY_NODES 512 -static int f2fs_write_node_pages(struct address_space *mapping, - struct writeback_control *wbc) -{ - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - long nr_to_write = wbc->nr_to_write; - - /* First check balancing cached NAT entries */ - if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { - f2fs_sync_fs(sbi->sb, true); - return 0; - } - - /* collect a number of dirty node pages and write together */ - if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) - return 0; - - /* if mounting is failed, skip writing node pages */ - wbc->nr_to_write = max_hw_blocks(sbi); - sync_node_pages(sbi, 0, wbc); - wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write); - return 0; -} - -static int f2fs_set_node_page_dirty(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - - SetPageUptodate(page); - if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); - inc_page_count(sbi, F2FS_DIRTY_NODES); - SetPagePrivate(page); - return 1; - } - return 0; -} - -static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) -{ - struct inode *inode = page->mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - if (PageDirty(page)) - dec_page_count(sbi, F2FS_DIRTY_NODES); - ClearPagePrivate(page); -} - -static int f2fs_release_node_page(struct page *page, gfp_t wait) -{ - ClearPagePrivate(page); - return 1; -} - -/* - * Structure of the f2fs node operations - */ -const struct address_space_operations f2fs_node_aops = { - .writepage = f2fs_write_node_page, - .writepages = f2fs_write_node_pages, - .set_page_dirty = f2fs_set_node_page_dirty, - .invalidatepage = f2fs_invalidate_node_page, - .releasepage = f2fs_release_node_page, -}; - -static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) -{ - struct list_head *this; - struct free_nid *i; - list_for_each(this, head) { - i = list_entry(this, struct free_nid, list); - if (i->nid == n) - return i; - } - return NULL; -} - -static void __del_from_free_nid_list(struct free_nid *i) -{ - list_del(&i->list); - kmem_cache_free(free_nid_slab, i); -} - -static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) -{ - struct free_nid *i; - struct nat_entry *ne; - bool allocated = false; - - if (nm_i->fcnt > 2 * MAX_FREE_NIDS) - return -1; - - /* 0 nid should not be used */ - if (nid == 0) - return 0; - - if (!build) - goto retry; - - /* do not add allocated nids */ - read_lock(&nm_i->nat_tree_lock); - ne = __lookup_nat_cache(nm_i, nid); - if (ne && nat_get_blkaddr(ne) != NULL_ADDR) - allocated = true; - read_unlock(&nm_i->nat_tree_lock); - if (allocated) - return 0; -retry: - i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); - if (!i) { - cond_resched(); - goto retry; - } - i->nid = nid; - i->state = NID_NEW; - - spin_lock(&nm_i->free_nid_list_lock); - if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { - spin_unlock(&nm_i->free_nid_list_lock); - kmem_cache_free(free_nid_slab, i); - return 0; - } - list_add_tail(&i->list, &nm_i->free_nid_list); - nm_i->fcnt++; - spin_unlock(&nm_i->free_nid_list_lock); - return 1; -} - -static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) -{ - struct free_nid *i; - spin_lock(&nm_i->free_nid_list_lock); - i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); - if (i && i->state == NID_NEW) { - __del_from_free_nid_list(i); - nm_i->fcnt--; - } - spin_unlock(&nm_i->free_nid_list_lock); -} - -static void 
scan_nat_page(struct f2fs_nm_info *nm_i, - struct page *nat_page, nid_t start_nid) -{ - struct f2fs_nat_block *nat_blk = page_address(nat_page); - block_t blk_addr; - int i; - - i = start_nid % NAT_ENTRY_PER_BLOCK; - - for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { - - if (start_nid >= nm_i->max_nid) - break; - - blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); - BUG_ON(blk_addr == NEW_ADDR); - if (blk_addr == NULL_ADDR) { - if (add_free_nid(nm_i, start_nid, true) < 0) - break; - } - } -} - -static void build_free_nids(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; - int i = 0; - nid_t nid = nm_i->next_scan_nid; - - /* Enough entries */ - if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK) - return; - - /* readahead nat pages to be scanned */ - ra_nat_pages(sbi, nid); - - while (1) { - struct page *page = get_current_nat_page(sbi, nid); - - scan_nat_page(nm_i, page, nid); - f2fs_put_page(page, 1); - - nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); - if (nid >= nm_i->max_nid) - nid = 0; - - if (i++ == FREE_NID_PAGES) - break; - } - - /* go to the next free nat pages to find free nids abundantly */ - nm_i->next_scan_nid = nid; - - /* find free nids from current sum_pages */ - mutex_lock(&curseg->curseg_mutex); - for (i = 0; i < nats_in_cursum(sum); i++) { - block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); - nid = le32_to_cpu(nid_in_journal(sum, i)); - if (addr == NULL_ADDR) - add_free_nid(nm_i, nid, true); - else - remove_free_nid(nm_i, nid); - } - mutex_unlock(&curseg->curseg_mutex); -} - -/* - * If this function returns success, caller can obtain a new nid - * from second parameter of this function. - * The returned nid could be used ino as well as nid when inode is created. - */ -bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct free_nid *i = NULL; - struct list_head *this; -retry: - if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) - return false; - - spin_lock(&nm_i->free_nid_list_lock); - - /* We should not use stale free nids created by build_free_nids */ - if (nm_i->fcnt && !sbi->on_build_free_nids) { - BUG_ON(list_empty(&nm_i->free_nid_list)); - list_for_each(this, &nm_i->free_nid_list) { - i = list_entry(this, struct free_nid, list); - if (i->state == NID_NEW) - break; - } - - BUG_ON(i->state != NID_NEW); - *nid = i->nid; - i->state = NID_ALLOC; - nm_i->fcnt--; - spin_unlock(&nm_i->free_nid_list_lock); - return true; - } - spin_unlock(&nm_i->free_nid_list_lock); - - /* Let's scan nat pages and its caches to get free nids */ - mutex_lock(&nm_i->build_lock); - sbi->on_build_free_nids = 1; - build_free_nids(sbi); - sbi->on_build_free_nids = 0; - mutex_unlock(&nm_i->build_lock); - goto retry; -} - -/* - * alloc_nid() should be called prior to this function. - */ -void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct free_nid *i; - - spin_lock(&nm_i->free_nid_list_lock); - i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); - BUG_ON(!i || i->state != NID_ALLOC); - __del_from_free_nid_list(i); - spin_unlock(&nm_i->free_nid_list_lock); -} - -/* - * alloc_nid() should be called prior to this function. 
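alloc_nid() and its helpers above move a cached node id through a small state machine: build_free_nids()/add_free_nid() insert it as NID_NEW, alloc_nid() marks it NID_ALLOC, alloc_nid_done() drops it once the id is safely in use, and alloc_nid_failed() hands it back as NID_NEW (or simply drops it when the list is already over-full). A single-slot user-space model of those transitions, with simplified names and without the locking:

#include <stdio.h>

/* States a cached free node id moves through; mirrors enum nid_state,
 * plus ABSENT for "not on the list at all". */
enum state { ABSENT, NID_NEW, NID_ALLOC };

static const char *name[] = { "absent", "NID_NEW", "NID_ALLOC" };

/* One slot standing in for a free_nid list entry. */
static enum state slot = ABSENT;

static void add_free(void)     { if (slot == ABSENT) slot = NID_NEW; }
static int  alloc(void)        { if (slot != NID_NEW) return 0; slot = NID_ALLOC; return 1; }
static void alloc_done(void)   { slot = ABSENT; }    /* nid is now in use on disk */
static void alloc_failed(void) { slot = NID_NEW; }   /* give the nid back */

int main(void)
{
    add_free();               printf("after scan:          %s\n", name[slot]);
    if (alloc())              printf("after alloc_nid:     %s\n", name[slot]);
    alloc_failed();           printf("after alloc failed:  %s\n", name[slot]);
    if (alloc())
        alloc_done();
    printf("after alloc_done:    %s\n", name[slot]);
    return 0;
}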
- */ -void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct free_nid *i; - - if (!nid) - return; - - spin_lock(&nm_i->free_nid_list_lock); - i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); - BUG_ON(!i || i->state != NID_ALLOC); - if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { - __del_from_free_nid_list(i); - } else { - i->state = NID_NEW; - nm_i->fcnt++; - } - spin_unlock(&nm_i->free_nid_list_lock); -} - -void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_summary *sum, struct node_info *ni, - block_t new_blkaddr) -{ - rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); - set_node_addr(sbi, ni, new_blkaddr); - clear_node_page_dirty(page); -} - -int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) -{ - struct address_space *mapping = sbi->node_inode->i_mapping; - struct f2fs_node *src, *dst; - nid_t ino = ino_of_node(page); - struct node_info old_ni, new_ni; - struct page *ipage; - int err; - - ipage = grab_cache_page(mapping, ino); - if (!ipage) - return -ENOMEM; - - /* Should not use this inode from free nid list */ - remove_free_nid(NM_I(sbi), ino); - - get_node_info(sbi, ino, &old_ni); - SetPageUptodate(ipage); - fill_node_footer(ipage, ino, ino, 0, true); - - src = F2FS_NODE(page); - dst = F2FS_NODE(ipage); - - memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); - dst->i.i_size = 0; - dst->i.i_blocks = cpu_to_le64(1); - dst->i.i_links = cpu_to_le32(1); - dst->i.i_xattr_nid = 0; - - new_ni = old_ni; - new_ni.ino = ino; - - err = set_node_addr(sbi, &new_ni, NEW_ADDR); - if (!err) - if (!inc_valid_node_count(sbi, NULL, 1)) - err = -ENOSPC; - if (!err) - inc_valid_inode_count(sbi); - f2fs_put_page(ipage, 1); - return err; -} - -int restore_node_summary(struct f2fs_sb_info *sbi, - unsigned int segno, struct f2fs_summary_block *sum) -{ - struct f2fs_node *rn; - struct f2fs_summary *sum_entry; - struct page *page; - block_t addr; - int i, last_offset; - - /* alloc temporal page for read node */ - page = alloc_page(GFP_NOFS | __GFP_ZERO); - if (!page) - return -ENOMEM; - lock_page(page); - - /* scan the node segment */ - last_offset = sbi->blocks_per_seg; - addr = START_BLOCK(sbi, segno); - sum_entry = &sum->entries[0]; - - for (i = 0; i < last_offset; i++, sum_entry++) { - /* - * In order to read next node page, - * we must clear PageUptodate flag. 
- */ - ClearPageUptodate(page); - - if (f2fs_readpage(sbi, page, addr, READ_SYNC)) - goto out; - - lock_page(page); - rn = F2FS_NODE(page); - sum_entry->nid = rn->footer.nid; - sum_entry->version = 0; - sum_entry->ofs_in_node = 0; - addr++; - } - unlock_page(page); -out: - __free_pages(page, 0); - return 0; -} - -static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; - int i; - - mutex_lock(&curseg->curseg_mutex); - - if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) { - mutex_unlock(&curseg->curseg_mutex); - return false; - } - - for (i = 0; i < nats_in_cursum(sum); i++) { - struct nat_entry *ne; - struct f2fs_nat_entry raw_ne; - nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); - - raw_ne = nat_in_journal(sum, i); -retry: - write_lock(&nm_i->nat_tree_lock); - ne = __lookup_nat_cache(nm_i, nid); - if (ne) { - __set_nat_cache_dirty(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); - continue; - } - ne = grab_nat_entry(nm_i, nid); - if (!ne) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; - } - nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); - nat_set_ino(ne, le32_to_cpu(raw_ne.ino)); - nat_set_version(ne, raw_ne.version); - __set_nat_cache_dirty(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); - } - update_nats_in_cursum(sum, -i); - mutex_unlock(&curseg->curseg_mutex); - return true; -} - -/* - * This function is called during the checkpointing process. - */ -void flush_nat_entries(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; - struct list_head *cur, *n; - struct page *page = NULL; - struct f2fs_nat_block *nat_blk = NULL; - nid_t start_nid = 0, end_nid = 0; - bool flushed; - - flushed = flush_nats_in_journal(sbi); - - if (!flushed) - mutex_lock(&curseg->curseg_mutex); - - /* 1) flush dirty nat caches */ - list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { - struct nat_entry *ne; - nid_t nid; - struct f2fs_nat_entry raw_ne; - int offset = -1; - block_t new_blkaddr; - - ne = list_entry(cur, struct nat_entry, list); - nid = nat_get_nid(ne); - - if (nat_get_blkaddr(ne) == NEW_ADDR) - continue; - if (flushed) - goto to_nat_page; - - /* if there is room for nat enries in curseg->sumpage */ - offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); - if (offset >= 0) { - raw_ne = nat_in_journal(sum, offset); - goto flush_now; - } -to_nat_page: - if (!page || (start_nid > nid || nid > end_nid)) { - if (page) { - f2fs_put_page(page, 1); - page = NULL; - } - start_nid = START_NID(nid); - end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1; - - /* - * get nat block with dirty flag, increased reference - * count, mapped and lock - */ - page = get_next_nat_page(sbi, start_nid); - nat_blk = page_address(page); - } - - BUG_ON(!nat_blk); - raw_ne = nat_blk->entries[nid - start_nid]; -flush_now: - new_blkaddr = nat_get_blkaddr(ne); - - raw_ne.ino = cpu_to_le32(nat_get_ino(ne)); - raw_ne.block_addr = cpu_to_le32(new_blkaddr); - raw_ne.version = nat_get_version(ne); - - if (offset < 0) { - nat_blk->entries[nid - start_nid] = raw_ne; - } else { - nat_in_journal(sum, offset) = raw_ne; - nid_in_journal(sum, offset) = cpu_to_le32(nid); - } - - if (nat_get_blkaddr(ne) == NULL_ADDR && - add_free_nid(NM_I(sbi), nid, false) <= 0) { - write_lock(&nm_i->nat_tree_lock); - __del_from_nat_cache(nm_i, ne); - 
write_unlock(&nm_i->nat_tree_lock); - } else { - write_lock(&nm_i->nat_tree_lock); - __clear_nat_cache_dirty(nm_i, ne); - ne->checkpointed = true; - write_unlock(&nm_i->nat_tree_lock); - } - } - if (!flushed) - mutex_unlock(&curseg->curseg_mutex); - f2fs_put_page(page, 1); - - /* 2) shrink nat caches if necessary */ - try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD); -} - -static int init_node_manager(struct f2fs_sb_info *sbi) -{ - struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); - struct f2fs_nm_info *nm_i = NM_I(sbi); - unsigned char *version_bitmap; - unsigned int nat_segs, nat_blocks; - - nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); - - /* segment_count_nat includes pair segment so divide to 2. */ - nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; - nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; - nm_i->fcnt = 0; - nm_i->nat_cnt = 0; - - INIT_LIST_HEAD(&nm_i->free_nid_list); - INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); - INIT_LIST_HEAD(&nm_i->nat_entries); - INIT_LIST_HEAD(&nm_i->dirty_nat_entries); - - mutex_init(&nm_i->build_lock); - spin_lock_init(&nm_i->free_nid_list_lock); - rwlock_init(&nm_i->nat_tree_lock); - - nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); - nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); - version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); - if (!version_bitmap) - return -EFAULT; - - nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, - GFP_KERNEL); - if (!nm_i->nat_bitmap) - return -ENOMEM; - return 0; -} - -int build_node_manager(struct f2fs_sb_info *sbi) -{ - int err; - - sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL); - if (!sbi->nm_info) - return -ENOMEM; - - err = init_node_manager(sbi); - if (err) - return err; - - build_free_nids(sbi); - return 0; -} - -void destroy_node_manager(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct free_nid *i, *next_i; - struct nat_entry *natvec[NATVEC_SIZE]; - nid_t nid = 0; - unsigned int found; - - if (!nm_i) - return; - - /* destroy free nid list */ - spin_lock(&nm_i->free_nid_list_lock); - list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { - BUG_ON(i->state == NID_ALLOC); - __del_from_free_nid_list(i); - nm_i->fcnt--; - } - BUG_ON(nm_i->fcnt); - spin_unlock(&nm_i->free_nid_list_lock); - - /* destroy nat cache */ - write_lock(&nm_i->nat_tree_lock); - while ((found = __gang_lookup_nat_cache(nm_i, - nid, NATVEC_SIZE, natvec))) { - unsigned idx; - for (idx = 0; idx < found; idx++) { - struct nat_entry *e = natvec[idx]; - nid = nat_get_nid(e) + 1; - __del_from_nat_cache(nm_i, e); - } - } - BUG_ON(nm_i->nat_cnt); - write_unlock(&nm_i->nat_tree_lock); - - kfree(nm_i->nat_bitmap); - sbi->nm_info = NULL; - kfree(nm_i); -} - -int __init create_node_manager_caches(void) -{ - nat_entry_slab = f2fs_kmem_cache_create("nat_entry", - sizeof(struct nat_entry), NULL); - if (!nat_entry_slab) - return -ENOMEM; - - free_nid_slab = f2fs_kmem_cache_create("free_nid", - sizeof(struct free_nid), NULL); - if (!free_nid_slab) { - kmem_cache_destroy(nat_entry_slab); - return -ENOMEM; - } - return 0; -} - -void destroy_node_manager_caches(void) -{ - kmem_cache_destroy(free_nid_slab); - kmem_cache_destroy(nat_entry_slab); -} diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h deleted file mode 100644 index 3496bb3e15d..00000000000 --- a/fs/f2fs/node.h +++ /dev/null @@ -1,345 +0,0 @@ -/* - * fs/f2fs/node.h - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. 
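init_node_manager() above derives the number of usable node ids from the NAT area geometry: only half of the NAT segments describe distinct node ids, because every NAT block is stored twice. The worked example below repeats that sizing; the entry-per-block and blocks-per-segment figures are typical for a 4KB block size but are only illustrative, as is the segment count.

#include <stdio.h>

int main(void)
{
    unsigned int segment_count_nat  = 58;   /* example value from a small image */
    unsigned int log_blocks_per_seg = 9;    /* 512 blocks per segment */
    unsigned int nat_entry_per_block = 455; /* ~4096 / 9-byte raw NAT entry */

    /* The NAT area holds two copies of every block, so only half of
     * the segments map distinct node ids. */
    unsigned int nat_segs   = segment_count_nat >> 1;
    unsigned int nat_blocks = nat_segs << log_blocks_per_seg;
    unsigned int max_nid    = nat_entry_per_block * nat_blocks;

    printf("%u NAT segment pairs -> %u blocks -> max_nid %u\n",
           nat_segs, nat_blocks, max_nid);
    return 0;
}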
- * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -/* start node id of a node block dedicated to the given node id */ -#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) - -/* node block offset on the NAT area dedicated to the given start node id */ -#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) - -/* # of pages to perform readahead before building free nids */ -#define FREE_NID_PAGES 4 - -/* maximum # of free node ids to produce during build_free_nids */ -#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) - -/* maximum readahead size for node during getting data blocks */ -#define MAX_RA_NODE 128 - -/* maximum cached nat entries to manage memory footprint */ -#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) - -/* vector size for gang look-up from nat cache that consists of radix tree */ -#define NATVEC_SIZE 64 - -/* return value for read_node_page */ -#define LOCKED_PAGE 1 - -/* - * For node information - */ -struct node_info { - nid_t nid; /* node id */ - nid_t ino; /* inode number of the node's owner */ - block_t blk_addr; /* block address of the node */ - unsigned char version; /* version of the node */ -}; - -struct nat_entry { - struct list_head list; /* for clean or dirty nat list */ - bool checkpointed; /* whether it is checkpointed or not */ - struct node_info ni; /* in-memory node information */ -}; - -#define nat_get_nid(nat) (nat->ni.nid) -#define nat_set_nid(nat, n) (nat->ni.nid = n) -#define nat_get_blkaddr(nat) (nat->ni.blk_addr) -#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) -#define nat_get_ino(nat) (nat->ni.ino) -#define nat_set_ino(nat, i) (nat->ni.ino = i) -#define nat_get_version(nat) (nat->ni.version) -#define nat_set_version(nat, v) (nat->ni.version = v) - -#define __set_nat_cache_dirty(nm_i, ne) \ - list_move_tail(&ne->list, &nm_i->dirty_nat_entries); -#define __clear_nat_cache_dirty(nm_i, ne) \ - list_move_tail(&ne->list, &nm_i->nat_entries); -#define inc_node_version(version) (++version) - -static inline void node_info_from_raw_nat(struct node_info *ni, - struct f2fs_nat_entry *raw_ne) -{ - ni->ino = le32_to_cpu(raw_ne->ino); - ni->blk_addr = le32_to_cpu(raw_ne->block_addr); - ni->version = raw_ne->version; -} - -/* - * For free nid mangement - */ -enum nid_state { - NID_NEW, /* newly added to free nid list */ - NID_ALLOC /* it is allocated */ -}; - -struct free_nid { - struct list_head list; /* for free node id list */ - nid_t nid; /* node id */ - int state; /* in use or not: NID_NEW or NID_ALLOC */ -}; - -static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct free_nid *fnid; - - if (nm_i->fcnt <= 0) - return -1; - spin_lock(&nm_i->free_nid_list_lock); - fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); - *nid = fnid->nid; - spin_unlock(&nm_i->free_nid_list_lock); - return 0; -} - -/* - * inline functions - */ -static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); -} - -static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - pgoff_t block_off; - pgoff_t block_addr; - int seg_off; - - block_off = NAT_BLOCK_OFFSET(start); - seg_off = block_off >> 
sbi->log_blocks_per_seg; - - block_addr = (pgoff_t)(nm_i->nat_blkaddr + - (seg_off << sbi->log_blocks_per_seg << 1) + - (block_off & ((1 << sbi->log_blocks_per_seg) - 1))); - - if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) - block_addr += sbi->blocks_per_seg; - - return block_addr; -} - -static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, - pgoff_t block_addr) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - - block_addr -= nm_i->nat_blkaddr; - if ((block_addr >> sbi->log_blocks_per_seg) % 2) - block_addr -= sbi->blocks_per_seg; - else - block_addr += sbi->blocks_per_seg; - - return block_addr + nm_i->nat_blkaddr; -} - -static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) -{ - unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); - - if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) - f2fs_clear_bit(block_off, nm_i->nat_bitmap); - else - f2fs_set_bit(block_off, nm_i->nat_bitmap); -} - -static inline void fill_node_footer(struct page *page, nid_t nid, - nid_t ino, unsigned int ofs, bool reset) -{ - struct f2fs_node *rn = F2FS_NODE(page); - if (reset) - memset(rn, 0, sizeof(*rn)); - rn->footer.nid = cpu_to_le32(nid); - rn->footer.ino = cpu_to_le32(ino); - rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); -} - -static inline void copy_node_footer(struct page *dst, struct page *src) -{ - struct f2fs_node *src_rn = F2FS_NODE(src); - struct f2fs_node *dst_rn = F2FS_NODE(dst); - memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); -} - -static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) -{ - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - struct f2fs_node *rn = F2FS_NODE(page); - - rn->footer.cp_ver = ckpt->checkpoint_ver; - rn->footer.next_blkaddr = cpu_to_le32(blkaddr); -} - -static inline nid_t ino_of_node(struct page *node_page) -{ - struct f2fs_node *rn = F2FS_NODE(node_page); - return le32_to_cpu(rn->footer.ino); -} - -static inline nid_t nid_of_node(struct page *node_page) -{ - struct f2fs_node *rn = F2FS_NODE(node_page); - return le32_to_cpu(rn->footer.nid); -} - -static inline unsigned int ofs_of_node(struct page *node_page) -{ - struct f2fs_node *rn = F2FS_NODE(node_page); - unsigned flag = le32_to_cpu(rn->footer.flag); - return flag >> OFFSET_BIT_SHIFT; -} - -static inline unsigned long long cpver_of_node(struct page *node_page) -{ - struct f2fs_node *rn = F2FS_NODE(node_page); - return le64_to_cpu(rn->footer.cp_ver); -} - -static inline block_t next_blkaddr_of_node(struct page *node_page) -{ - struct f2fs_node *rn = F2FS_NODE(node_page); - return le32_to_cpu(rn->footer.next_blkaddr); -} - -/* - * f2fs assigns the following node offsets described as (num). 
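current_nat_addr(), next_nat_addr() and set_to_next_nat() above implement the two-copy ("ping-pong") NAT scheme: every NAT block has a twin exactly one segment away, a per-block bit in nat_bitmap selects the live copy, and a checkpoint flips the bit instead of rewriting in place. The stand-alone model below mirrors that arithmetic; the NAT start address, segment size and bitmap size are example values.

#include <stdio.h>
#include <stdbool.h>

#define LOG_BLKS_PER_SEG 9
#define BLKS_PER_SEG     (1u << LOG_BLKS_PER_SEG)

static unsigned int nat_blkaddr = 1024;   /* illustrative start of the NAT area */
static bool live_copy_is_second[4];       /* stands in for nm_i->nat_bitmap */

static unsigned int cur_nat_addr(unsigned int block_off)
{
    unsigned int seg_off = block_off >> LOG_BLKS_PER_SEG;
    unsigned int addr = nat_blkaddr
        + (seg_off << LOG_BLKS_PER_SEG << 1)      /* skip both copies of earlier pairs */
        + (block_off & (BLKS_PER_SEG - 1));       /* offset inside the segment */

    if (live_copy_is_second[block_off])
        addr += BLKS_PER_SEG;                     /* live copy sits in the twin segment */
    return addr;
}

static unsigned int other_nat_addr(unsigned int block_addr)
{
    unsigned int off = block_addr - nat_blkaddr;

    /* The other copy is exactly one segment away, in front or behind. */
    if ((off >> LOG_BLKS_PER_SEG) & 1)
        return block_addr - BLKS_PER_SEG;
    return block_addr + BLKS_PER_SEG;
}

int main(void)
{
    unsigned int cur = cur_nat_addr(3);
    unsigned int nxt = other_nat_addr(cur);

    printf("NAT block 3: current copy at %u, next copy at %u\n", cur, nxt);
    live_copy_is_second[3] = !live_copy_is_second[3];   /* set_to_next_nat() at checkpoint */
    printf("after checkpoint flip: current copy at %u\n", cur_nat_addr(3));
    return 0;
}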
- * N = NIDS_PER_BLOCK - * - * Inode block (0) - * |- direct node (1) - * |- direct node (2) - * |- indirect node (3) - * | `- direct node (4 => 4 + N - 1) - * |- indirect node (4 + N) - * | `- direct node (5 + N => 5 + 2N - 1) - * `- double indirect node (5 + 2N) - * `- indirect node (6 + 2N) - * `- direct node (x(N + 1)) - */ -static inline bool IS_DNODE(struct page *node_page) -{ - unsigned int ofs = ofs_of_node(node_page); - - if (ofs == XATTR_NODE_OFFSET) - return false; - - if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || - ofs == 5 + 2 * NIDS_PER_BLOCK) - return false; - if (ofs >= 6 + 2 * NIDS_PER_BLOCK) { - ofs -= 6 + 2 * NIDS_PER_BLOCK; - if (!((long int)ofs % (NIDS_PER_BLOCK + 1))) - return false; - } - return true; -} - -static inline void set_nid(struct page *p, int off, nid_t nid, bool i) -{ - struct f2fs_node *rn = F2FS_NODE(p); - - wait_on_page_writeback(p); - - if (i) - rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); - else - rn->in.nid[off] = cpu_to_le32(nid); - set_page_dirty(p); -} - -static inline nid_t get_nid(struct page *p, int off, bool i) -{ - struct f2fs_node *rn = F2FS_NODE(p); - - if (i) - return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); - return le32_to_cpu(rn->in.nid[off]); -} - -/* - * Coldness identification: - * - Mark cold files in f2fs_inode_info - * - Mark cold node blocks in their node footer - * - Mark cold data pages in page cache - */ -static inline int is_file(struct inode *inode, int type) -{ - return F2FS_I(inode)->i_advise & type; -} - -static inline void set_file(struct inode *inode, int type) -{ - F2FS_I(inode)->i_advise |= type; -} - -static inline void clear_file(struct inode *inode, int type) -{ - F2FS_I(inode)->i_advise &= ~type; -} - -#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) -#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) -#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) -#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) -#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) -#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) - -static inline int is_cold_data(struct page *page) -{ - return PageChecked(page); -} - -static inline void set_cold_data(struct page *page) -{ - SetPageChecked(page); -} - -static inline void clear_cold_data(struct page *page) -{ - ClearPageChecked(page); -} - -static inline int is_node(struct page *page, int type) -{ - struct f2fs_node *rn = F2FS_NODE(page); - return le32_to_cpu(rn->footer.flag) & (1 << type); -} - -#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) -#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) -#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) - -static inline void set_cold_node(struct inode *inode, struct page *page) -{ - struct f2fs_node *rn = F2FS_NODE(page); - unsigned int flag = le32_to_cpu(rn->footer.flag); - - if (S_ISDIR(inode->i_mode)) - flag &= ~(0x1 << COLD_BIT_SHIFT); - else - flag |= (0x1 << COLD_BIT_SHIFT); - rn->footer.flag = cpu_to_le32(flag); -} - -static inline void set_mark(struct page *page, int mark, int type) -{ - struct f2fs_node *rn = F2FS_NODE(page); - unsigned int flag = le32_to_cpu(rn->footer.flag); - if (mark) - flag |= (0x1 << type); - else - flag &= ~(0x1 << type); - rn->footer.flag = cpu_to_le32(flag); -} -#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) -#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c deleted file mode 
100644 index bee00347ee2..00000000000 --- a/fs/f2fs/recovery.c +++ /dev/null @@ -1,502 +0,0 @@ -/* - * fs/f2fs/recovery.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include "f2fs.h" -#include "node.h" -#include "segment.h" - -static struct kmem_cache *fsync_entry_slab; - -bool space_for_roll_forward(struct f2fs_sb_info *sbi) -{ - if (sbi->last_valid_block_count + sbi->alloc_valid_block_count - > sbi->user_block_count) - return false; - return true; -} - -static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, - nid_t ino) -{ - struct list_head *this; - struct fsync_inode_entry *entry; - - list_for_each(this, head) { - entry = list_entry(this, struct fsync_inode_entry, list); - if (entry->inode->i_ino == ino) - return entry; - } - return NULL; -} - -static int recover_dentry(struct page *ipage, struct inode *inode) -{ - struct f2fs_node *raw_node = F2FS_NODE(ipage); - struct f2fs_inode *raw_inode = &(raw_node->i); - nid_t pino = le32_to_cpu(raw_inode->i_pino); - struct f2fs_dir_entry *de; - struct qstr name; - struct page *page; - struct inode *dir, *einode; - int err = 0; - - dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); - if (!dir) { - dir = f2fs_iget(inode->i_sb, pino); - if (IS_ERR(dir)) { - f2fs_msg(inode->i_sb, KERN_INFO, - "%s: f2fs_iget failed: %ld", - __func__, PTR_ERR(dir)); - err = PTR_ERR(dir); - goto out; - } - set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); - add_dirty_dir_inode(dir); - } - - name.len = le32_to_cpu(raw_inode->i_namelen); - name.name = raw_inode->i_name; -retry: - de = f2fs_find_entry(dir, &name, &page); - if (de && inode->i_ino == le32_to_cpu(de->ino)) - goto out_unmap_put; - if (de) { - einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); - if (IS_ERR(einode)) { - WARN_ON(1); - if (PTR_ERR(einode) == -ENOENT) - err = -EEXIST; - goto out_unmap_put; - } - err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); - if (err) { - iput(einode); - goto out_unmap_put; - } - f2fs_delete_entry(de, page, einode); - iput(einode); - goto retry; - } - err = __f2fs_add_link(dir, &name, inode); - goto out; - -out_unmap_put: - kunmap(page); - f2fs_put_page(page, 0); -out: - f2fs_msg(inode->i_sb, KERN_DEBUG, "recover_inode and its dentry: " - "ino = %x, name = %s, dir = %lx, err = %d", - ino_of_node(ipage), raw_inode->i_name, - IS_ERR(dir) ? 
0 : dir->i_ino, err); - return err; -} - -static int recover_inode(struct inode *inode, struct page *node_page) -{ - struct f2fs_node *raw_node = F2FS_NODE(node_page); - struct f2fs_inode *raw_inode = &(raw_node->i); - - if (!IS_INODE(node_page)) - return 0; - - inode->i_mode = le16_to_cpu(raw_inode->i_mode); - i_size_write(inode, le64_to_cpu(raw_inode->i_size)); - inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); - inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); - inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - - if (is_dent_dnode(node_page)) - return recover_dentry(node_page, inode); - - f2fs_msg(inode->i_sb, KERN_DEBUG, "recover_inode: ino = %x, name = %s", - ino_of_node(node_page), raw_inode->i_name); - return 0; -} - -static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) -{ - unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); - struct curseg_info *curseg; - struct page *page; - block_t blkaddr; - int err = 0; - - /* get node pages in the current segment */ - curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); - blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; - - /* read node page */ - page = alloc_page(GFP_F2FS_ZERO); - if (!page) - return -ENOMEM; - lock_page(page); - - while (1) { - struct fsync_inode_entry *entry; - - err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); - if (err) - goto out; - - lock_page(page); - - if (cp_ver != cpver_of_node(page)) - break; - - if (!is_fsync_dnode(page)) - goto next; - - entry = get_fsync_inode(head, ino_of_node(page)); - if (entry) { - if (IS_INODE(page) && is_dent_dnode(page)) - set_inode_flag(F2FS_I(entry->inode), - FI_INC_LINK); - } else { - if (IS_INODE(page) && is_dent_dnode(page)) { - err = recover_inode_page(sbi, page); - if (err) { - f2fs_msg(sbi->sb, KERN_INFO, - "%s: recover_inode_page failed: %d", - __func__, err); - break; - } - } - - /* add this fsync inode to the list */ - entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); - if (!entry) { - err = -ENOMEM; - break; - } - - entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); - if (IS_ERR(entry->inode)) { - err = PTR_ERR(entry->inode); - f2fs_msg(sbi->sb, KERN_INFO, - "%s: f2fs_iget failed: %d", - __func__, err); - kmem_cache_free(fsync_entry_slab, entry); - break; - } - list_add_tail(&entry->list, head); - } - entry->blkaddr = blkaddr; - - err = recover_inode(entry->inode, page); - if (err && err != -ENOENT) { - f2fs_msg(sbi->sb, KERN_INFO, - "%s: recover_inode failed: %d", - __func__, err); - break; - } -next: - /* check next segment */ - blkaddr = next_blkaddr_of_node(page); - } - unlock_page(page); -out: - __free_pages(page, 0); - return err; -} - -static void destroy_fsync_dnodes(struct list_head *head) -{ - struct fsync_inode_entry *entry, *tmp; - - list_for_each_entry_safe(entry, tmp, head, list) { - iput(entry->inode); - list_del(&entry->list); - kmem_cache_free(fsync_entry_slab, entry); - } -} - -static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, - block_t blkaddr, struct dnode_of_data *dn) -{ - struct seg_entry *sentry; - unsigned int segno = GET_SEGNO(sbi, blkaddr); - unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & - (sbi->blocks_per_seg - 1); - struct f2fs_summary sum; - nid_t ino, nid; - void *kaddr; - struct inode *inode; - struct page *node_page; - unsigned int offset; - block_t 
bidx; - int i; - - sentry = get_seg_entry(sbi, segno); - if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) - return 0; - - /* Get the previous summary */ - for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { - struct curseg_info *curseg = CURSEG_I(sbi, i); - if (curseg->segno == segno) { - sum = curseg->sum_blk->entries[blkoff]; - break; - } - } - if (i > CURSEG_COLD_DATA) { - struct page *sum_page = get_sum_page(sbi, segno); - struct f2fs_summary_block *sum_node; - kaddr = page_address(sum_page); - sum_node = (struct f2fs_summary_block *)kaddr; - sum = sum_node->entries[blkoff]; - f2fs_put_page(sum_page, 1); - } - - /* Use the locked dnode page and inode */ - nid = le32_to_cpu(sum.nid); - if (dn->inode->i_ino == nid) { - struct dnode_of_data tdn = *dn; - tdn.nid = nid; - tdn.node_page = dn->inode_page; - tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); - truncate_data_blocks_range(&tdn, 1); - return 0; - } else if (dn->nid == nid) { - struct dnode_of_data tdn = *dn; - tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); - truncate_data_blocks_range(&tdn, 1); - return 0; - } - - /* Get the node page */ - node_page = get_node_page(sbi, nid); - if (IS_ERR(node_page)) - return PTR_ERR(node_page); - - offset = ofs_of_node(node_page); - ino = ino_of_node(node_page); - f2fs_put_page(node_page, 1); - - /* Skip nodes with circular references */ - if (ino == dn->inode->i_ino) { - f2fs_msg(sbi->sb, KERN_ERR, "%s: node %x has circular inode %x", - __func__, ino, nid); - f2fs_handle_error(sbi); - return -EDEADLK; - } - - /* Deallocate previous index in the node page */ - inode = f2fs_iget(sbi->sb, ino); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - bidx = start_bidx_of_node(offset, F2FS_I(inode)) + - le16_to_cpu(sum.ofs_in_node); - - truncate_hole(inode, bidx, bidx + 1); - iput(inode); - return 0; -} - -static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page, block_t blkaddr) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int start, end; - struct dnode_of_data dn; - struct f2fs_summary sum; - struct node_info ni; - int err = 0, recovered = 0; - int ilock; - - start = start_bidx_of_node(ofs_of_node(page), fi); - if (IS_INODE(page)) - end = start + ADDRS_PER_INODE(fi); - else - end = start + ADDRS_PER_BLOCK; - - ilock = mutex_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - - err = get_dnode_of_data(&dn, start, ALLOC_NODE); - if (err) { - mutex_unlock_op(sbi, ilock); - f2fs_msg(sbi->sb, KERN_INFO, - "%s: get_dnode_of_data failed: %d", __func__, err); - return err; - } - - wait_on_page_writeback(dn.node_page); - - get_node_info(sbi, dn.nid, &ni); - BUG_ON(ni.ino != ino_of_node(page)); - BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page)); - - for (; start < end; start++) { - block_t src, dest; - - src = datablock_addr(dn.node_page, dn.ofs_in_node); - dest = datablock_addr(page, dn.ofs_in_node); - - if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { - if (src == NULL_ADDR) { - int err = reserve_new_block(&dn); - /* We should not get -ENOSPC */ - if (err) - f2fs_msg(sbi->sb, KERN_INFO, - "%s: reserve_new_block failed: %d", - __func__, err); - BUG_ON(err); - } - - /* Check the previous node page having this index */ - err = check_index_in_prev_nodes(sbi, dest, &dn); - if (err) - goto err; - - set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); - - /* write dummy data page */ - recover_data_page(sbi, NULL, &sum, src, dest); - update_extent_cache(dest, &dn); - recovered++; - } - dn.ofs_in_node++; - } - - /* write node page in 
place */ - set_summary(&sum, dn.nid, 0, 0); - if (IS_INODE(dn.node_page)) - sync_inode_page(&dn); - - copy_node_footer(dn.node_page, page); - fill_node_footer(dn.node_page, dn.nid, ni.ino, - ofs_of_node(page), false); - set_page_dirty(dn.node_page); - - recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); -err: - f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, ilock); - - f2fs_msg(sbi->sb, KERN_DEBUG, "recover_data: ino = %lx, " - "recovered_data = %d blocks, err = %d", - inode->i_ino, recovered, err); - return err; -} - -static int recover_data(struct f2fs_sb_info *sbi, - struct list_head *head, int type) -{ - unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); - struct curseg_info *curseg; - struct page *page; - int err = 0; - block_t blkaddr; - - /* get node pages in the current segment */ - curseg = CURSEG_I(sbi, type); - blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - - /* read node page */ - page = alloc_page(GFP_NOFS | __GFP_ZERO); - if (!page) - return -ENOMEM; - - lock_page(page); - - while (1) { - struct fsync_inode_entry *entry; - - err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); - if (err) { - f2fs_msg(sbi->sb, KERN_INFO, - "%s: f2fs_readpage failed: %d", - __func__, err); - goto out; - } - - lock_page(page); - - if (cp_ver != cpver_of_node(page)) - break; - - entry = get_fsync_inode(head, ino_of_node(page)); - if (!entry) - goto next; - - err = do_recover_data(sbi, entry->inode, page, blkaddr); - if (err) { - f2fs_msg(sbi->sb, KERN_INFO, - "%s: do_recover_data failed: %d", - __func__, err); - break; - } - - if (entry->blkaddr == blkaddr) { - iput(entry->inode); - list_del(&entry->list); - kmem_cache_free(fsync_entry_slab, entry); - } -next: - /* check next segment */ - blkaddr = next_blkaddr_of_node(page); - } - unlock_page(page); -out: - __free_pages(page, 0); - - if (!err) - allocate_new_segments(sbi); - return err; -} - -int recover_fsync_data(struct f2fs_sb_info *sbi) -{ - struct list_head inode_list; - int err; - - fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", - sizeof(struct fsync_inode_entry), NULL); - if (unlikely(!fsync_entry_slab)) - return -ENOMEM; - - INIT_LIST_HEAD(&inode_list); - - /* step #1: find fsynced inode numbers */ - sbi->por_doing = 1; - err = find_fsync_dnodes(sbi, &inode_list); - if (err) { - f2fs_msg(sbi->sb, KERN_INFO, - "%s: find_fsync_dnodes failed: %d", __func__, err); - goto out; - } - - if (list_empty(&inode_list)) - goto out; - - /* step #2: recover data */ - err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); - if (!list_empty(&inode_list)) { - f2fs_handle_error(sbi); - err = -EIO; - } -out: - destroy_fsync_dnodes(&inode_list); - kmem_cache_destroy(fsync_entry_slab); - sbi->por_doing = 0; - if (!err) { - f2fs_msg(sbi->sb, KERN_INFO, "recovery complete"); - write_checkpoint(sbi, false); - } else - f2fs_msg(sbi->sb, KERN_ERR, "recovery did not fully complete"); - - return err; -} diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c deleted file mode 100644 index cb8e70e88ac..00000000000 --- a/fs/f2fs/segment.c +++ /dev/null @@ -1,1787 +0,0 @@ -/* - * fs/f2fs/segment.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
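/*
 * A stand-alone sketch (illustrative only, not f2fs code) of the
 * roll-forward walk performed by find_fsync_dnodes()/recover_data() above:
 * start at the first block after the last checkpointed position, follow
 * each node's next-block pointer, and stop as soon as a block's stored
 * checkpoint version no longer matches the mounted checkpoint.
 */
#include <stdio.h>

struct node_blk {
	unsigned long long cp_ver;	/* checkpoint version stamped at write time */
	int next;			/* index of the next logged node block      */
};

static int walk_log(const struct node_blk *log, int start,
		    unsigned long long cur_cp_ver)
{
	int recovered = 0;
	int blk = start;

	while (blk >= 0 && log[blk].cp_ver == cur_cp_ver) {
		recovered++;		/* a real walk would replay this block */
		blk = log[blk].next;
	}
	return recovered;
}

int main(void)
{
	/* blocks 0..2 were written after the last checkpoint (cp_ver 7),
	 * block 3 is stale data from an older checkpoint.               */
	struct node_blk log[] = {
		{ 7, 1 }, { 7, 2 }, { 7, 3 }, { 6, -1 },
	};

	printf("blocks to replay: %d\n", walk_log(log, 0, 7));
	return 0;
}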
- */ -#include -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "segment.h" -#include "node.h" -#include - -/* - * This function balances dirty node and dentry pages. - * In addition, it controls garbage collection. - */ -void f2fs_balance_fs(struct f2fs_sb_info *sbi) -{ - /* - * We should do GC or end up with checkpoint, if there are so many dirty - * dir/node pages without enough free segments. - */ - if (has_not_enough_free_secs(sbi, 0)) { - mutex_lock(&sbi->gc_mutex); - f2fs_gc(sbi); - } -} - -static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, - enum dirty_type dirty_type) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - - /* need not be added */ - if (IS_CURSEG(sbi, segno)) - return; - - if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) - dirty_i->nr_dirty[dirty_type]++; - - if (dirty_type == DIRTY) { - struct seg_entry *sentry = get_seg_entry(sbi, segno); - enum dirty_type t = DIRTY_HOT_DATA; - - dirty_type = sentry->type; - - if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) - dirty_i->nr_dirty[dirty_type]++; - - /* Only one bitmap should be set */ - for (; t <= DIRTY_COLD_NODE; t++) { - if (t == dirty_type) - continue; - if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) - dirty_i->nr_dirty[t]--; - } - } -} - -static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, - enum dirty_type dirty_type) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - - if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type])) - dirty_i->nr_dirty[dirty_type]--; - - if (dirty_type == DIRTY) { - enum dirty_type t = DIRTY_HOT_DATA; - - /* clear all the bitmaps */ - for (; t <= DIRTY_COLD_NODE; t++) - if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) - dirty_i->nr_dirty[t]--; - - if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) - clear_bit(GET_SECNO(sbi, segno), - dirty_i->victim_secmap); - } -} - -/* - * Should not occur error such as -ENOMEM. - * Adding dirty entry into seglist is not critical operation. - * If a given segment is one of current working segments, it won't be added. - */ -static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned short valid_blocks; - - if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) - return; - - mutex_lock(&dirty_i->seglist_lock); - - valid_blocks = get_valid_blocks(sbi, segno, 0); - - if (valid_blocks == 0) { - __locate_dirty_segment(sbi, segno, PRE); - __remove_dirty_segment(sbi, segno, DIRTY); - } else if (valid_blocks < sbi->blocks_per_seg) { - __locate_dirty_segment(sbi, segno, DIRTY); - } else { - /* Recovery routine with SSR needs this */ - __remove_dirty_segment(sbi, segno, DIRTY); - } - - mutex_unlock(&dirty_i->seglist_lock); -} - -/* - * Should call clear_prefree_segments after checkpoint is done. 
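/*
 * A small illustrative sketch (simplified, not the patch's code) of the
 * classification done by locate_dirty_segment() above: a segment with no
 * valid blocks becomes "prefree", a partially valid one becomes "dirty",
 * and a completely full one needs no reclaim at all.
 */
#include <stdio.h>

enum seg_state { SEG_CLEAN, SEG_DIRTY, SEG_PREFREE };

static enum seg_state classify(unsigned int valid, unsigned int per_seg)
{
	if (valid == 0)
		return SEG_PREFREE;	/* freeable right after checkpoint   */
	if (valid < per_seg)
		return SEG_DIRTY;	/* candidate for cleaning / SSR reuse */
	return SEG_CLEAN;		/* fully valid, nothing to reclaim    */
}

int main(void)
{
	unsigned int blocks_per_seg = 512;	/* illustrative value */
	unsigned int samples[] = { 0, 100, 512 };

	for (unsigned int i = 0; i < 3; i++)
		printf("%u valid -> state %d\n", samples[i],
		       classify(samples[i], blocks_per_seg));
	return 0;
}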
- */ -static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno = -1; - unsigned int total_segs = TOTAL_SEGS(sbi); - - mutex_lock(&dirty_i->seglist_lock); - while (1) { - segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - segno + 1); - if (segno >= total_segs) - break; - __set_test_and_free(sbi, segno); - } - mutex_unlock(&dirty_i->seglist_lock); -} - -void clear_prefree_segments(struct f2fs_sb_info *sbi) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno = -1; - unsigned int total_segs = TOTAL_SEGS(sbi); - - mutex_lock(&dirty_i->seglist_lock); - while (1) { - segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - segno + 1); - if (segno >= total_segs) - break; - - if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) - dirty_i->nr_dirty[PRE]--; - - /* Let's use trim */ - if (test_opt(sbi, DISCARD)) - blkdev_issue_discard(sbi->sb->s_bdev, - START_BLOCK(sbi, segno) << - sbi->log_sectors_per_block, - 1 << (sbi->log_sectors_per_block + - sbi->log_blocks_per_seg), - GFP_NOFS, 0); - } - mutex_unlock(&dirty_i->seglist_lock); -} - -static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) -{ - struct sit_info *sit_i = SIT_I(sbi); - if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) - sit_i->dirty_sentries++; -} - -static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, - unsigned int segno, int modified) -{ - struct seg_entry *se = get_seg_entry(sbi, segno); - se->type = type; - if (modified) - __mark_sit_entry_dirty(sbi, segno); -} - -static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) -{ - struct seg_entry *se; - unsigned int segno, offset; - long int new_vblocks; - bool check_map = false; - - segno = GET_SEGNO(sbi, blkaddr); - - se = get_seg_entry(sbi, segno); - new_vblocks = se->valid_blocks + del; - offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); - - if (new_vblocks < 0 || new_vblocks > sbi->blocks_per_seg || - (new_vblocks >> (sizeof(unsigned short) << 3))) - if (f2fs_handle_error(sbi)) - check_map = true; - - se->mtime = get_mtime(sbi); - SIT_I(sbi)->max_mtime = se->mtime; - - /* Update valid block bitmap */ - if (del > 0) { - if (f2fs_set_bit(offset, se->cur_valid_map)) - if (f2fs_handle_error(sbi)) - check_map = true; - } else { - if (!f2fs_clear_bit(offset, se->cur_valid_map)) - if (f2fs_handle_error(sbi)) - check_map = true; - } - - if (unlikely(check_map)) { - int i; - long int vblocks = 0; - - f2fs_msg(sbi->sb, KERN_ERR, - "cannot %svalidate block %u in segment %u with %hu valid blocks", - (del < 0) ? 
"in" : "", - offset, segno, se->valid_blocks); - - /* assume the count was stale to start */ - del = 0; - for (i = 0; i < sbi->blocks_per_seg; i++) - if (f2fs_test_bit(i, se->cur_valid_map)) - vblocks++; - if (vblocks != se->valid_blocks) { - f2fs_msg(sbi->sb, KERN_INFO, "correcting valid block " - "counts %d -> %ld", se->valid_blocks, vblocks); - /* make accounting corrections */ - del = vblocks - se->valid_blocks; - } - } - se->valid_blocks += del; - - if (!f2fs_test_bit(offset, se->ckpt_valid_map)) - se->ckpt_valid_blocks += del; - - __mark_sit_entry_dirty(sbi, segno); - - /* update total number of valid blocks to be written in ckpt area */ - SIT_I(sbi)->written_valid_blocks += del; - - if (sbi->segs_per_sec > 1) - get_sec_entry(sbi, segno)->valid_blocks += del; -} - -static void refresh_sit_entry(struct f2fs_sb_info *sbi, - block_t old_blkaddr, block_t new_blkaddr) -{ - update_sit_entry(sbi, new_blkaddr, 1); - if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) - update_sit_entry(sbi, old_blkaddr, -1); -} - -void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) -{ - unsigned int segno = GET_SEGNO(sbi, addr); - struct sit_info *sit_i = SIT_I(sbi); - - BUG_ON(addr == NULL_ADDR); - if (addr == NEW_ADDR) - return; - - if (segno >= TOTAL_SEGS(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, "invalid segment number %u", segno); - if (f2fs_handle_error(sbi)) - return; - } - - /* add it into sit main buffer */ - mutex_lock(&sit_i->sentry_lock); - - update_sit_entry(sbi, addr, -1); - - /* add it into dirty seglist */ - locate_dirty_segment(sbi, segno); - - mutex_unlock(&sit_i->sentry_lock); -} - -/* - * This function should be resided under the curseg_mutex lock - */ -static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, - struct f2fs_summary *sum) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - void *addr = curseg->sum_blk; - addr += curseg->next_blkoff * sizeof(struct f2fs_summary); - memcpy(addr, sum, sizeof(struct f2fs_summary)); -} - -/* - * Calculate the number of current summary pages for writing - */ -int npages_for_summary_flush(struct f2fs_sb_info *sbi) -{ - int total_size_bytes = 0; - int valid_sum_count = 0; - int i, sum_space; - - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { - if (sbi->ckpt->alloc_type[i] == SSR) - valid_sum_count += sbi->blocks_per_seg; - else - valid_sum_count += curseg_blkoff(sbi, i); - } - - total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1) - + sizeof(struct nat_journal) + 2 - + sizeof(struct sit_journal) + 2; - sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE; - if (total_size_bytes < sum_space) - return 1; - else if (total_size_bytes < 2 * sum_space) - return 2; - return 3; -} - -/* - * Caller should put this summary page - */ -struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) -{ - return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); -} - -static void write_sum_page(struct f2fs_sb_info *sbi, - struct f2fs_summary_block *sum_blk, block_t blk_addr) -{ - struct page *page = grab_meta_page(sbi, blk_addr); - void *kaddr = page_address(page); - memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE); - set_page_dirty(page); - f2fs_put_page(page, 1); -} - -static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int segno = curseg->segno + 1; - struct free_segmap_info *free_i = FREE_I(sbi); - - if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) - return !test_bit(segno, free_i->free_segmap); - return 0; -} - -/* - * Find a new segment from the free 
segments bitmap to right order - * This function should be returned with success, otherwise BUG - */ -static void get_new_segment(struct f2fs_sb_info *sbi, - unsigned int *newseg, bool new_sec, int dir) -{ - struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int segno, secno, zoneno; - unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; - unsigned int hint = *newseg / sbi->segs_per_sec; - unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); - unsigned int left_start = hint; - bool init = true; - int go_left = 0; - int i; - - write_lock(&free_i->segmap_lock); - - if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { - segno = find_next_zero_bit(free_i->free_segmap, - TOTAL_SEGS(sbi), *newseg + 1); - if (segno - *newseg < sbi->segs_per_sec - - (*newseg % sbi->segs_per_sec)) - goto got_it; - } -find_other_zone: - secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); - if (secno >= TOTAL_SECS(sbi)) { - if (dir == ALLOC_RIGHT) { - secno = find_next_zero_bit(free_i->free_secmap, - TOTAL_SECS(sbi), 0); - BUG_ON(secno >= TOTAL_SECS(sbi)); - } else { - go_left = 1; - left_start = hint - 1; - } - } - if (go_left == 0) - goto skip_left; - - while (test_bit(left_start, free_i->free_secmap)) { - if (left_start > 0) { - left_start--; - continue; - } - left_start = find_next_zero_bit(free_i->free_secmap, - TOTAL_SECS(sbi), 0); - BUG_ON(left_start >= TOTAL_SECS(sbi)); - break; - } - secno = left_start; -skip_left: - hint = secno; - segno = secno * sbi->segs_per_sec; - zoneno = secno / sbi->secs_per_zone; - - /* give up on finding another zone */ - if (!init) - goto got_it; - if (sbi->secs_per_zone == 1) - goto got_it; - if (zoneno == old_zoneno) - goto got_it; - if (dir == ALLOC_LEFT) { - if (!go_left && zoneno + 1 >= total_zones) - goto got_it; - if (go_left && zoneno == 0) - goto got_it; - } - for (i = 0; i < NR_CURSEG_TYPE; i++) - if (CURSEG_I(sbi, i)->zone == zoneno) - break; - - if (i < NR_CURSEG_TYPE) { - /* zone is in user, try another */ - if (go_left) - hint = zoneno * sbi->secs_per_zone - 1; - else if (zoneno + 1 >= total_zones) - hint = 0; - else - hint = (zoneno + 1) * sbi->secs_per_zone; - init = false; - goto find_other_zone; - } -got_it: - /* set it as dirty segment in free segmap */ - BUG_ON(test_bit(segno, free_i->free_segmap)); - __set_inuse(sbi, segno); - *newseg = segno; - write_unlock(&free_i->segmap_lock); -} - -static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - struct summary_footer *sum_footer; - - curseg->segno = curseg->next_segno; - curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno); - curseg->next_blkoff = 0; - curseg->next_segno = NULL_SEGNO; - - sum_footer = &(curseg->sum_blk->footer); - memset(sum_footer, 0, sizeof(struct summary_footer)); - if (IS_DATASEG(type)) - SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA); - if (IS_NODESEG(type)) - SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE); - __set_sit_entry_type(sbi, type, curseg->segno, modified); -} - -/* - * Allocate a current working segment. - * This function always allocates a free segment in LFS manner. 
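/*
 * An illustrative sketch (not the original code) of the hint-plus-wrap
 * search that get_new_segment() above builds on: look for a free slot at
 * or after the hint, and if the end of the bitmap is reached, retry once
 * from the beginning before giving up.
 */
#include <stdio.h>

static int find_next_zero(const unsigned char *used, int total, int from)
{
	for (int i = from; i < total; i++)
		if (!used[i])
			return i;
	return total;			/* nothing free at or past @from */
}

static int alloc_with_hint(unsigned char *used, int total, int hint)
{
	int idx = find_next_zero(used, total, hint);

	if (idx >= total)		/* wrap around once */
		idx = find_next_zero(used, total, 0);
	if (idx >= total)
		return -1;		/* bitmap is completely full */
	used[idx] = 1;			/* mark it in use */
	return idx;
}

int main(void)
{
	unsigned char used[8] = { 1, 1, 0, 1, 1, 1, 1, 0 };

	printf("got %d\n", alloc_with_hint(used, 8, 5));	/* finds 7   */
	printf("got %d\n", alloc_with_hint(used, 8, 5));	/* wraps to 2 */
	return 0;
}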
- */ -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int segno = curseg->segno; - int dir = ALLOC_LEFT; - - write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, segno)); - if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) - dir = ALLOC_RIGHT; - - if (test_opt(sbi, NOHEAP)) - dir = ALLOC_RIGHT; - - get_new_segment(sbi, &segno, new_sec, dir); - curseg->next_segno = segno; - reset_curseg(sbi, type, 1); - curseg->alloc_type = LFS; -} - -static void __next_free_blkoff(struct f2fs_sb_info *sbi, - struct curseg_info *seg, block_t start) -{ - struct seg_entry *se = get_seg_entry(sbi, seg->segno); - block_t ofs; - for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) { - if (!f2fs_test_bit(ofs, se->ckpt_valid_map) - && !f2fs_test_bit(ofs, se->cur_valid_map)) - break; - } - seg->next_blkoff = ofs; -} - -/* - * If a segment is written by LFS manner, next block offset is just obtained - * by increasing the current block offset. However, if a segment is written by - * SSR manner, next block offset obtained by calling __next_free_blkoff - */ -static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, - struct curseg_info *seg) -{ - if (seg->alloc_type == SSR) - __next_free_blkoff(sbi, seg, seg->next_blkoff + 1); - else - seg->next_blkoff++; -} - -/* - * This function always allocates a used segment (from dirty seglist) by SSR - * manner, so it should recover the existing segment information of valid blocks - */ -static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int new_segno = curseg->next_segno; - struct f2fs_summary_block *sum_node; - struct page *sum_page; - - write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, curseg->segno)); - __set_test_and_inuse(sbi, new_segno); - - mutex_lock(&dirty_i->seglist_lock); - __remove_dirty_segment(sbi, new_segno, PRE); - __remove_dirty_segment(sbi, new_segno, DIRTY); - mutex_unlock(&dirty_i->seglist_lock); - - reset_curseg(sbi, type, 1); - curseg->alloc_type = SSR; - __next_free_blkoff(sbi, curseg, 0); - - if (reuse) { - sum_page = get_sum_page(sbi, new_segno); - sum_node = (struct f2fs_summary_block *)page_address(sum_page); - memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); - f2fs_put_page(sum_page, 1); - } -} - -static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; - - if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0)) - return v_ops->get_victim(sbi, - &(curseg)->next_segno, BG_GC, type, SSR); - - /* For data segments, let's do SSR more intensively */ - for (; type >= CURSEG_HOT_DATA; type--) - if (v_ops->get_victim(sbi, &(curseg)->next_segno, - BG_GC, type, SSR)) - return 1; - return 0; -} - -/* - * flush out current segment and replace it with new segment - * This function should be returned with success, otherwise BUG - */ -static void allocate_segment_by_default(struct f2fs_sb_info *sbi, - int type, bool force) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - - if (force) - new_curseg(sbi, type, true); - else if (type == CURSEG_WARM_NODE) - new_curseg(sbi, type, false); - else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) - new_curseg(sbi, type, false); - else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) - change_curseg(sbi, type, true); - else - 
new_curseg(sbi, type, false); -#ifdef CONFIG_F2FS_STAT_FS - sbi->segment_count[curseg->alloc_type]++; -#endif -} - -void allocate_new_segments(struct f2fs_sb_info *sbi) -{ - struct curseg_info *curseg; - unsigned int old_curseg; - int i; - - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { - curseg = CURSEG_I(sbi, i); - old_curseg = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_curseg); - } -} - -static const struct segment_allocation default_salloc_ops = { - .allocate_segment = allocate_segment_by_default, -}; - -static void f2fs_end_io_write(struct bio *bio, int err) -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct bio_private *p = bio->bi_private; - - do { - struct page *page = bvec->bv_page; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - if (!uptodate) { - SetPageError(page); - if (page->mapping) - set_bit(AS_EIO, &page->mapping->flags); - set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); - p->sbi->sb->s_flags |= MS_RDONLY; - } - end_page_writeback(page); - dec_page_count(p->sbi, F2FS_WRITEBACK); - } while (bvec >= bio->bi_io_vec); - - if (p->is_sync) - complete(p->wait); - kfree(p); - bio_put(bio); -} - -struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) -{ - struct bio *bio; - - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - bio->bi_bdev = bdev; - bio->bi_private = NULL; - - return bio; -} - -static void do_submit_bio(struct f2fs_sb_info *sbi, - enum page_type type, bool sync) -{ - int rw = sync ? WRITE_SYNC : WRITE; - enum page_type btype = type > META ? META : type; - - if (type >= META_FLUSH) - rw = WRITE_FLUSH_FUA; - - if (btype == META) - rw |= REQ_META; - - if (sbi->bio[btype]) { - struct bio_private *p = sbi->bio[btype]->bi_private; - p->sbi = sbi; - sbi->bio[btype]->bi_end_io = f2fs_end_io_write; - - trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]); - - if (type == META_FLUSH) { - DECLARE_COMPLETION_ONSTACK(wait); - p->is_sync = true; - p->wait = &wait; - submit_bio(rw, sbi->bio[btype]); - wait_for_completion(&wait); - } else { - p->is_sync = false; - submit_bio(rw, sbi->bio[btype]); - } - sbi->bio[btype] = NULL; - } -} - -void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) -{ - down_write(&sbi->bio_sem); - do_submit_bio(sbi, type, sync); - up_write(&sbi->bio_sem); -} - -static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, enum page_type type) -{ - struct block_device *bdev = sbi->sb->s_bdev; - - verify_block_addr(sbi, blk_addr); - - down_write(&sbi->bio_sem); - - inc_page_count(sbi, F2FS_WRITEBACK); - - if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1) - do_submit_bio(sbi, type, false); -alloc_new: - if (sbi->bio[type] == NULL) { - struct bio_private *priv; -retry: - priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); - if (!priv) { - cond_resched(); - goto retry; - } - - sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); - sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); - sbi->bio[type]->bi_private = priv; - /* - * The end_io will be assigned at the sumbission phase. - * Until then, let bio_add_page() merge consecutive IOs as much - * as possible. 
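/*
 * A user-space sketch (illustrative only, not part of this patch) of the
 * batching rule used by submit_write_page() above: keep appending pages to
 * the pending request while block addresses stay consecutive, and submit
 * the pending request as soon as a non-consecutive address arrives.
 */
#include <stdio.h>

struct batch {
	long start;	/* first block in the pending request, -1 if none */
	int  count;	/* number of consecutive blocks collected         */
};

static void submit(struct batch *b)
{
	if (b->count)
		printf("submit %d block(s) at %ld\n", b->count, b->start);
	b->start = -1;
	b->count = 0;
}

static void queue_block(struct batch *b, long blk)
{
	if (b->count && blk != b->start + b->count)
		submit(b);		/* not contiguous: flush first */
	if (!b->count)
		b->start = blk;
	b->count++;
}

int main(void)
{
	struct batch b = { -1, 0 };
	long blocks[] = { 100, 101, 102, 200, 201 };

	for (int i = 0; i < 5; i++)
		queue_block(&b, blocks[i]);
	submit(&b);			/* flush whatever is left */
	return 0;
}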
- */ - } - - if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) < - PAGE_CACHE_SIZE) { - do_submit_bio(sbi, type, false); - goto alloc_new; - } - - sbi->last_block_in_bio[type] = blk_addr; - - up_write(&sbi->bio_sem); - trace_f2fs_submit_write_page(page, blk_addr, type); -} - -void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool sync) -{ - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); - if (PageWriteback(page)) { - f2fs_submit_bio(sbi, type, sync); - wait_on_page_writeback(page); - } -} - -static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - if (curseg->next_blkoff < sbi->blocks_per_seg) - return true; - return false; -} - -static int __get_segment_type_2(struct page *page, enum page_type p_type) -{ - if (p_type == DATA) - return CURSEG_HOT_DATA; - else - return CURSEG_HOT_NODE; -} - -static int __get_segment_type_4(struct page *page, enum page_type p_type) -{ - if (p_type == DATA) { - struct inode *inode = page->mapping->host; - - if (S_ISDIR(inode->i_mode)) - return CURSEG_HOT_DATA; - else - return CURSEG_COLD_DATA; - } else { - if (IS_DNODE(page) && !is_cold_node(page)) - return CURSEG_HOT_NODE; - else - return CURSEG_COLD_NODE; - } -} - -static int __get_segment_type_6(struct page *page, enum page_type p_type) -{ - if (p_type == DATA) { - struct inode *inode = page->mapping->host; - - if (S_ISDIR(inode->i_mode)) - return CURSEG_HOT_DATA; - else if (is_cold_data(page) || file_is_cold(inode)) - return CURSEG_COLD_DATA; - else - return CURSEG_WARM_DATA; - } else { - if (IS_DNODE(page)) - return is_cold_node(page) ? CURSEG_WARM_NODE : - CURSEG_HOT_NODE; - else - return CURSEG_COLD_NODE; - } -} - -static int __get_segment_type(struct page *page, enum page_type p_type) -{ - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); - switch (sbi->active_logs) { - case 2: - return __get_segment_type_2(page, p_type); - case 4: - return __get_segment_type_4(page, p_type); - } - /* NR_CURSEG_TYPE(6) logs by default */ - BUG_ON(sbi->active_logs != NR_CURSEG_TYPE); - return __get_segment_type_6(page, p_type); -} - -static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, - block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, enum page_type p_type) -{ - struct sit_info *sit_i = SIT_I(sbi); - struct curseg_info *curseg; - unsigned int old_cursegno; - int type; - - type = __get_segment_type(page, p_type); - curseg = CURSEG_I(sbi, type); - - mutex_lock(&curseg->curseg_mutex); - - *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - old_cursegno = curseg->segno; - - /* - * __add_sum_entry should be resided under the curseg_mutex - * because, this function updates a summary entry in the - * current summary block. - */ - __add_sum_entry(sbi, type, sum); - - mutex_lock(&sit_i->sentry_lock); - __refresh_next_blkoff(sbi, curseg); -#ifdef CONFIG_F2FS_STAT_FS - sbi->block_count[curseg->alloc_type]++; -#endif - - /* - * SIT information should be updated before segment allocation, - * since SSR needs latest valid block information. 
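/*
 * A condensed illustrative sketch (not the patch's code) of the temperature
 * selection done by __get_segment_type_{2,4,6}() above: with more active
 * logs, data blocks are split into finer hot/warm/cold classes.
 */
#include <stdio.h>
#include <stdbool.h>

enum temp { HOT, WARM, COLD };

static enum temp pick_data_temp(int active_logs, bool is_dir, bool is_cold)
{
	if (active_logs == 2)
		return HOT;		/* everything shares one data log */
	if (active_logs == 4)
		return is_dir ? HOT : COLD;
	/* six logs: directories hot, cold-hinted files cold, the rest warm */
	if (is_dir)
		return HOT;
	return is_cold ? COLD : WARM;
}

int main(void)
{
	printf("6 logs, regular file -> %d\n", pick_data_temp(6, false, false));
	printf("6 logs, cold file    -> %d\n", pick_data_temp(6, false, true));
	printf("2 logs, any file     -> %d\n", pick_data_temp(2, false, true));
	return 0;
}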
- */ - refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); - - if (!__has_curseg_space(sbi, type)) - sit_i->s_ops->allocate_segment(sbi, type, false); - - locate_dirty_segment(sbi, old_cursegno); - locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); - mutex_unlock(&sit_i->sentry_lock); - - if (p_type == NODE) - fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); - - /* writeout dirty page into bdev */ - submit_write_page(sbi, page, *new_blkaddr, p_type); - - mutex_unlock(&curseg->curseg_mutex); -} - -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) -{ - set_page_writeback(page); - submit_write_page(sbi, page, page->index, META); -} - -void write_node_page(struct f2fs_sb_info *sbi, struct page *page, - unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) -{ - struct f2fs_summary sum; - set_summary(&sum, nid, 0, 0); - do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE); -} - -void write_data_page(struct inode *inode, struct page *page, - struct dnode_of_data *dn, block_t old_blkaddr, - block_t *new_blkaddr) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct f2fs_summary sum; - struct node_info ni; - - BUG_ON(old_blkaddr == NULL_ADDR); - get_node_info(sbi, dn->nid, &ni); - set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - - do_write_page(sbi, page, old_blkaddr, - new_blkaddr, &sum, DATA); -} - -void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, - block_t old_blk_addr) -{ - submit_write_page(sbi, page, old_blk_addr, DATA); -} - -void recover_data_page(struct f2fs_sb_info *sbi, - struct page *page, struct f2fs_summary *sum, - block_t old_blkaddr, block_t new_blkaddr) -{ - struct sit_info *sit_i = SIT_I(sbi); - struct curseg_info *curseg; - unsigned int segno, old_cursegno; - struct seg_entry *se; - int type; - - segno = GET_SEGNO(sbi, new_blkaddr); - se = get_seg_entry(sbi, segno); - type = se->type; - - if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { - if (old_blkaddr == NULL_ADDR) - type = CURSEG_COLD_DATA; - else - type = CURSEG_WARM_DATA; - } - curseg = CURSEG_I(sbi, type); - - mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); - - old_cursegno = curseg->segno; - - /* change the current segment */ - if (segno != curseg->segno) { - curseg->next_segno = segno; - change_curseg(sbi, type, true); - } - - curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & - (sbi->blocks_per_seg - 1); - __add_sum_entry(sbi, type, sum); - - refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); - - locate_dirty_segment(sbi, old_cursegno); - locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); - - mutex_unlock(&sit_i->sentry_lock); - mutex_unlock(&curseg->curseg_mutex); -} - -void rewrite_node_page(struct f2fs_sb_info *sbi, - struct page *page, struct f2fs_summary *sum, - block_t old_blkaddr, block_t new_blkaddr) -{ - struct sit_info *sit_i = SIT_I(sbi); - int type = CURSEG_WARM_NODE; - struct curseg_info *curseg; - unsigned int segno, old_cursegno; - block_t next_blkaddr = next_blkaddr_of_node(page); - unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); - - curseg = CURSEG_I(sbi, type); - - mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); - - segno = GET_SEGNO(sbi, new_blkaddr); - old_cursegno = curseg->segno; - - /* change the current segment */ - if (segno != curseg->segno) { - curseg->next_segno = segno; - change_curseg(sbi, type, true); - } - curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & - (sbi->blocks_per_seg - 1); - __add_sum_entry(sbi, type, 
sum); - - /* change the current log to the next block addr in advance */ - if (next_segno != segno) { - curseg->next_segno = next_segno; - change_curseg(sbi, type, true); - } - curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & - (sbi->blocks_per_seg - 1); - - /* rewrite node page */ - set_page_writeback(page); - submit_write_page(sbi, page, new_blkaddr, NODE); - f2fs_submit_bio(sbi, NODE, true); - refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); - - locate_dirty_segment(sbi, old_cursegno); - locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); - - mutex_unlock(&sit_i->sentry_lock); - mutex_unlock(&curseg->curseg_mutex); -} - -static int read_compacted_summaries(struct f2fs_sb_info *sbi) -{ - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - struct curseg_info *seg_i; - unsigned char *kaddr; - struct page *page; - block_t start; - int i, j, offset; - - start = start_sum_block(sbi); - - page = get_meta_page(sbi, start++); - kaddr = (unsigned char *)page_address(page); - - /* Step 1: restore nat cache */ - seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); - memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE); - - /* Step 2: restore sit cache */ - seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); - memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE, - SUM_JOURNAL_SIZE); - offset = 2 * SUM_JOURNAL_SIZE; - - /* Step 3: restore summary entries */ - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { - unsigned short blk_off; - unsigned int segno; - - seg_i = CURSEG_I(sbi, i); - segno = le32_to_cpu(ckpt->cur_data_segno[i]); - blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); - seg_i->next_segno = segno; - reset_curseg(sbi, i, 0); - seg_i->alloc_type = ckpt->alloc_type[i]; - seg_i->next_blkoff = blk_off; - - if (seg_i->alloc_type == SSR) - blk_off = sbi->blocks_per_seg; - - for (j = 0; j < blk_off; j++) { - struct f2fs_summary *s; - s = (struct f2fs_summary *)(kaddr + offset); - seg_i->sum_blk->entries[j] = *s; - offset += SUMMARY_SIZE; - if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE - - SUM_FOOTER_SIZE) - continue; - - f2fs_put_page(page, 1); - page = NULL; - - page = get_meta_page(sbi, start++); - kaddr = (unsigned char *)page_address(page); - offset = 0; - } - } - f2fs_put_page(page, 1); - return 0; -} - -static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) -{ - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - struct f2fs_summary_block *sum; - struct curseg_info *curseg; - struct page *new; - unsigned short blk_off; - unsigned int segno = 0; - block_t blk_addr = 0; - - /* get segment number and block addr */ - if (IS_DATASEG(type)) { - segno = le32_to_cpu(ckpt->cur_data_segno[type]); - blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - - CURSEG_HOT_DATA]); - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) - blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); - else - blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); - } else { - segno = le32_to_cpu(ckpt->cur_node_segno[type - - CURSEG_HOT_NODE]); - blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - - CURSEG_HOT_NODE]); - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) - blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, - type - CURSEG_HOT_NODE); - else - blk_addr = GET_SUM_BLOCK(sbi, segno); - } - - new = get_meta_page(sbi, blk_addr); - sum = (struct f2fs_summary_block *)page_address(new); - - if (IS_NODESEG(type)) { - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { - struct f2fs_summary *ns = &sum->entries[0]; - int i; - for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { - ns->version = 0; - ns->ofs_in_node = 0; - } - 
} else { - if (restore_node_summary(sbi, segno, sum)) { - f2fs_put_page(new, 1); - return -EINVAL; - } - } - } - - /* set uncompleted segment to curseg */ - curseg = CURSEG_I(sbi, type); - mutex_lock(&curseg->curseg_mutex); - memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE); - curseg->next_segno = segno; - reset_curseg(sbi, type, 0); - curseg->alloc_type = ckpt->alloc_type[type]; - curseg->next_blkoff = blk_off; - mutex_unlock(&curseg->curseg_mutex); - f2fs_put_page(new, 1); - return 0; -} - -static int restore_curseg_summaries(struct f2fs_sb_info *sbi) -{ - int type = CURSEG_HOT_DATA; - - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { - /* restore for compacted data summary */ - if (read_compacted_summaries(sbi)) - return -EINVAL; - type = CURSEG_HOT_NODE; - } - - for (; type <= CURSEG_COLD_NODE; type++) - if (read_normal_summaries(sbi, type)) - return -EINVAL; - return 0; -} - -static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) -{ - struct page *page; - unsigned char *kaddr; - struct f2fs_summary *summary; - struct curseg_info *seg_i; - int written_size = 0; - int i, j; - - page = grab_meta_page(sbi, blkaddr++); - kaddr = (unsigned char *)page_address(page); - - /* Step 1: write nat cache */ - seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); - memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE); - written_size += SUM_JOURNAL_SIZE; - - /* Step 2: write sit cache */ - seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); - memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits, - SUM_JOURNAL_SIZE); - written_size += SUM_JOURNAL_SIZE; - - set_page_dirty(page); - - /* Step 3: write summary entries */ - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { - unsigned short blkoff; - seg_i = CURSEG_I(sbi, i); - if (sbi->ckpt->alloc_type[i] == SSR) - blkoff = sbi->blocks_per_seg; - else - blkoff = curseg_blkoff(sbi, i); - - for (j = 0; j < blkoff; j++) { - if (!page) { - page = grab_meta_page(sbi, blkaddr++); - kaddr = (unsigned char *)page_address(page); - written_size = 0; - } - summary = (struct f2fs_summary *)(kaddr + written_size); - *summary = seg_i->sum_blk->entries[j]; - written_size += SUMMARY_SIZE; - set_page_dirty(page); - - if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - - SUM_FOOTER_SIZE) - continue; - - f2fs_put_page(page, 1); - page = NULL; - } - } - if (page) - f2fs_put_page(page, 1); -} - -static void write_normal_summaries(struct f2fs_sb_info *sbi, - block_t blkaddr, int type) -{ - int i, end; - if (IS_DATASEG(type)) - end = type + NR_CURSEG_DATA_TYPE; - else - end = type + NR_CURSEG_NODE_TYPE; - - for (i = type; i < end; i++) { - struct curseg_info *sum = CURSEG_I(sbi, i); - mutex_lock(&sum->curseg_mutex); - write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type)); - mutex_unlock(&sum->curseg_mutex); - } -} - -void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) -{ - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) - write_compacted_summaries(sbi, start_blk); - else - write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); -} - -void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) -{ - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) - write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); -} - -int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, - unsigned int val, int alloc) -{ - int i; - - if (type == NAT_JOURNAL) { - for (i = 0; i < nats_in_cursum(sum); i++) { - if (le32_to_cpu(nid_in_journal(sum, i)) == val) - return i; - } - if (alloc && nats_in_cursum(sum) < 
NAT_JOURNAL_ENTRIES) - return update_nats_in_cursum(sum, 1); - } else if (type == SIT_JOURNAL) { - for (i = 0; i < sits_in_cursum(sum); i++) - if (le32_to_cpu(segno_in_journal(sum, i)) == val) - return i; - if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES) - return update_sits_in_cursum(sum, 1); - } - return -1; -} - -static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, - unsigned int segno) -{ - struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); - block_t blk_addr = sit_i->sit_base_addr + offset; - - check_seg_range(sbi, segno); - - /* calculate sit block address */ - if (f2fs_test_bit(offset, sit_i->sit_bitmap)) - blk_addr += sit_i->sit_blocks; - - return get_meta_page(sbi, blk_addr); -} - -static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, - unsigned int start) -{ - struct sit_info *sit_i = SIT_I(sbi); - struct page *src_page, *dst_page; - pgoff_t src_off, dst_off; - void *src_addr, *dst_addr; - - src_off = current_sit_addr(sbi, start); - dst_off = next_sit_addr(sbi, src_off); - - /* get current sit block page without lock */ - src_page = get_meta_page(sbi, src_off); - dst_page = grab_meta_page(sbi, dst_off); - BUG_ON(PageDirty(src_page)); - - src_addr = page_address(src_page); - dst_addr = page_address(dst_page); - memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); - - set_page_dirty(dst_page); - f2fs_put_page(src_page, 1); - - set_to_next_sit(sit_i, start); - - return dst_page; -} - -static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) -{ - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; - int i; - - /* - * If the journal area in the current summary is full of sit entries, - * all the sit entries will be flushed. Otherwise the sit entries - * are not able to replace with newly hot sit entries. - */ - if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { - for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { - unsigned int segno; - segno = le32_to_cpu(segno_in_journal(sum, i)); - __mark_sit_entry_dirty(sbi, segno); - } - update_sits_in_cursum(sum, -sits_in_cursum(sum)); - return 1; - } - return 0; -} - -/* - * CP calls this function, which flushes SIT entries including sit_journal, - * and moves prefree segs to free segs. - */ -void flush_sit_entries(struct f2fs_sb_info *sbi) -{ - struct sit_info *sit_i = SIT_I(sbi); - unsigned long *bitmap = sit_i->dirty_sentries_bitmap; - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; - unsigned long nsegs = TOTAL_SEGS(sbi); - struct page *page = NULL; - struct f2fs_sit_block *raw_sit = NULL; - unsigned int start = 0, end = 0; - unsigned int segno = -1; - bool flushed; - - mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); - - /* - * "flushed" indicates whether sit entries in journal are flushed - * to the SIT area or not. 
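/*
 * An illustrative sketch (simplified, not the original) of the
 * journal-or-spill decision behind lookup_journal_in_cursum() and
 * flush_sits_in_journal() above: small numbers of updated entries ride
 * along in the summary journal; once the journal is full, new updates
 * must be written straight to the main table blocks instead.
 */
#include <stdio.h>

#define JOURNAL_SLOTS 4			/* illustrative capacity */

struct journal {
	int nr;
	int segno[JOURNAL_SLOTS];
};

/* Returns 1 if the update was absorbed by the journal, 0 if the caller
 * must update the on-disk table block directly. */
static int journal_update(struct journal *j, int segno)
{
	for (int i = 0; i < j->nr; i++)
		if (j->segno[i] == segno)
			return 1;	/* already journalled, reuse the slot */
	if (j->nr < JOURNAL_SLOTS) {
		j->segno[j->nr++] = segno;
		return 1;
	}
	return 0;			/* journal full: spill to the table */
}

int main(void)
{
	struct journal j = { 0 };
	int updates[] = { 10, 11, 10, 12, 13, 14 };

	for (int i = 0; i < 6; i++)
		printf("segment %d -> %s\n", updates[i],
		       journal_update(&j, updates[i]) ? "journal" : "table");
	return 0;
}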
- */ - flushed = flush_sits_in_journal(sbi); - - while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { - struct seg_entry *se = get_seg_entry(sbi, segno); - int sit_offset, offset; - - sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); - - if (flushed) - goto to_sit_page; - - offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); - if (offset >= 0) { - segno_in_journal(sum, offset) = cpu_to_le32(segno); - seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); - goto flush_done; - } -to_sit_page: - if (!page || (start > segno) || (segno > end)) { - if (page) { - f2fs_put_page(page, 1); - page = NULL; - } - - start = START_SEGNO(sit_i, segno); - end = start + SIT_ENTRY_PER_BLOCK - 1; - - /* read sit block that will be updated */ - page = get_next_sit_page(sbi, start); - raw_sit = page_address(page); - } - - /* udpate entry in SIT block */ - seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); -flush_done: - __clear_bit(segno, bitmap); - sit_i->dirty_sentries--; - } - mutex_unlock(&sit_i->sentry_lock); - mutex_unlock(&curseg->curseg_mutex); - - /* writeout last modified SIT block */ - f2fs_put_page(page, 1); - - set_prefree_as_free_segments(sbi); -} - -static int build_sit_info(struct f2fs_sb_info *sbi) -{ - struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - struct sit_info *sit_i; - unsigned int sit_segs, start; - char *src_bitmap, *dst_bitmap; - unsigned int bitmap_size; - - /* allocate memory for SIT information */ - sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL); - if (!sit_i) - return -ENOMEM; - - SM_I(sbi)->sit_info = sit_i; - - sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); - if (!sit_i->sentries) - return -ENOMEM; - - bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); - sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!sit_i->dirty_sentries_bitmap) - return -ENOMEM; - - for (start = 0; start < TOTAL_SEGS(sbi); start++) { - sit_i->sentries[start].cur_valid_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - sit_i->sentries[start].ckpt_valid_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - if (!sit_i->sentries[start].cur_valid_map - || !sit_i->sentries[start].ckpt_valid_map) - return -ENOMEM; - } - - if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * - sizeof(struct sec_entry)); - if (!sit_i->sec_entries) - return -ENOMEM; - } - - /* get information related with SIT */ - sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1; - - /* setup SIT bitmap from ckeckpoint pack */ - bitmap_size = __bitmap_size(sbi, SIT_BITMAP); - src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); - - dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); - if (!dst_bitmap) - return -ENOMEM; - - /* init SIT information */ - sit_i->s_ops = &default_salloc_ops; - - sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); - sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; - sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count); - sit_i->sit_bitmap = dst_bitmap; - sit_i->bitmap_size = bitmap_size; - sit_i->dirty_sentries = 0; - sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; - sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); - sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; - mutex_init(&sit_i->sentry_lock); - return 0; -} - -static int build_free_segmap(struct f2fs_sb_info *sbi) -{ - struct f2fs_sm_info *sm_info = SM_I(sbi); - struct free_segmap_info *free_i; - unsigned int bitmap_size, sec_bitmap_size; - - /* allocate memory for 
free segmap information */ - free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL); - if (!free_i) - return -ENOMEM; - - SM_I(sbi)->free_info = free_i; - - bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); - free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); - if (!free_i->free_segmap) - return -ENOMEM; - - sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); - free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); - if (!free_i->free_secmap) - return -ENOMEM; - - /* set all segments as dirty temporarily */ - memset(free_i->free_segmap, 0xff, bitmap_size); - memset(free_i->free_secmap, 0xff, sec_bitmap_size); - - /* init free segmap information */ - free_i->start_segno = - (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr); - free_i->free_segments = 0; - free_i->free_sections = 0; - rwlock_init(&free_i->segmap_lock); - return 0; -} - -static int build_curseg(struct f2fs_sb_info *sbi) -{ - struct curseg_info *array; - int i; - - array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); - if (!array) - return -ENOMEM; - - SM_I(sbi)->curseg_array = array; - - for (i = 0; i < NR_CURSEG_TYPE; i++) { - mutex_init(&array[i].curseg_mutex); - array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL); - if (!array[i].sum_blk) - return -ENOMEM; - array[i].segno = NULL_SEGNO; - array[i].next_blkoff = 0; - } - return restore_curseg_summaries(sbi); -} - -static void build_sit_entries(struct f2fs_sb_info *sbi) -{ - struct sit_info *sit_i = SIT_I(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; - unsigned int start; - - for (start = 0; start < TOTAL_SEGS(sbi); start++) { - struct seg_entry *se = &sit_i->sentries[start]; - struct f2fs_sit_block *sit_blk; - struct f2fs_sit_entry sit; - struct page *page; - int i; - - mutex_lock(&curseg->curseg_mutex); - for (i = 0; i < sits_in_cursum(sum); i++) { - if (le32_to_cpu(segno_in_journal(sum, i)) == start) { - sit = sit_in_journal(sum, i); - mutex_unlock(&curseg->curseg_mutex); - goto got_it; - } - } - mutex_unlock(&curseg->curseg_mutex); - page = get_current_sit_page(sbi, start); - sit_blk = (struct f2fs_sit_block *)page_address(page); - sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; - f2fs_put_page(page, 1); -got_it: - check_block_count(sbi, start, &sit); - seg_info_from_raw_sit(se, &sit); - if (sbi->segs_per_sec > 1) { - struct sec_entry *e = get_sec_entry(sbi, start); - e->valid_blocks += se->valid_blocks; - } - } -} - -static void init_free_segmap(struct f2fs_sb_info *sbi) -{ - unsigned int start; - int type; - - for (start = 0; start < TOTAL_SEGS(sbi); start++) { - struct seg_entry *sentry = get_seg_entry(sbi, start); - if (!sentry->valid_blocks) - __set_free(sbi, start); - } - - /* set use the current segments */ - for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { - struct curseg_info *curseg_t = CURSEG_I(sbi, type); - __set_test_and_inuse(sbi, curseg_t->segno); - } -} - -static void init_dirty_segmap(struct f2fs_sb_info *sbi) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); - unsigned short valid_blocks; - - while (1) { - /* find dirty segment based on free segmap */ - segno = find_next_inuse(free_i, total_segs, offset); - if (segno >= total_segs) - break; - offset = segno + 1; - valid_blocks = get_valid_blocks(sbi, segno, 0); - if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) - continue; - 
mutex_lock(&dirty_i->seglist_lock); - __locate_dirty_segment(sbi, segno, DIRTY); - mutex_unlock(&dirty_i->seglist_lock); - } -} - -static int init_victim_secmap(struct f2fs_sb_info *sbi) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); - - dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dirty_i->victim_secmap) - return -ENOMEM; - return 0; -} - -static int build_dirty_segmap(struct f2fs_sb_info *sbi) -{ - struct dirty_seglist_info *dirty_i; - unsigned int bitmap_size, i; - - /* allocate memory for dirty segments list information */ - dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL); - if (!dirty_i) - return -ENOMEM; - - SM_I(sbi)->dirty_info = dirty_i; - mutex_init(&dirty_i->seglist_lock); - - bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); - - for (i = 0; i < NR_DIRTY_TYPE; i++) { - dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); - if (!dirty_i->dirty_segmap[i]) - return -ENOMEM; - } - - init_dirty_segmap(sbi); - return init_victim_secmap(sbi); -} - -/* - * Update min, max modified time for cost-benefit GC algorithm - */ -static void init_min_max_mtime(struct f2fs_sb_info *sbi) -{ - struct sit_info *sit_i = SIT_I(sbi); - unsigned int segno; - - mutex_lock(&sit_i->sentry_lock); - - sit_i->min_mtime = LLONG_MAX; - - for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { - unsigned int i; - unsigned long long mtime = 0; - - for (i = 0; i < sbi->segs_per_sec; i++) - mtime += get_seg_entry(sbi, segno + i)->mtime; - - mtime = div_u64(mtime, sbi->segs_per_sec); - - if (sit_i->min_mtime > mtime) - sit_i->min_mtime = mtime; - } - sit_i->max_mtime = get_mtime(sbi); - mutex_unlock(&sit_i->sentry_lock); -} - -int build_segment_manager(struct f2fs_sb_info *sbi) -{ - struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - struct f2fs_sm_info *sm_info; - int err; - - sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL); - if (!sm_info) - return -ENOMEM; - - /* init sm info */ - sbi->sm_info = sm_info; - INIT_LIST_HEAD(&sm_info->wblist_head); - spin_lock_init(&sm_info->wblist_lock); - sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); - sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); - sm_info->segment_count = le32_to_cpu(raw_super->segment_count); - sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); - sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); - sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); - sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); - - err = build_sit_info(sbi); - if (err) - return err; - err = build_free_segmap(sbi); - if (err) - return err; - err = build_curseg(sbi); - if (err) - return err; - - /* reinit free segmap based on SIT */ - build_sit_entries(sbi); - - init_free_segmap(sbi); - err = build_dirty_segmap(sbi); - if (err) - return err; - - init_min_max_mtime(sbi); - return 0; -} - -static void discard_dirty_segmap(struct f2fs_sb_info *sbi, - enum dirty_type dirty_type) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - - mutex_lock(&dirty_i->seglist_lock); - kfree(dirty_i->dirty_segmap[dirty_type]); - dirty_i->nr_dirty[dirty_type] = 0; - mutex_unlock(&dirty_i->seglist_lock); -} - -static void destroy_victim_secmap(struct f2fs_sb_info *sbi) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - kfree(dirty_i->victim_secmap); -} - -static void destroy_dirty_segmap(struct f2fs_sb_info 
*sbi) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - int i; - - if (!dirty_i) - return; - - /* discard pre-free/dirty segments list */ - for (i = 0; i < NR_DIRTY_TYPE; i++) - discard_dirty_segmap(sbi, i); - - destroy_victim_secmap(sbi); - SM_I(sbi)->dirty_info = NULL; - kfree(dirty_i); -} - -static void destroy_curseg(struct f2fs_sb_info *sbi) -{ - struct curseg_info *array = SM_I(sbi)->curseg_array; - int i; - - if (!array) - return; - SM_I(sbi)->curseg_array = NULL; - for (i = 0; i < NR_CURSEG_TYPE; i++) - kfree(array[i].sum_blk); - kfree(array); -} - -static void destroy_free_segmap(struct f2fs_sb_info *sbi) -{ - struct free_segmap_info *free_i = SM_I(sbi)->free_info; - if (!free_i) - return; - SM_I(sbi)->free_info = NULL; - kfree(free_i->free_segmap); - kfree(free_i->free_secmap); - kfree(free_i); -} - -static void destroy_sit_info(struct f2fs_sb_info *sbi) -{ - struct sit_info *sit_i = SIT_I(sbi); - unsigned int start; - - if (!sit_i) - return; - - if (sit_i->sentries) { - for (start = 0; start < TOTAL_SEGS(sbi); start++) { - kfree(sit_i->sentries[start].cur_valid_map); - kfree(sit_i->sentries[start].ckpt_valid_map); - } - } - vfree(sit_i->sentries); - vfree(sit_i->sec_entries); - kfree(sit_i->dirty_sentries_bitmap); - - SM_I(sbi)->sit_info = NULL; - kfree(sit_i->sit_bitmap); - kfree(sit_i); -} - -void destroy_segment_manager(struct f2fs_sb_info *sbi) -{ - struct f2fs_sm_info *sm_info = SM_I(sbi); - destroy_dirty_segmap(sbi); - destroy_curseg(sbi); - destroy_free_segmap(sbi); - destroy_sit_info(sbi); - sbi->sm_info = NULL; - kfree(sm_info); -} diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h deleted file mode 100644 index 062424a0e4c..00000000000 --- a/fs/f2fs/segment.h +++ /dev/null @@ -1,637 +0,0 @@ -/* - * fs/f2fs/segment.h - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include - -/* constant macro */ -#define NULL_SEGNO ((unsigned int)(~0)) -#define NULL_SECNO ((unsigned int)(~0)) - -/* L: Logical segment # in volume, R: Relative segment # in main area */ -#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) -#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) - -#define IS_DATASEG(t) \ - ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ - (t == CURSEG_WARM_DATA)) - -#define IS_NODESEG(t) \ - ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ - (t == CURSEG_WARM_NODE)) - -#define IS_CURSEG(sbi, seg) \ - ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) - -#define IS_CURSEC(sbi, secno) \ - ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ - sbi->segs_per_sec)) \ - -#define START_BLOCK(sbi, segno) \ - (SM_I(sbi)->seg0_blkaddr + \ - (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) -#define NEXT_FREE_BLKADDR(sbi, curseg) \ - (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) - -#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) - -#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ - ((blk_addr) - SM_I(sbi)->seg0_blkaddr) -#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) -#define GET_SEGNO(sbi, blk_addr) \ - (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ - NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ - GET_SEGNO_FROM_SEG0(sbi, blk_addr))) -#define GET_SECNO(sbi, segno) \ - ((segno) / sbi->segs_per_sec) -#define GET_ZONENO_FROM_SEGNO(sbi, segno) \ - ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) - -#define GET_SUM_BLOCK(sbi, segno) \ - ((sbi->sm_info->ssa_blkaddr) + segno) - -#define GET_SUM_TYPE(footer) ((footer)->entry_type) -#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) - -#define SIT_ENTRY_OFFSET(sit_i, segno) \ - (segno % sit_i->sents_per_block) -#define SIT_BLOCK_OFFSET(sit_i, segno) \ - (segno / SIT_ENTRY_PER_BLOCK) -#define START_SEGNO(sit_i, segno) \ - (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) -#define f2fs_bitmap_size(nr) \ - (BITS_TO_LONGS(nr) * sizeof(unsigned long)) -#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) -#define TOTAL_SECS(sbi) (sbi->total_sections) - -#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ - (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) -#define SECTOR_TO_BLOCK(sbi, sectors) \ - (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) - -/* during checkpoint, bio_private is used to synchronize the last bio */ -struct bio_private { - struct f2fs_sb_info *sbi; - bool is_sync; - void *wait; -}; - -/* - * indicate a block allocation direction: RIGHT and LEFT. - * RIGHT means allocating new sections towards the end of volume. - * LEFT means the opposite direction. 
- */ -enum { - ALLOC_RIGHT = 0, - ALLOC_LEFT -}; - -/* - * In the victim_sel_policy->alloc_mode, there are two block allocation modes. - * LFS writes data sequentially with cleaning operations. - * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations. - */ -enum { - LFS = 0, - SSR -}; - -/* - * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes. - * GC_CB is based on cost-benefit algorithm. - * GC_GREEDY is based on greedy algorithm. - */ -enum { - GC_CB = 0, - GC_GREEDY -}; - -/* - * BG_GC means the background cleaning job. - * FG_GC means the on-demand cleaning job. - */ -enum { - BG_GC = 0, - FG_GC -}; - -/* for a function parameter to select a victim segment */ -struct victim_sel_policy { - int alloc_mode; /* LFS or SSR */ - int gc_mode; /* GC_CB or GC_GREEDY */ - unsigned long *dirty_segmap; /* dirty segment bitmap */ - unsigned int offset; /* last scanned bitmap offset */ - unsigned int ofs_unit; /* bitmap search unit */ - unsigned int min_cost; /* minimum cost */ - unsigned int min_segno; /* segment # having min. cost */ -}; - -struct seg_entry { - unsigned short valid_blocks; /* # of valid blocks */ - unsigned char *cur_valid_map; /* validity bitmap of blocks */ - /* - * # of valid blocks and the validity bitmap stored in the the last - * checkpoint pack. This information is used by the SSR mode. - */ - unsigned short ckpt_valid_blocks; - unsigned char *ckpt_valid_map; - unsigned char type; /* segment type like CURSEG_XXX_TYPE */ - unsigned long long mtime; /* modification time of the segment */ -}; - -struct sec_entry { - unsigned int valid_blocks; /* # of valid blocks in a section */ -}; - -struct segment_allocation { - void (*allocate_segment)(struct f2fs_sb_info *, int, bool); -}; - -struct sit_info { - const struct segment_allocation *s_ops; - - block_t sit_base_addr; /* start block address of SIT area */ - block_t sit_blocks; /* # of blocks used by SIT area */ - block_t written_valid_blocks; /* # of valid blocks in main area */ - char *sit_bitmap; /* SIT bitmap pointer */ - unsigned int bitmap_size; /* SIT bitmap size */ - - unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ - unsigned int dirty_sentries; /* # of dirty sentries */ - unsigned int sents_per_block; /* # of SIT entries per block */ - struct mutex sentry_lock; /* to protect SIT cache */ - struct seg_entry *sentries; /* SIT segment-level cache */ - struct sec_entry *sec_entries; /* SIT section-level cache */ - - /* for cost-benefit algorithm in cleaning procedure */ - unsigned long long elapsed_time; /* elapsed time after mount */ - unsigned long long mounted_time; /* mount time */ - unsigned long long min_mtime; /* min. modification time */ - unsigned long long max_mtime; /* max. 
modification time */ -}; - -struct free_segmap_info { - unsigned int start_segno; /* start segment number logically */ - unsigned int free_segments; /* # of free segments */ - unsigned int free_sections; /* # of free sections */ - rwlock_t segmap_lock; /* free segmap lock */ - unsigned long *free_segmap; /* free segment bitmap */ - unsigned long *free_secmap; /* free section bitmap */ -}; - -/* Notice: The order of dirty type is same with CURSEG_XXX in f2fs.h */ -enum dirty_type { - DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */ - DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */ - DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */ - DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */ - DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */ - DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */ - DIRTY, /* to count # of dirty segments */ - PRE, /* to count # of entirely obsolete segments */ - NR_DIRTY_TYPE -}; - -struct dirty_seglist_info { - const struct victim_selection *v_ops; /* victim selction operation */ - unsigned long *dirty_segmap[NR_DIRTY_TYPE]; - struct mutex seglist_lock; /* lock for segment bitmaps */ - int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ - unsigned long *victim_secmap; /* background GC victims */ -}; - -/* victim selection function for cleaning and SSR */ -struct victim_selection { - int (*get_victim)(struct f2fs_sb_info *, unsigned int *, - int, int, char); -}; - -/* for active log information */ -struct curseg_info { - struct mutex curseg_mutex; /* lock for consistency */ - struct f2fs_summary_block *sum_blk; /* cached summary block */ - unsigned char alloc_type; /* current allocation type */ - unsigned int segno; /* current segment number */ - unsigned short next_blkoff; /* next block offset to write */ - unsigned int zone; /* current zone number */ - unsigned int next_segno; /* preallocated segment */ -}; - -/* - * inline functions - */ -static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) -{ - return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); -} - -static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, - unsigned int segno) -{ - struct sit_info *sit_i = SIT_I(sbi); - return &sit_i->sentries[segno]; -} - -static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, - unsigned int segno) -{ - struct sit_info *sit_i = SIT_I(sbi); - return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; -} - -static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, - unsigned int segno, int section) -{ - /* - * In order to get # of valid blocks in a section instantly from many - * segments, f2fs manages two counting structures separately. 
- */ - if (section > 1) - return get_sec_entry(sbi, segno)->valid_blocks; - else - return get_seg_entry(sbi, segno)->valid_blocks; -} - -static inline void seg_info_from_raw_sit(struct seg_entry *se, - struct f2fs_sit_entry *rs) -{ - se->valid_blocks = GET_SIT_VBLOCKS(rs); - se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); - memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); - memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); - se->type = GET_SIT_TYPE(rs); - se->mtime = le64_to_cpu(rs->mtime); -} - -static inline void seg_info_to_raw_sit(struct seg_entry *se, - struct f2fs_sit_entry *rs) -{ - unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) | - se->valid_blocks; - rs->vblocks = cpu_to_le16(raw_vblocks); - memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); - memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); - se->ckpt_valid_blocks = se->valid_blocks; - rs->mtime = cpu_to_le64(se->mtime); -} - -static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, - unsigned int max, unsigned int segno) -{ - unsigned int ret; - read_lock(&free_i->segmap_lock); - ret = find_next_bit(free_i->free_segmap, max, segno); - read_unlock(&free_i->segmap_lock); - return ret; -} - -static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) -{ - struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; - unsigned int start_segno = secno * sbi->segs_per_sec; - unsigned int next; - - write_lock(&free_i->segmap_lock); - clear_bit(segno, free_i->free_segmap); - free_i->free_segments++; - - next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); - if (next >= start_segno + sbi->segs_per_sec) { - clear_bit(secno, free_i->free_secmap); - free_i->free_sections++; - } - write_unlock(&free_i->segmap_lock); -} - -static inline void __set_inuse(struct f2fs_sb_info *sbi, - unsigned int segno) -{ - struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; - set_bit(segno, free_i->free_segmap); - free_i->free_segments--; - if (!test_and_set_bit(secno, free_i->free_secmap)) - free_i->free_sections--; -} - -static inline void __set_test_and_free(struct f2fs_sb_info *sbi, - unsigned int segno) -{ - struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; - unsigned int start_segno = secno * sbi->segs_per_sec; - unsigned int next; - - write_lock(&free_i->segmap_lock); - if (test_and_clear_bit(segno, free_i->free_segmap)) { - free_i->free_segments++; - - next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), - start_segno); - if (next >= start_segno + sbi->segs_per_sec) { - if (test_and_clear_bit(secno, free_i->free_secmap)) - free_i->free_sections++; - } - } - write_unlock(&free_i->segmap_lock); -} - -static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, - unsigned int segno) -{ - struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; - write_lock(&free_i->segmap_lock); - if (!test_and_set_bit(segno, free_i->free_segmap)) { - free_i->free_segments--; - if (!test_and_set_bit(secno, free_i->free_secmap)) - free_i->free_sections--; - } - write_unlock(&free_i->segmap_lock); -} - -static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, - void *dst_addr) -{ - struct sit_info *sit_i = SIT_I(sbi); - memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); -} - -static inline block_t written_block_count(struct f2fs_sb_info *sbi) -{ - struct sit_info *sit_i 
= SIT_I(sbi); - block_t vblocks; - - mutex_lock(&sit_i->sentry_lock); - vblocks = sit_i->written_valid_blocks; - mutex_unlock(&sit_i->sentry_lock); - - return vblocks; -} - -static inline unsigned int free_segments(struct f2fs_sb_info *sbi) -{ - struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int free_segs; - - read_lock(&free_i->segmap_lock); - free_segs = free_i->free_segments; - read_unlock(&free_i->segmap_lock); - - return free_segs; -} - -static inline int reserved_segments(struct f2fs_sb_info *sbi) -{ - return SM_I(sbi)->reserved_segments; -} - -static inline unsigned int free_sections(struct f2fs_sb_info *sbi) -{ - struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int free_secs; - - read_lock(&free_i->segmap_lock); - free_secs = free_i->free_sections; - read_unlock(&free_i->segmap_lock); - - return free_secs; -} - -static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) -{ - return DIRTY_I(sbi)->nr_dirty[PRE]; -} - -static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi) -{ - return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] + - DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] + - DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] + - DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] + - DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] + - DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE]; -} - -static inline int overprovision_segments(struct f2fs_sb_info *sbi) -{ - return SM_I(sbi)->ovp_segments; -} - -static inline int overprovision_sections(struct f2fs_sb_info *sbi) -{ - return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; -} - -static inline int reserved_sections(struct f2fs_sb_info *sbi) -{ - return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec; -} - -static inline bool need_SSR(struct f2fs_sb_info *sbi) -{ - return (free_sections(sbi) < overprovision_sections(sbi)); -} - -static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) -{ - int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); - int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - - if (sbi->por_doing) - return false; - - return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + - reserved_sections(sbi))); -} - -static inline int utilization(struct f2fs_sb_info *sbi) -{ - return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count); -} - -/* - * Sometimes f2fs may be better to drop out-of-place update policy. - * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write - * data in the original place likewise other traditional file systems. - * But, currently set 100 in percentage, which means it is disabled. - * See below need_inplace_update(). 
- */ -#define MIN_IPU_UTIL 100 -static inline bool need_inplace_update(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - if (S_ISDIR(inode->i_mode)) - return false; - if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) - return true; - return false; -} - -static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, - int type) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - return curseg->segno; -} - -static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi, - int type) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - return curseg->alloc_type; -} - -static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - return curseg->next_blkoff; -} - -static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) -{ - unsigned int end_segno = SM_I(sbi)->segment_count - 1; - BUG_ON(segno > end_segno); -} - -/* - * This function is used for only debugging. - * NOTE: In future, we have to remove this function. - */ -static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) -{ - struct f2fs_sm_info *sm_info = SM_I(sbi); - block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; - block_t start_addr = sm_info->seg0_blkaddr; - block_t end_addr = start_addr + total_blks - 1; - BUG_ON(blk_addr < start_addr); - BUG_ON(blk_addr > end_addr); -} - -/* - * Summary block is always treated as invalid block - */ -static inline void check_block_count(struct f2fs_sb_info *sbi, - int segno, struct f2fs_sit_entry *raw_sit) -{ - struct f2fs_sm_info *sm_info = SM_I(sbi); - unsigned int end_segno = sm_info->segment_count - 1; - int valid_blocks = 0; - int i; - - /* check segment usage */ - BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); - - /* check boundary of a given segment number */ - BUG_ON(segno > end_segno); - - /* check bitmap with valid block count */ - for (i = 0; i < sbi->blocks_per_seg; i++) - if (f2fs_test_bit(i, raw_sit->valid_map)) - valid_blocks++; - BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); -} - -static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, - unsigned int start) -{ - struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); - block_t blk_addr = sit_i->sit_base_addr + offset; - - check_seg_range(sbi, start); - - /* calculate sit block address */ - if (f2fs_test_bit(offset, sit_i->sit_bitmap)) - blk_addr += sit_i->sit_blocks; - - return blk_addr; -} - -static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, - pgoff_t block_addr) -{ - struct sit_info *sit_i = SIT_I(sbi); - block_addr -= sit_i->sit_base_addr; - if (block_addr < sit_i->sit_blocks) - block_addr += sit_i->sit_blocks; - else - block_addr -= sit_i->sit_blocks; - - return block_addr + sit_i->sit_base_addr; -} - -static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) -{ - unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); - - if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) - f2fs_clear_bit(block_off, sit_i->sit_bitmap); - else - f2fs_set_bit(block_off, sit_i->sit_bitmap); -} - -static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) -{ - struct sit_info *sit_i = SIT_I(sbi); - return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - - sit_i->mounted_time; -} - -static inline void set_summary(struct f2fs_summary *sum, nid_t nid, - unsigned int ofs_in_node, unsigned char version) -{ - sum->nid = cpu_to_le32(nid); - sum->ofs_in_node = 
cpu_to_le16(ofs_in_node); - sum->version = version; -} - -static inline block_t start_sum_block(struct f2fs_sb_info *sbi) -{ - return __start_cp_addr(sbi) + - le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); -} - -static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) -{ - return __start_cp_addr(sbi) + - le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) - - (base + 1) + type; -} - -static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) -{ - if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) - return true; - return false; -} - -static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) -{ - struct block_device *bdev = sbi->sb->s_bdev; - struct request_queue *q = bdev_get_queue(bdev); - return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); -} diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c deleted file mode 100644 index e8c28d18b3e..00000000000 --- a/fs/f2fs/super.c +++ /dev/null @@ -1,1154 +0,0 @@ -/* - * fs/f2fs/super.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "node.h" -#include "segment.h" -#include "xattr.h" -#include "gc.h" - -#define CREATE_TRACE_POINTS -#include - -static struct proc_dir_entry *f2fs_proc_root; -static struct kmem_cache *f2fs_inode_cachep; -static struct kset *f2fs_kset; - -enum { - Opt_gc_background, - Opt_disable_roll_forward, - Opt_discard, - Opt_noheap, - Opt_nouser_xattr, - Opt_noacl, - Opt_active_logs, - Opt_disable_ext_identify, - Opt_inline_xattr, - Opt_android_emu, - Opt_err_continue, - Opt_err_panic, - Opt_err_recover, - Opt_err, -}; - -static match_table_t f2fs_tokens = { - {Opt_gc_background, "background_gc=%s"}, - {Opt_disable_roll_forward, "disable_roll_forward"}, - {Opt_discard, "discard"}, - {Opt_noheap, "no_heap"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_noacl, "noacl"}, - {Opt_active_logs, "active_logs=%u"}, - {Opt_disable_ext_identify, "disable_ext_identify"}, - {Opt_inline_xattr, "inline_xattr"}, - {Opt_android_emu, "android_emu=%s"}, - {Opt_err_continue, "errors=continue"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_recover, "errors=recover"}, - {Opt_err, NULL}, -}; - -/* Sysfs support for f2fs */ -struct f2fs_attr { - struct attribute attr; - ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); - ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, - const char *, size_t); - int offset; -}; - -static ssize_t f2fs_sbi_show(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, char *buf) -{ - struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; - unsigned int *ui; - - if (!gc_kth) - return -EINVAL; - - ui = (unsigned int *)(((char *)gc_kth) + a->offset); - - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); -} - -static ssize_t f2fs_sbi_store(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, - const char *buf, size_t count) -{ - struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; - unsigned long t; - unsigned int *ui; - ssize_t ret; - - if (!gc_kth) - return -EINVAL; - - ui = (unsigned int *)(((char *)gc_kth) + a->offset); - - ret = kstrtoul(skip_spaces(buf), 0, &t); - if (ret < 0) - return ret; - *ui = t; - return count; -} - -static ssize_t 
f2fs_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); - - return a->show ? a->show(a, sbi, buf) : 0; -} - -static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); - - return a->store ? a->store(a, sbi, buf, len) : 0; -} - -static void f2fs_sb_release(struct kobject *kobj) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - complete(&sbi->s_kobj_unregister); -} - -#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ -static struct f2fs_attr f2fs_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .offset = offsetof(struct f2fs_gc_kthread, _elname), \ -} - -#define F2FS_RW_ATTR(name, elname) \ - F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) - -F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); -F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); -F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); -F2FS_RW_ATTR(gc_idle, gc_idle); - -#define ATTR_LIST(name) (&f2fs_attr_##name.attr) -static struct attribute *f2fs_attrs[] = { - ATTR_LIST(gc_min_sleep_time), - ATTR_LIST(gc_max_sleep_time), - ATTR_LIST(gc_no_gc_sleep_time), - ATTR_LIST(gc_idle), - NULL, -}; - -static const struct sysfs_ops f2fs_attr_ops = { - .show = f2fs_attr_show, - .store = f2fs_attr_store, -}; - -static struct kobj_type f2fs_ktype = { - .default_attrs = f2fs_attrs, - .sysfs_ops = &f2fs_attr_ops, - .release = f2fs_sb_release, -}; - -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); - va_end(args); -} - -static void init_once(void *foo) -{ - struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; - - inode_init_once(&fi->vfs_inode); -} - -static int parse_android_emu(struct f2fs_sb_info *sbi, char *args) -{ - char *sep = args; - char *sepres; - int ret; - - if (!sep) - return -EINVAL; - - sepres = strsep(&sep, ":"); - if (!sep) - return -EINVAL; - ret = kstrtou32(sepres, 0, &sbi->android_emu_uid); - if (ret) - return ret; - - sepres = strsep(&sep, ":"); - if (!sep) - return -EINVAL; - ret = kstrtou32(sepres, 0, &sbi->android_emu_gid); - if (ret) - return ret; - - sepres = strsep(&sep, ":"); - ret = kstrtou16(sepres, 8, &sbi->android_emu_mode); - if (ret) - return ret; - - if (sep && strstr(sep, "nocase")) - sbi->android_emu_flags = F2FS_ANDROID_EMU_NOCASE; - - return 0; -} - -static int parse_options(struct super_block *sb, char *options) -{ - struct f2fs_sb_info *sbi = F2FS_SB(sb); - substring_t args[MAX_OPT_ARGS]; - char *p, *name; - int arg = 0; - - if (!options) - return 0; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments. 
- */ - args[0].to = args[0].from = NULL; - token = match_token(p, f2fs_tokens, args); - - switch (token) { - case Opt_gc_background: - name = match_strdup(&args[0]); - - if (!name) - return -ENOMEM; - if (!strncmp(name, "on", 2)) - set_opt(sbi, BG_GC); - else if (!strncmp(name, "off", 3)) - clear_opt(sbi, BG_GC); - else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_disable_roll_forward: - set_opt(sbi, DISABLE_ROLL_FORWARD); - break; - case Opt_discard: - set_opt(sbi, DISCARD); - break; - case Opt_noheap: - set_opt(sbi, NOHEAP); - break; -#ifdef CONFIG_F2FS_FS_XATTR - case Opt_nouser_xattr: - clear_opt(sbi, XATTR_USER); - break; - case Opt_inline_xattr: - set_opt(sbi, INLINE_XATTR); - break; -#else - case Opt_nouser_xattr: - f2fs_msg(sb, KERN_INFO, - "nouser_xattr options not supported"); - break; - case Opt_inline_xattr: - f2fs_msg(sb, KERN_INFO, - "inline_xattr options not supported"); - break; -#endif -#ifdef CONFIG_F2FS_FS_POSIX_ACL - case Opt_noacl: - clear_opt(sbi, POSIX_ACL); - break; -#else - case Opt_noacl: - f2fs_msg(sb, KERN_INFO, "noacl options not supported"); - break; -#endif - case Opt_active_logs: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) - return -EINVAL; - sbi->active_logs = arg; - break; - case Opt_disable_ext_identify: - set_opt(sbi, DISABLE_EXT_IDENTIFY); - break; - case Opt_err_continue: - clear_opt(sbi, ERRORS_RECOVER); - clear_opt(sbi, ERRORS_PANIC); - break; - case Opt_err_panic: - set_opt(sbi, ERRORS_PANIC); - clear_opt(sbi, ERRORS_RECOVER); - break; - case Opt_err_recover: - set_opt(sbi, ERRORS_RECOVER); - clear_opt(sbi, ERRORS_PANIC); - break; - case Opt_android_emu: - if (args->from) { - int ret; - char *perms = match_strdup(args); - - ret = parse_android_emu(sbi, perms); - kfree(perms); - - if (ret) - return -EINVAL; - - set_opt(sbi, ANDROID_EMU); - } else - return -EINVAL; - break; - - default: - f2fs_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" or missing value", - p); - return -EINVAL; - } - } - return 0; -} - -static struct inode *f2fs_alloc_inode(struct super_block *sb) -{ - struct f2fs_inode_info *fi; - - fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO); - if (!fi) - return NULL; - - init_once((void *) fi); - - /* Initialize f2fs-specific inode info */ - fi->vfs_inode.i_version = 1; - atomic_set(&fi->dirty_dents, 0); - fi->i_current_depth = 1; - fi->i_advise = 0; - rwlock_init(&fi->ext.ext_lock); - - set_inode_flag(fi, FI_NEW_INODE); - - if (test_opt(F2FS_SB(sb), INLINE_XATTR)) - set_inode_flag(fi, FI_INLINE_XATTR); - - return &fi->vfs_inode; -} - -static int f2fs_drop_inode(struct inode *inode) -{ - /* - * This is to avoid a deadlock condition like below. - * writeback_single_inode(inode) - * - f2fs_write_data_page - * - f2fs_gc -> iput -> evict - * - inode_wait_for_writeback(inode) - */ - if (!inode_unhashed(inode) && inode->i_state & I_SYNC) - return 0; - return generic_drop_inode(inode); -} - -/* - * f2fs_dirty_inode() is called from __mark_inode_dirty() - * - * We should call set_dirty_inode to write the dirty inode through write_inode. 
- */ -static void f2fs_dirty_inode(struct inode *inode, int flags) -{ - set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); -} - -static void f2fs_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode)); -} - -static void f2fs_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, f2fs_i_callback); -} - -static void f2fs_put_super(struct super_block *sb) -{ - struct f2fs_sb_info *sbi = F2FS_SB(sb); - - if (sbi->s_proc) { - remove_proc_entry("segment_info", sbi->s_proc); - remove_proc_entry(sb->s_id, f2fs_proc_root); - } - kobject_del(&sbi->s_kobj); - - f2fs_destroy_stats(sbi); - stop_gc_thread(sbi); - - write_checkpoint(sbi, true); - - iput(sbi->node_inode); - iput(sbi->meta_inode); - - /* destroy f2fs internal modules */ - destroy_node_manager(sbi); - destroy_segment_manager(sbi); - - kfree(sbi->ckpt); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); - - sb->s_fs_info = NULL; - brelse(sbi->raw_super_buf); - kfree(sbi); -} - -int f2fs_sync_fs(struct super_block *sb, int sync) -{ - struct f2fs_sb_info *sbi = F2FS_SB(sb); - - trace_f2fs_sync_fs(sb, sync); - - if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) - return 0; - - if (sync) { - mutex_lock(&sbi->gc_mutex); - write_checkpoint(sbi, false); - mutex_unlock(&sbi->gc_mutex); - } else { - f2fs_balance_fs(sbi); - } - - return 0; -} - -static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) -{ - struct super_block *sb = dentry->d_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - u64 id = huge_encode_dev(sb->s_bdev->bd_dev); - block_t total_count, user_block_count, start_count, ovp_count; - - total_count = le64_to_cpu(sbi->raw_super->block_count); - user_block_count = sbi->user_block_count; - start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr); - ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; - buf->f_type = F2FS_SUPER_MAGIC; - buf->f_bsize = sbi->blocksize; - - buf->f_blocks = total_count - start_count; - buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; - buf->f_bavail = user_block_count - valid_user_blocks(sbi); - - buf->f_files = sbi->total_node_count; - buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); - - buf->f_namelen = F2FS_NAME_LEN; - buf->f_fsid.val[0] = (u32)id; - buf->f_fsid.val[1] = (u32)(id >> 32); - - return 0; -} - -static int f2fs_show_options(struct seq_file *seq, struct vfsmount *vfs) -{ - struct f2fs_sb_info *sbi = F2FS_SB(vfs->mnt_sb); - - if (!(vfs->mnt_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) - seq_printf(seq, ",background_gc=%s", "on"); - else - seq_printf(seq, ",background_gc=%s", "off"); - if (test_opt(sbi, DISABLE_ROLL_FORWARD)) - seq_puts(seq, ",disable_roll_forward"); - if (test_opt(sbi, DISCARD)) - seq_puts(seq, ",discard"); - if (test_opt(sbi, NOHEAP)) - seq_puts(seq, ",no_heap_alloc"); -#ifdef CONFIG_F2FS_FS_XATTR - if (test_opt(sbi, XATTR_USER)) - seq_puts(seq, ",user_xattr"); - else - seq_puts(seq, ",nouser_xattr"); - if (test_opt(sbi, INLINE_XATTR)) - seq_puts(seq, ",inline_xattr"); -#endif -#ifdef CONFIG_F2FS_FS_POSIX_ACL - if (test_opt(sbi, POSIX_ACL)) - seq_puts(seq, ",acl"); - else - seq_puts(seq, ",noacl"); -#endif - if (test_opt(sbi, ERRORS_PANIC)) - seq_puts(seq, ",errors=panic"); - else if (test_opt(sbi, ERRORS_RECOVER)) - seq_puts(seq, ",errors=recover"); - else - seq_puts(seq, ",errors=continue"); - if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) - seq_puts(seq, ",disable_ext_identify"); - - if 
(test_opt(sbi, ANDROID_EMU)) - seq_printf(seq, ",android_emu=%u:%u:%ho%s", - sbi->android_emu_uid, - sbi->android_emu_gid, - sbi->android_emu_mode, - (sbi->android_emu_flags & - F2FS_ANDROID_EMU_NOCASE) ? - ":nocase" : ""); - - seq_printf(seq, ",active_logs=%u", sbi->active_logs); - - return 0; -} - -static int segment_info_seq_show(struct seq_file *seq, void *offset) -{ - struct super_block *sb = seq->private; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); - int i; - - for (i = 0; i < total_segs; i++) { - seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); - if (i != 0 && (i % 10) == 0) - seq_puts(seq, "\n"); - else - seq_puts(seq, " "); - } - return 0; -} - -static int segment_info_open_fs(struct inode *inode, struct file *file) -{ - return single_open(file, segment_info_seq_show, - PROC_I(inode)->pde->data); -} - -static const struct file_operations f2fs_seq_segment_info_fops = { - .owner = THIS_MODULE, - .open = segment_info_open_fs, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int f2fs_remount(struct super_block *sb, int *flags, char *data) -{ - struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct f2fs_mount_info org_mount_opt; - int err, active_logs; - - /* - * Save the old mount options in case we - * need to restore them. - */ - org_mount_opt = sbi->mount_opt; - active_logs = sbi->active_logs; - - /* parse mount options */ - err = parse_options(sb, data); - if (err) - goto restore_opts; - - /* - * Previous and new state of filesystem is RO, - * so no point in checking GC conditions. - */ - if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) - goto skip; - - /* - * We stop the GC thread if FS is mounted as RO - * or if background_gc = off is passed in mount - * option. Also sync the filesystem. - */ - if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { - if (sbi->gc_thread) { - stop_gc_thread(sbi); - f2fs_sync_fs(sb, 1); - } - } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { - err = start_gc_thread(sbi); - if (err) - goto restore_opts; - } -skip: - /* Update the POSIXACL Flag */ - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); - return 0; - -restore_opts: - sbi->mount_opt = org_mount_opt; - sbi->active_logs = active_logs; - return err; -} - -static struct super_operations f2fs_sops = { - .alloc_inode = f2fs_alloc_inode, - .drop_inode = f2fs_drop_inode, - .destroy_inode = f2fs_destroy_inode, - .write_inode = f2fs_write_inode, - .dirty_inode = f2fs_dirty_inode, - .show_options = f2fs_show_options, - .evict_inode = f2fs_evict_inode, - .put_super = f2fs_put_super, - .sync_fs = f2fs_sync_fs, - .statfs = f2fs_statfs, - .remount_fs = f2fs_remount, -}; - -static struct inode *f2fs_nfs_get_inode(struct super_block *sb, - u64 ino, u32 generation) -{ - struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct inode *inode; - - if (ino < F2FS_ROOT_INO(sbi)) - return ERR_PTR(-ESTALE); - - /* - * f2fs_iget isn't quite right if the inode is currently unallocated! - * However f2fs_iget currently does appropriate checks to handle stale - * inodes so everything is OK. - */ - inode = f2fs_iget(sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - if (generation && inode->i_generation != generation) { - /* we didn't find the right inode.. 
*/ - iput(inode); - return ERR_PTR(-ESTALE); - } - return inode; -} - -static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - return generic_fh_to_dentry(sb, fid, fh_len, fh_type, - f2fs_nfs_get_inode); -} - -static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - return generic_fh_to_parent(sb, fid, fh_len, fh_type, - f2fs_nfs_get_inode); -} - -static const struct export_operations f2fs_export_ops = { - .fh_to_dentry = f2fs_fh_to_dentry, - .fh_to_parent = f2fs_fh_to_parent, - .get_parent = f2fs_get_parent, -}; - -static loff_t max_file_size(unsigned bits) -{ - loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); - loff_t leaf_count = ADDRS_PER_BLOCK; - - /* two direct node blocks */ - result += (leaf_count * 2); - - /* two indirect node blocks */ - leaf_count *= NIDS_PER_BLOCK; - result += (leaf_count * 2); - - /* one double indirect node block */ - leaf_count *= NIDS_PER_BLOCK; - result += leaf_count; - - result <<= bits; - return result; -} - -static int sanity_check_raw_super(struct super_block *sb, - struct f2fs_super_block *raw_super) -{ - unsigned int blocksize; - - if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { - f2fs_msg(sb, KERN_INFO, - "Magic Mismatch, valid(0x%x) - read(0x%x)", - F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); - return 1; - } - - /* Currently, support only 4KB page cache size */ - if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid page_cache_size (%lu), supports only 4KB\n", - PAGE_CACHE_SIZE); - return 1; - } - - /* Currently, support only 4KB block size */ - blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); - if (blocksize != F2FS_BLKSIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid blocksize (%u), supports only 4KB\n", - blocksize); - return 1; - } - - if (le32_to_cpu(raw_super->log_sectorsize) != - F2FS_LOG_SECTOR_SIZE) { - f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize"); - return 1; - } - if (le32_to_cpu(raw_super->log_sectors_per_block) != - F2FS_LOG_SECTORS_PER_BLOCK) { - f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block"); - return 1; - } - return 0; -} - -static int sanity_check_ckpt(struct f2fs_sb_info *sbi) -{ - unsigned int total, fsmeta; - struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - - total = le32_to_cpu(raw_super->segment_count); - fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); - fsmeta += le32_to_cpu(raw_super->segment_count_sit); - fsmeta += le32_to_cpu(raw_super->segment_count_nat); - fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); - fsmeta += le32_to_cpu(raw_super->segment_count_ssa); - - if (fsmeta >= total) - return 1; - - if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { - f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); - return 1; - } - return 0; -} - -static void init_sb_info(struct f2fs_sb_info *sbi) -{ - struct f2fs_super_block *raw_super = sbi->raw_super; - int i; - - sbi->log_sectors_per_block = - le32_to_cpu(raw_super->log_sectors_per_block); - sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize); - sbi->blocksize = 1 << sbi->log_blocksize; - sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); - sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg; - sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); - sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); - sbi->total_sections = le32_to_cpu(raw_super->section_count); - sbi->total_node_count = - 
(le32_to_cpu(raw_super->segment_count_nat) / 2) - * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK; - sbi->root_ino_num = le32_to_cpu(raw_super->root_ino); - sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); - sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); - sbi->cur_victim_sec = NULL_SECNO; - - for (i = 0; i < NR_COUNT_TYPE; i++) - atomic_set(&sbi->nr_pages[i], 0); -} - -static int validate_superblock(struct super_block *sb, - struct f2fs_super_block **raw_super, - struct buffer_head **raw_super_buf, sector_t block) -{ - const char *super = (block == 0 ? "first" : "second"); - - /* read f2fs raw super block */ - *raw_super_buf = sb_bread(sb, block); - if (!*raw_super_buf) { - f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", - super); - return -EIO; - } - - *raw_super = (struct f2fs_super_block *) - ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET); - - /* sanity checking of raw super */ - if (!sanity_check_raw_super(sb, *raw_super)) - return 0; - - f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " - "in %s superblock", super); - return -EINVAL; -} - -static int f2fs_fill_super(struct super_block *sb, void *data, int silent) -{ - struct f2fs_sb_info *sbi; - struct f2fs_super_block *raw_super; - struct buffer_head *raw_super_buf; - struct inode *root; - long err = -EINVAL; - int i; - const char *descr = ""; - - f2fs_msg(sb, KERN_INFO, "mounting.."); - /* allocate memory for f2fs-specific super block info */ - sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - - /* set a block size */ - if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) { - f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); - goto free_sbi; - } - - err = validate_superblock(sb, &raw_super, &raw_super_buf, 0); - if (err) { - brelse(raw_super_buf); - /* check secondary superblock when primary failed */ - err = validate_superblock(sb, &raw_super, &raw_super_buf, 1); - if (err) - goto free_sb_buf; - } - sb->s_fs_info = sbi; - /* init some FS parameters */ - sbi->active_logs = NR_CURSEG_TYPE; - - set_opt(sbi, BG_GC); - -#ifdef CONFIG_F2FS_FS_XATTR - set_opt(sbi, XATTR_USER); -#endif -#ifdef CONFIG_F2FS_FS_POSIX_ACL - set_opt(sbi, POSIX_ACL); -#endif - /* parse mount options */ - err = parse_options(sb, (char *)data); - if (err) - goto free_sb_buf; - - sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); - get_random_bytes(&sbi->s_next_generation, sizeof(u32)); - - sb->s_op = &f2fs_sops; - sb->s_xattr = f2fs_xattr_handlers; - sb->s_export_op = &f2fs_export_ops; - sb->s_magic = F2FS_SUPER_MAGIC; - sb->s_time_gran = 1; - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sbi, POSIX_ACL) ? 
MS_POSIXACL : 0); - memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); - - /* init f2fs-specific super block info */ - sbi->sb = sb; - sbi->raw_super = raw_super; - sbi->raw_super_buf = raw_super_buf; - mutex_init(&sbi->gc_mutex); - mutex_init(&sbi->writepages); - mutex_init(&sbi->cp_mutex); - for (i = 0; i < NR_GLOBAL_LOCKS; i++) - mutex_init(&sbi->fs_lock[i]); - mutex_init(&sbi->node_write); - sbi->por_doing = 0; - spin_lock_init(&sbi->stat_lock); - init_rwsem(&sbi->bio_sem); - init_sb_info(sbi); - - /* get an inode for meta space */ - sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); - if (IS_ERR(sbi->meta_inode)) { - f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); - err = PTR_ERR(sbi->meta_inode); - goto free_sb_buf; - } - -get_cp: - err = get_valid_checkpoint(sbi); - if (err) { - f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint"); - goto free_meta_inode; - } - - /* sanity checking of checkpoint */ - err = -EINVAL; - if (sanity_check_ckpt(sbi)) { - f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint"); - goto free_cp; - } - - sbi->total_valid_node_count = - le32_to_cpu(sbi->ckpt->valid_node_count); - sbi->total_valid_inode_count = - le32_to_cpu(sbi->ckpt->valid_inode_count); - sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count); - sbi->total_valid_block_count = - le64_to_cpu(sbi->ckpt->valid_block_count); - sbi->last_valid_block_count = sbi->total_valid_block_count; - sbi->alloc_valid_block_count = 0; - INIT_LIST_HEAD(&sbi->dir_inode_list); - spin_lock_init(&sbi->dir_inode_lock); - - init_orphan_info(sbi); - - /* setup f2fs internal modules */ - err = build_segment_manager(sbi); - if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS segment manager"); - goto free_sm; - } - err = build_node_manager(sbi); - if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS node manager"); - goto free_nm; - } - - build_gc_manager(sbi); - - /* get an inode for node space */ - sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); - if (IS_ERR(sbi->node_inode)) { - f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); - err = PTR_ERR(sbi->node_inode); - goto free_nm; - } - - /* if there are nt orphan nodes free them */ - err = -EINVAL; - if (recover_orphan_inodes(sbi)) - goto free_node_inode; - - /* read root inode and dentry */ - root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); - if (IS_ERR(root)) { - f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); - err = PTR_ERR(root); - goto free_node_inode; - } - if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) - goto free_root_inode; - - sb->s_root = d_alloc_root(root); /* allocate root dentry */ - if (!sb->s_root) { - err = -ENOMEM; - goto free_root_inode; - } - - /* recover fsynced data */ - if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { - err = recover_fsync_data(sbi); - if (err) { - if (f2fs_handle_error(sbi)) { - set_opt(sbi, DISABLE_ROLL_FORWARD); - kfree(sbi->ckpt); - f2fs_msg(sb, KERN_ERR, - "reloading last checkpoint"); - goto get_cp; - } - f2fs_msg(sb, KERN_ERR, - "cannot recover all fsync data errno=%ld", err); - /* checkpoint what we have */ - write_checkpoint(sbi, false); - } - } - - /* - * If filesystem is not mounted as read-only then - * do start the gc_thread. 
- */ - if (!(sb->s_flags & MS_RDONLY)) { - /* After POR, we can run background GC thread.*/ - err = start_gc_thread(sbi); - if (err) - goto fail; - } - - err = f2fs_build_stats(sbi); - if (err) - goto fail; - - if (f2fs_proc_root) - sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); - - if (sbi->s_proc) - proc_create_data("segment_info", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_info_fops, sb); - - if (test_opt(sbi, DISCARD)) { - struct request_queue *q = bdev_get_queue(sb->s_bdev); - if (!blk_queue_discard(q)) - f2fs_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - } - - if (test_opt(sbi, ANDROID_EMU)) - descr = " with android sdcard emulation"; - f2fs_msg(sb, KERN_INFO, "mounted filesystem%s", descr); - - sbi->s_kobj.kset = f2fs_kset; - init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, - "%s", sb->s_id); - if (err) - goto fail; - - return 0; -fail: - stop_gc_thread(sbi); -free_root_inode: - iput(root); -free_node_inode: - iput(sbi->node_inode); -free_nm: - destroy_node_manager(sbi); -free_sm: - destroy_segment_manager(sbi); -free_cp: - kfree(sbi->ckpt); -free_meta_inode: - make_bad_inode(sbi->meta_inode); - iput(sbi->meta_inode); -free_sb_buf: - brelse(raw_super_buf); -free_sbi: - kfree(sbi); - f2fs_msg(sb, KERN_ERR, "mount failed"); - return err; -} - -static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); -} - -static struct file_system_type f2fs_fs_type = { - .owner = THIS_MODULE, - .name = "f2fs", - .mount = f2fs_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - -static int __init init_inodecache(void) -{ - f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", - sizeof(struct f2fs_inode_info), NULL); - if (f2fs_inode_cachep == NULL) - return -ENOMEM; - return 0; -} - -static void destroy_inodecache(void) -{ - /* - * Make sure all delayed rcu free inodes are flushed before we - * destroy cache. 
- */ - rcu_barrier(); - kmem_cache_destroy(f2fs_inode_cachep); -} - -static int __init init_f2fs_fs(void) -{ - int err; - - err = init_inodecache(); - if (err) - goto fail; - err = create_node_manager_caches(); - if (err) - goto free_inodecache; - err = create_gc_caches(); - if (err) - goto free_node_manager_caches; - err = create_checkpoint_caches(); - if (err) - goto free_gc_caches; - f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); - if (!f2fs_kset) { - err = -ENOMEM; - goto free_checkpoint_caches; - } - err = register_filesystem(&f2fs_fs_type); - if (err) - goto free_kset; - f2fs_create_root_stats(); - f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); - return 0; - -free_kset: - kset_unregister(f2fs_kset); -free_checkpoint_caches: - destroy_checkpoint_caches(); -free_gc_caches: - destroy_gc_caches(); -free_node_manager_caches: - destroy_node_manager_caches(); -free_inodecache: - destroy_inodecache(); -fail: - return err; -} - -static void __exit exit_f2fs_fs(void) -{ - remove_proc_entry("fs/f2fs", NULL); - f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); - destroy_checkpoint_caches(); - destroy_gc_caches(); - destroy_node_manager_caches(); - destroy_inodecache(); - kset_unregister(f2fs_kset); -} - -module_init(init_f2fs_fs) -module_exit(exit_f2fs_fs) - -MODULE_AUTHOR("Samsung Electronics's Praesto Team"); -MODULE_DESCRIPTION("Flash Friendly File System"); -MODULE_LICENSE("GPL"); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c deleted file mode 100644 index 85b99ebcbd5..00000000000 --- a/fs/f2fs/xattr.c +++ /dev/null @@ -1,600 +0,0 @@ -/* - * fs/f2fs/xattr.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * Portions of this code from linux/fs/ext2/xattr.c - * - * Copyright (C) 2001-2003 Andreas Gruenbacher - * - * Fix by Harrison Xing . - * Extended attributes for symlinks and special files added per - * suggestion of Luka Renko . - * xattr consolidation Copyright (c) 2004 James Morris , - * Red Hat Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include -#include -#include -#include "f2fs.h" -#include "xattr.h" - -static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - int total_len, prefix_len = 0; - const char *prefix = NULL; - - switch (type) { - case F2FS_XATTR_INDEX_USER: - if (!test_opt(sbi, XATTR_USER)) - return -EOPNOTSUPP; - prefix = XATTR_USER_PREFIX; - prefix_len = XATTR_USER_PREFIX_LEN; - break; - case F2FS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - prefix = XATTR_TRUSTED_PREFIX; - prefix_len = XATTR_TRUSTED_PREFIX_LEN; - break; - case F2FS_XATTR_INDEX_SECURITY: - prefix = XATTR_SECURITY_PREFIX; - prefix_len = XATTR_SECURITY_PREFIX_LEN; - break; - default: - return -EINVAL; - } - - total_len = prefix_len + name_len + 1; - if (list && total_len <= list_size) { - memcpy(list, prefix, prefix_len); - memcpy(list + prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - return total_len; -} - -static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - - switch (type) { - case F2FS_XATTR_INDEX_USER: - if (!test_opt(sbi, XATTR_USER)) - return -EOPNOTSUPP; - break; - case F2FS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - break; - case F2FS_XATTR_INDEX_SECURITY: - break; - default: - return -EINVAL; - } - if (strcmp(name, "") == 0) - return -EINVAL; - return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); -} - -static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - - switch (type) { - case F2FS_XATTR_INDEX_USER: - if (!test_opt(sbi, XATTR_USER)) - return -EOPNOTSUPP; - break; - case F2FS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - break; - case F2FS_XATTR_INDEX_SECURITY: - break; - default: - return -EINVAL; - } - if (strcmp(name, "") == 0) - return -EINVAL; - - return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); -} - -static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; - size_t size; - - if (type != F2FS_XATTR_INDEX_ADVISE) - return 0; - - size = strlen(xname) + 1; - if (list && size <= list_size) - memcpy(list, xname, size); - return size; -} - -static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - struct inode *inode = dentry->d_inode; - - if (!name || strcmp(name, "") != 0) - return -EINVAL; - - if (buffer) - *((char *)buffer) = F2FS_I(inode)->i_advise; - return sizeof(char); -} - -static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - struct inode *inode = dentry->d_inode; - - if (!name || strcmp(name, "") != 0) - return -EINVAL; - if (!inode_owner_or_capable(inode)) - return -EPERM; - if (value == NULL) - return -EINVAL; - - F2FS_I(inode)->i_advise = *(char *)value; - return 0; -} - -#ifdef CONFIG_F2FS_FS_SECURITY -static int __f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - struct page *ipage); -static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, - 
void *page) -{ - const struct xattr *xattr; - int err = 0; - - for (xattr = xattr_array; xattr->name != NULL; xattr++) { - err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, - xattr->name, xattr->value, - xattr->value_len, (struct page *)page); - if (err < 0) - break; - } - return err; -} - -int f2fs_init_security(struct inode *inode, struct inode *dir, - const struct qstr *qstr, struct page *ipage) -{ - return security_new_inode_init_security(inode, dir, qstr, - &f2fs_initxattrs, ipage); -} -#endif - -const struct xattr_handler f2fs_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .flags = F2FS_XATTR_INDEX_USER, - .list = f2fs_xattr_generic_list, - .get = f2fs_xattr_generic_get, - .set = f2fs_xattr_generic_set, -}; - -const struct xattr_handler f2fs_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .flags = F2FS_XATTR_INDEX_TRUSTED, - .list = f2fs_xattr_generic_list, - .get = f2fs_xattr_generic_get, - .set = f2fs_xattr_generic_set, -}; - -const struct xattr_handler f2fs_xattr_advise_handler = { - .prefix = F2FS_SYSTEM_ADVISE_PREFIX, - .flags = F2FS_XATTR_INDEX_ADVISE, - .list = f2fs_xattr_advise_list, - .get = f2fs_xattr_advise_get, - .set = f2fs_xattr_advise_set, -}; - -const struct xattr_handler f2fs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .flags = F2FS_XATTR_INDEX_SECURITY, - .list = f2fs_xattr_generic_list, - .get = f2fs_xattr_generic_get, - .set = f2fs_xattr_generic_set, -}; - -static const struct xattr_handler *f2fs_xattr_handler_map[] = { - [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, -#ifdef CONFIG_F2FS_FS_POSIX_ACL - [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler, - [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, -#endif - [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, -#ifdef CONFIG_F2FS_FS_SECURITY - [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler, -#endif - [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, -}; - -const struct xattr_handler *f2fs_xattr_handlers[] = { - &f2fs_xattr_user_handler, -#ifdef CONFIG_F2FS_FS_POSIX_ACL - &f2fs_xattr_acl_access_handler, - &f2fs_xattr_acl_default_handler, -#endif - &f2fs_xattr_trusted_handler, -#ifdef CONFIG_F2FS_FS_SECURITY - &f2fs_xattr_security_handler, -#endif - &f2fs_xattr_advise_handler, - NULL, -}; - -static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) -{ - const struct xattr_handler *handler = NULL; - - if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map)) - handler = f2fs_xattr_handler_map[name_index]; - return handler; -} - -static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, - size_t name_len, const char *name) -{ - struct f2fs_xattr_entry *entry; - - list_for_each_xattr(entry, base_addr) { - if (entry->e_name_index != name_index) - continue; - if (entry->e_name_len != name_len) - continue; - if (!memcmp(entry->e_name, name, name_len)) - break; - } - return entry; -} - -static void *read_all_xattrs(struct inode *inode, struct page *ipage) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct f2fs_xattr_header *header; - size_t size = PAGE_SIZE, inline_size = 0; - void *txattr_addr; - - inline_size = inline_xattr_size(inode); - - txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); - if (!txattr_addr) - return NULL; - - /* read from inline xattr */ - if (inline_size) { - struct page *page = NULL; - void *inline_addr; - - if (ipage) { - inline_addr = inline_xattr_addr(ipage); - } else { - page = get_node_page(sbi, 
inode->i_ino); - if (IS_ERR(page)) - goto fail; - inline_addr = inline_xattr_addr(page); - } - memcpy(txattr_addr, inline_addr, inline_size); - f2fs_put_page(page, 1); - } - - /* read from xattr node block */ - if (F2FS_I(inode)->i_xattr_nid) { - struct page *xpage; - void *xattr_addr; - - /* The inode already has an extended attribute block. */ - xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); - if (IS_ERR(xpage)) - goto fail; - - xattr_addr = page_address(xpage); - memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); - f2fs_put_page(xpage, 1); - } - - header = XATTR_HDR(txattr_addr); - - /* never been allocated xattrs */ - if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) { - header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); - header->h_refcount = cpu_to_le32(1); - } - return txattr_addr; -fail: - kzfree(txattr_addr); - return NULL; -} - -static inline int write_all_xattrs(struct inode *inode, __u32 hsize, - void *txattr_addr, struct page *ipage) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - size_t inline_size = 0; - void *xattr_addr; - struct page *xpage; - nid_t new_nid = 0; - int err; - - inline_size = inline_xattr_size(inode); - - if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) - if (!alloc_nid(sbi, &new_nid)) - return -ENOSPC; - - /* write to inline xattr */ - if (inline_size) { - struct page *page = NULL; - void *inline_addr; - - if (ipage) { - inline_addr = inline_xattr_addr(ipage); - } else { - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { - alloc_nid_failed(sbi, new_nid); - return PTR_ERR(page); - } - inline_addr = inline_xattr_addr(page); - } - memcpy(inline_addr, txattr_addr, inline_size); - f2fs_put_page(page, 1); - - /* no need to use xattr node block */ - if (hsize <= inline_size) { - err = truncate_xattr_node(inode, ipage); - alloc_nid_failed(sbi, new_nid); - return err; - } - } - - /* write to xattr node block */ - if (F2FS_I(inode)->i_xattr_nid) { - xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); - if (IS_ERR(xpage)) { - alloc_nid_failed(sbi, new_nid); - return PTR_ERR(xpage); - } - BUG_ON(new_nid); - } else { - struct dnode_of_data dn; - set_new_dnode(&dn, inode, NULL, NULL, new_nid); - xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); - if (IS_ERR(xpage)) { - alloc_nid_failed(sbi, new_nid); - return PTR_ERR(xpage); - } - alloc_nid_done(sbi, new_nid); - } - - xattr_addr = page_address(xpage); - memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - - sizeof(struct node_footer)); - set_page_dirty(xpage); - f2fs_put_page(xpage, 1); - - /* need to checkpoint during fsync */ - F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); - return 0; -} - -int f2fs_getxattr(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) -{ - struct f2fs_xattr_entry *entry; - void *base_addr; - int error = 0; - size_t value_len, name_len; - - if (name == NULL) - return -EINVAL; - name_len = strlen(name); - - base_addr = read_all_xattrs(inode, NULL); - if (!base_addr) - return -ENOMEM; - - entry = __find_xattr(base_addr, name_index, name_len, name); - if (IS_XATTR_LAST_ENTRY(entry)) { - error = -ENODATA; - goto cleanup; - } - - value_len = le16_to_cpu(entry->e_value_size); - - if (buffer && value_len > buffer_size) { - error = -ERANGE; - goto cleanup; - } - - if (buffer) { - char *pval = entry->e_name + entry->e_name_len; - memcpy(buffer, pval, value_len); - } - error = value_len; - -cleanup: - kzfree(base_addr); - return error; -} - -ssize_t f2fs_listxattr(struct dentry *dentry, char 
*buffer, size_t buffer_size) -{ - struct inode *inode = dentry->d_inode; - struct f2fs_xattr_entry *entry; - void *base_addr; - int error = 0; - size_t rest = buffer_size; - - base_addr = read_all_xattrs(inode, NULL); - if (!base_addr) - return -ENOMEM; - - list_for_each_xattr(entry, base_addr) { - const struct xattr_handler *handler = - f2fs_xattr_handler(entry->e_name_index); - size_t size; - - if (!handler) - continue; - - size = handler->list(dentry, buffer, rest, entry->e_name, - entry->e_name_len, handler->flags); - if (buffer && size > rest) { - error = -ERANGE; - goto cleanup; - } - - if (buffer) - buffer += size; - rest -= size; - } - error = buffer_size - rest; -cleanup: - kzfree(base_addr); - return error; -} - -static int __f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - struct page *ipage) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_xattr_entry *here, *last; - void *base_addr; - int found, newsize; - size_t name_len; - __u32 new_hsize; - int error = -ENOMEM; - - if (name == NULL) - return -EINVAL; - - if (value == NULL) - value_len = 0; - - name_len = strlen(name); - - if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) - return -ERANGE; - - base_addr = read_all_xattrs(inode, ipage); - if (!base_addr) - goto exit; - - /* find entry with wanted name. */ - here = __find_xattr(base_addr, name_index, name_len, name); - - found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; - last = here; - - while (!IS_XATTR_LAST_ENTRY(last)) - last = XATTR_NEXT_ENTRY(last); - - newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + - name_len + value_len); - - /* 1. Check space */ - if (value) { - int free; - /* - * If value is NULL, it is remove operation. - * In case of update operation, we caculate free. - */ - free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); - if (found) - free = free - ENTRY_SIZE(here); - - if (free < newsize) { - error = -ENOSPC; - goto exit; - } - } - - /* 2. Remove old entry */ - if (found) { - /* - * If entry is found, remove old entry. - * If not found, remove operation is not needed. - */ - struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here); - int oldsize = ENTRY_SIZE(here); - - memmove(here, next, (char *)last - (char *)next); - last = (struct f2fs_xattr_entry *)((char *)last - oldsize); - memset(last, 0, oldsize); - } - - new_hsize = (char *)last - (char *)base_addr; - - /* 3. Write new entry */ - if (value) { - char *pval; - /* - * Before we come here, old entry is removed. - * We just write new entry. 
- */ - memset(last, 0, newsize); - last->e_name_index = name_index; - last->e_name_len = name_len; - memcpy(last->e_name, name, name_len); - pval = last->e_name + name_len; - memcpy(pval, value, value_len); - last->e_value_size = cpu_to_le16(value_len); - new_hsize += newsize; - } - - error = write_all_xattrs(inode, new_hsize, base_addr, ipage); - if (error) - goto exit; - - if (is_inode_flag_set(fi, FI_ACL_MODE)) { - inode->i_mode = fi->i_acl_mode; - inode->i_ctime = CURRENT_TIME; - clear_inode_flag(fi, FI_ACL_MODE); - } - - if (ipage) - update_inode(inode, ipage); - else - update_inode_page(inode); -exit: - kzfree(base_addr); - return error; -} - -int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, struct page *ipage) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - int ilock; - int err; - - f2fs_balance_fs(sbi); - - ilock = mutex_lock_op(sbi); - - err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); - - mutex_unlock_op(sbi, ilock); - - return err; -} diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h deleted file mode 100644 index 02a08fb88a1..00000000000 --- a/fs/f2fs/xattr.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * fs/f2fs/xattr.h - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * Portions of this code from linux/fs/ext2/xattr.h - * - * On-disk format of extended attributes for the ext2 filesystem. - * - * (C) 2001 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef __F2FS_XATTR_H__ -#define __F2FS_XATTR_H__ - -#include -#include - -/* Magic value in attribute blocks */ -#define F2FS_XATTR_MAGIC 0xF2F52011 - -/* Maximum number of references to one attribute block */ -#define F2FS_XATTR_REFCOUNT_MAX 1024 - -/* Name indexes */ -#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise" -#define F2FS_XATTR_INDEX_USER 1 -#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2 -#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -#define F2FS_XATTR_INDEX_TRUSTED 4 -#define F2FS_XATTR_INDEX_LUSTRE 5 -#define F2FS_XATTR_INDEX_SECURITY 6 -#define F2FS_XATTR_INDEX_ADVISE 7 - -struct f2fs_xattr_header { - __le32 h_magic; /* magic number for identification */ - __le32 h_refcount; /* reference count */ - __u32 h_reserved[4]; /* zero right now */ -}; - -struct f2fs_xattr_entry { - __u8 e_name_index; - __u8 e_name_len; - __le16 e_value_size; /* size of attribute value */ - char e_name[0]; /* attribute name */ -}; - -#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr)) -#define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr)) -#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1)) -#define XATTR_ROUND (3) - -#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) - -#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \ - entry->e_name_len + le16_to_cpu(entry->e_value_size))) - -#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\ - ENTRY_SIZE(entry))) - -#define IS_XATTR_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) - -#define list_for_each_xattr(entry, addr) \ - for (entry = XATTR_FIRST_ENTRY(addr);\ - !IS_XATTR_LAST_ENTRY(entry);\ - entry = XATTR_NEXT_ENTRY(entry)) - -#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \ - sizeof(struct node_footer) - sizeof(__u32)) - -#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \ - sizeof(struct 
f2fs_xattr_header) - \ - sizeof(struct f2fs_xattr_entry)) - -/* - * On-disk structure of f2fs_xattr - * We use inline xattrs space + 1 block for xattr. - * - * +--------------------+ - * | f2fs_xattr_header | - * | | - * +--------------------+ - * | f2fs_xattr_entry | - * | .e_name_index = 1 | - * | .e_name_len = 3 | - * | .e_value_size = 14 | - * | .e_name = "foo" | - * | "value_of_xattr" |<- value_offs = e_name + e_name_len - * +--------------------+ - * | f2fs_xattr_entry | - * | .e_name_index = 4 | - * | .e_name = "bar" | - * +--------------------+ - * | | - * | Free | - * | | - * +--------------------+<- MIN_OFFSET - * | node_footer | - * | (nid, ino, offset) | - * +--------------------+ - * - **/ - -#ifdef CONFIG_F2FS_FS_XATTR -extern const struct xattr_handler f2fs_xattr_user_handler; -extern const struct xattr_handler f2fs_xattr_trusted_handler; -extern const struct xattr_handler f2fs_xattr_acl_access_handler; -extern const struct xattr_handler f2fs_xattr_acl_default_handler; -extern const struct xattr_handler f2fs_xattr_advise_handler; -extern const struct xattr_handler f2fs_xattr_security_handler; - -extern const struct xattr_handler *f2fs_xattr_handlers[]; - -extern int f2fs_setxattr(struct inode *, int, const char *, - const void *, size_t, struct page *); -extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); -extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); -#else - -#define f2fs_xattr_handlers NULL -static inline int f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len) -{ - return -EOPNOTSUPP; -} -static inline int f2fs_getxattr(struct inode *inode, int name_index, - const char *name, void *buffer, size_t buffer_size) -{ - return -EOPNOTSUPP; -} -static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, - size_t buffer_size) -{ - return -EOPNOTSUPP; -} -#endif - -#ifdef CONFIG_F2FS_FS_SECURITY -extern int f2fs_init_security(struct inode *, struct inode *, - const struct qstr *, struct page *); -#else -static inline int f2fs_init_security(struct inode *inode, struct inode *dir, - const struct qstr *qstr, struct page *ipage) -{ - return 0; -} -#endif -#endif /* __F2FS_XATTR_H__ */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 9327888b4b2..713c7c62443 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -239,7 +239,6 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); -extern struct dentry * d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h deleted file mode 100644 index bb942f6d570..00000000000 --- a/include/linux/f2fs_fs.h +++ /dev/null @@ -1,424 +0,0 @@ -/** - * include/linux/f2fs_fs.h - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#ifndef _LINUX_F2FS_FS_H -#define _LINUX_F2FS_FS_H - -#include -#include - -#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ -#define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */ -#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ -#define F2FS_BLKSIZE 4096 /* support only 4KB block */ -#define F2FS_MAX_EXTENSION 64 /* # of extension entries */ - -#define NULL_ADDR ((block_t)0) /* used as block_t addresses */ -#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ - -#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) -#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) -#define F2FS_META_INO(sbi) (sbi->meta_ino_num) - -/* This flag is used by node and meta inodes, and by recovery */ -#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) - -/* - * For further optimization on multi-head logs, on-disk layout supports maximum - * 16 logs by default. The number, 16, is expected to cover all the cases - * enoughly. The implementaion currently uses no more than 6 logs. - * Half the logs are used for nodes, and the other half are used for data. - */ -#define MAX_ACTIVE_LOGS 16 -#define MAX_ACTIVE_NODE_LOGS 8 -#define MAX_ACTIVE_DATA_LOGS 8 - -/* - * For superblock - */ -struct f2fs_super_block { - __le32 magic; /* Magic Number */ - __le16 major_ver; /* Major Version */ - __le16 minor_ver; /* Minor Version */ - __le32 log_sectorsize; /* log2 sector size in bytes */ - __le32 log_sectors_per_block; /* log2 # of sectors per block */ - __le32 log_blocksize; /* log2 block size in bytes */ - __le32 log_blocks_per_seg; /* log2 # of blocks per segment */ - __le32 segs_per_sec; /* # of segments per section */ - __le32 secs_per_zone; /* # of sections per zone */ - __le32 checksum_offset; /* checksum offset inside super block */ - __le64 block_count; /* total # of user blocks */ - __le32 section_count; /* total # of sections */ - __le32 segment_count; /* total # of segments */ - __le32 segment_count_ckpt; /* # of segments for checkpoint */ - __le32 segment_count_sit; /* # of segments for SIT */ - __le32 segment_count_nat; /* # of segments for NAT */ - __le32 segment_count_ssa; /* # of segments for SSA */ - __le32 segment_count_main; /* # of segments for main area */ - __le32 segment0_blkaddr; /* start block address of segment 0 */ - __le32 cp_blkaddr; /* start block address of checkpoint */ - __le32 sit_blkaddr; /* start block address of SIT */ - __le32 nat_blkaddr; /* start block address of NAT */ - __le32 ssa_blkaddr; /* start block address of SSA */ - __le32 main_blkaddr; /* start block address of main area */ - __le32 root_ino; /* root inode number */ - __le32 node_ino; /* node inode number */ - __le32 meta_ino; /* meta inode number */ - __u8 uuid[16]; /* 128-bit uuid for volume */ - __le16 volume_name[512]; /* volume name */ - __le32 extension_count; /* # of extensions below */ - __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ -} __packed; - -/* - * For checkpoint - */ -#define CP_ERROR_FLAG 0x00000008 -#define CP_COMPACT_SUM_FLAG 0x00000004 -#define CP_ORPHAN_PRESENT_FLAG 0x00000002 -#define CP_UMOUNT_FLAG 0x00000001 - -struct f2fs_checkpoint { - __le64 checkpoint_ver; /* checkpoint block version number */ - __le64 user_block_count; /* # of user blocks */ - __le64 valid_block_count; /* # of valid blocks in main area */ - __le32 rsvd_segment_count; /* # of reserved segments for gc */ - __le32 overprov_segment_count; /* # of overprovision segments */ - __le32 free_segment_count; /* # of free segments in main area */ - - /* information of current node segments */ - __le32 
cur_node_segno[MAX_ACTIVE_NODE_LOGS]; - __le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS]; - /* information of current data segments */ - __le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS]; - __le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS]; - __le32 ckpt_flags; /* Flags : umount and journal_present */ - __le32 cp_pack_total_block_count; /* total # of one cp pack */ - __le32 cp_pack_start_sum; /* start block number of data summary */ - __le32 valid_node_count; /* Total number of valid nodes */ - __le32 valid_inode_count; /* Total number of valid inodes */ - __le32 next_free_nid; /* Next free node number */ - __le32 sit_ver_bitmap_bytesize; /* Default value 64 */ - __le32 nat_ver_bitmap_bytesize; /* Default value 256 */ - __le32 checksum_offset; /* checksum offset inside cp block */ - __le64 elapsed_time; /* mounted time */ - /* allocation type of current segment */ - unsigned char alloc_type[MAX_ACTIVE_LOGS]; - - /* SIT and NAT version bitmap */ - unsigned char sit_nat_version_bitmap[1]; -} __packed; - -/* - * For orphan inode management - */ -#define F2FS_ORPHANS_PER_BLOCK 1020 - -struct f2fs_orphan_block { - __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */ - __le32 reserved; /* reserved */ - __le16 blk_addr; /* block index in current CP */ - __le16 blk_count; /* Number of orphan inode blocks in CP */ - __le32 entry_count; /* Total number of orphan nodes in current CP */ - __le32 check_sum; /* CRC32 for orphan inode block */ -} __packed; - -/* - * For NODE structure - */ -struct f2fs_extent { - __le32 fofs; /* start file offset of the extent */ - __le32 blk_addr; /* start block address of the extent */ - __le32 len; /* lengh of the extent */ -} __packed; - -#define F2FS_NAME_LEN 255 -#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ -#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ -#define ADDRS_PER_INODE(fi) addrs_per_inode(fi) -#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ -#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ - -#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) -#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) -#define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3) -#define NODE_IND2_BLOCK (DEF_ADDRS_PER_INODE + 4) -#define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5) - -#define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ - -struct f2fs_inode { - __le16 i_mode; /* file mode */ - __u8 i_advise; /* file hints */ - __u8 i_inline; /* file inline flags */ - __le32 i_uid; /* user ID */ - __le32 i_gid; /* group ID */ - __le32 i_links; /* links count */ - __le64 i_size; /* file size in bytes */ - __le64 i_blocks; /* file size in blocks */ - __le64 i_atime; /* access time */ - __le64 i_ctime; /* change time */ - __le64 i_mtime; /* modification time */ - __le32 i_atime_nsec; /* access time in nano scale */ - __le32 i_ctime_nsec; /* change time in nano scale */ - __le32 i_mtime_nsec; /* modification time in nano scale */ - __le32 i_generation; /* file version (for NFS) */ - __le32 i_current_depth; /* only for directory depth */ - __le32 i_xattr_nid; /* nid to save xattr */ - __le32 i_flags; /* file attributes */ - __le32 i_pino; /* parent inode number */ - __le32 i_namelen; /* file name length */ - __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */ - __u8 i_reserved2; /* for backward compatibility */ - - struct f2fs_extent i_ext; /* caching a largest extent */ - - __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ - - __le32 i_nid[5]; /* direct(2), indirect(2), - double_indirect(1) node id */ -} 
__packed; - -struct direct_node { - __le32 addr[ADDRS_PER_BLOCK]; /* array of data block address */ -} __packed; - -struct indirect_node { - __le32 nid[NIDS_PER_BLOCK]; /* array of data block address */ -} __packed; - -enum { - COLD_BIT_SHIFT = 0, - FSYNC_BIT_SHIFT, - DENT_BIT_SHIFT, - OFFSET_BIT_SHIFT -}; - -struct node_footer { - __le32 nid; /* node id */ - __le32 ino; /* inode nunmber */ - __le32 flag; /* include cold/fsync/dentry marks and offset */ - __le64 cp_ver; /* checkpoint version */ - __le32 next_blkaddr; /* next node page block address */ -} __packed; - -struct f2fs_node { - /* can be one of three types: inode, direct, and indirect types */ - union { - struct f2fs_inode i; - struct direct_node dn; - struct indirect_node in; - }; - struct node_footer footer; -} __packed; - -/* - * For NAT entries - */ -#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry)) - -struct f2fs_nat_entry { - __u8 version; /* latest version of cached nat entry */ - __le32 ino; /* inode number */ - __le32 block_addr; /* block address */ -} __packed; - -struct f2fs_nat_block { - struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK]; -} __packed; - -/* - * For SIT entries - * - * Each segment is 2MB in size by default so that a bitmap for validity of - * there-in blocks should occupy 64 bytes, 512 bits. - * Not allow to change this. - */ -#define SIT_VBLOCK_MAP_SIZE 64 -#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) - -/* - * Note that f2fs_sit_entry->vblocks has the following bit-field information. - * [15:10] : allocation type such as CURSEG_XXXX_TYPE - * [9:0] : valid block count - */ -#define SIT_VBLOCKS_SHIFT 10 -#define SIT_VBLOCKS_MASK ((1 << SIT_VBLOCKS_SHIFT) - 1) -#define GET_SIT_VBLOCKS(raw_sit) \ - (le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK) -#define GET_SIT_TYPE(raw_sit) \ - ((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK) \ - >> SIT_VBLOCKS_SHIFT) - -struct f2fs_sit_entry { - __le16 vblocks; /* reference above */ - __u8 valid_map[SIT_VBLOCK_MAP_SIZE]; /* bitmap for valid blocks */ - __le64 mtime; /* segment age for cleaning */ -} __packed; - -struct f2fs_sit_block { - struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK]; -} __packed; - -/* - * For segment summary - * - * One summary block contains exactly 512 summary entries, which represents - * exactly 2MB segment by default. Not allow to change the basic units. - * - * NOTE: For initializing fields, you must use set_summary - * - * - If data page, nid represents dnode's nid - * - If node page, nid represents the node page's nid. - * - * The ofs_in_node is used by only data page. It represents offset - * from node's page's beginning to get a data block address. 
- * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) - */ -#define ENTRIES_IN_SUM 512 -#define SUMMARY_SIZE (7) /* sizeof(struct summary) */ -#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ -#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) - -/* a summary entry for a 4KB-sized block in a segment */ -struct f2fs_summary { - __le32 nid; /* parent node id */ - union { - __u8 reserved[3]; - struct { - __u8 version; /* node version number */ - __le16 ofs_in_node; /* block index in parent node */ - } __packed; - }; -} __packed; - -/* summary block type, node or data, is stored to the summary_footer */ -#define SUM_TYPE_NODE (1) -#define SUM_TYPE_DATA (0) - -struct summary_footer { - unsigned char entry_type; /* SUM_TYPE_XXX */ - __u32 check_sum; /* summary checksum */ -} __packed; - -#define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\ - SUM_ENTRY_SIZE) -#define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ - sizeof(struct nat_journal_entry)) -#define NAT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ - sizeof(struct nat_journal_entry)) -#define SIT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ - sizeof(struct sit_journal_entry)) -#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ - sizeof(struct sit_journal_entry)) -/* - * frequently updated NAT/SIT entries can be stored in the spare area in - * summary blocks - */ -enum { - NAT_JOURNAL = 0, - SIT_JOURNAL -}; - -struct nat_journal_entry { - __le32 nid; - struct f2fs_nat_entry ne; -} __packed; - -struct nat_journal { - struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES]; - __u8 reserved[NAT_JOURNAL_RESERVED]; -} __packed; - -struct sit_journal_entry { - __le32 segno; - struct f2fs_sit_entry se; -} __packed; - -struct sit_journal { - struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES]; - __u8 reserved[SIT_JOURNAL_RESERVED]; -} __packed; - -/* 4KB-sized summary block structure */ -struct f2fs_summary_block { - struct f2fs_summary entries[ENTRIES_IN_SUM]; - union { - __le16 n_nats; - __le16 n_sits; - }; - /* spare area is used by NAT or SIT journals */ - union { - struct nat_journal nat_j; - struct sit_journal sit_j; - }; - struct summary_footer footer; -} __packed; - -/* - * For directory operations - */ -#define F2FS_DOT_HASH 0 -#define F2FS_DDOT_HASH F2FS_DOT_HASH -#define F2FS_MAX_HASH (~((0x3ULL) << 62)) -#define F2FS_HASH_COL_BIT ((0x1ULL) << 63) - -typedef __le32 f2fs_hash_t; - -/* One directory entry slot covers 8bytes-long file name */ -#define F2FS_SLOT_LEN 8 -#define F2FS_SLOT_LEN_BITS 3 - -#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) - -/* the number of dentry in a block */ -#define NR_DENTRY_IN_BLOCK 214 - -/* MAX level for dir lookup */ -#define MAX_DIR_HASH_DEPTH 63 - -#define SIZE_OF_DIR_ENTRY 11 /* by byte */ -#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ - BITS_PER_BYTE) -#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ - F2FS_SLOT_LEN) * \ - NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) - -/* One directory entry slot representing F2FS_SLOT_LEN-sized file name */ -struct f2fs_dir_entry { - __le32 hash_code; /* hash code of file name */ - __le32 ino; /* inode number */ - __le16 name_len; /* lengh of file name */ - __u8 file_type; /* file type */ -} __packed; - -/* 4KB-sized directory entry block */ -struct f2fs_dentry_block { - /* validity bitmap for directory entries in each block */ - __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; - __u8 reserved[SIZE_OF_RESERVED]; - struct f2fs_dir_entry 
dentry[NR_DENTRY_IN_BLOCK]; - __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; -} __packed; - -/* file types used in inode_info->flags */ -enum { - F2FS_FT_UNKNOWN, - F2FS_FT_REG_FILE, - F2FS_FT_DIR, - F2FS_FT_CHRDEV, - F2FS_FT_BLKDEV, - F2FS_FT_FIFO, - F2FS_FT_SOCK, - F2FS_FT_SYMLINK, - F2FS_FT_MAX -}; - -#endif /* _LINUX_F2FS_FS_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 2e9d230ff95..5c3b043a645 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1743,19 +1743,6 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } -/** - * set_nlink - directly set an inode's link count - * @inode: inode - * @nlink: new nlink (should be non-zero) - * - * This is a low-level filesystem helper to replace any - * direct filesystem manipulation of i_nlink. - */ -static inline void set_nlink(struct inode *inode, unsigned int nlink) -{ - inode->i_nlink = nlink; -} - /** * inc_nlink - directly increment an inode's link count * @inode: inode diff --git a/include/linux/magic.h b/include/linux/magic.h index 2616b546e83..1e5df2af8d8 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -24,7 +24,6 @@ #define EXT4_SUPER_MAGIC 0xEF53 #define BTRFS_SUPER_MAGIC 0x9123683E #define NILFS_SUPER_MAGIC 0x3434 -#define F2FS_SUPER_MAGIC 0xF2F52010 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 diff --git a/include/linux/security.h b/include/linux/security.h index 7de9c15ac01..95a6d8e24df 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -36,7 +36,6 @@ #include #include #include -#include #include /* Maximum number of letters for an LSM name string */ @@ -148,10 +147,6 @@ extern int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif -/* security_inode_init_security callback function to write xattrs */ -typedef int (*initxattrs) (struct inode *inode, - const struct xattr *xattr_array, void *fs_data); - #ifdef CONFIG_SECURITY struct security_mnt_opts { @@ -1720,9 +1715,6 @@ void security_inode_free(struct inode *inode); int security_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, char **name, void **value, size_t *len); -int security_new_inode_init_security(struct inode *inode, struct inode *dir, - const struct qstr *qstr, - initxattrs initxattrs, void *fs_data); int security_inode_create(struct inode *dir, struct dentry *dentry, int mode); int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); @@ -2075,16 +2067,7 @@ static inline int security_inode_init_security(struct inode *inode, void **value, size_t *len) { - return 0; -} - -static inline int security_new_inode_init_security(struct inode *inode, - struct inode *dir, - const struct qstr *qstr, - initxattrs initxattrs, - void *fs_data) -{ - return 0; + return -EOPNOTSUPP; } static inline int security_inode_create(struct inode *dir, diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 7a378662ddf..aed54c50aa6 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -67,12 +67,6 @@ struct xattr_handler { size_t size, int flags, int handler_flags); }; -struct xattr { - char *name; - void *value; - size_t value_len; -}; - ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); diff --git 
a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h deleted file mode 100644 index 52ae54828ed..00000000000 --- a/include/trace/events/f2fs.h +++ /dev/null @@ -1,682 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM f2fs - -#if !defined(_TRACE_F2FS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_F2FS_H - -#include - -#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) -#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino - -#define show_block_type(type) \ - __print_symbolic(type, \ - { NODE, "NODE" }, \ - { DATA, "DATA" }, \ - { META, "META" }, \ - { META_FLUSH, "META_FLUSH" }) - -#define show_bio_type(type) \ - __print_symbolic(type, \ - { READ, "READ" }, \ - { READA, "READAHEAD" }, \ - { READ_SYNC, "READ_SYNC" }, \ - { WRITE, "WRITE" }, \ - { WRITE_SYNC, "WRITE_SYNC" }, \ - { WRITE_FLUSH, "WRITE_FLUSH" }, \ - { WRITE_FUA, "WRITE_FUA" }) - -#define show_data_type(type) \ - __print_symbolic(type, \ - { CURSEG_HOT_DATA, "Hot DATA" }, \ - { CURSEG_WARM_DATA, "Warm DATA" }, \ - { CURSEG_COLD_DATA, "Cold DATA" }, \ - { CURSEG_HOT_NODE, "Hot NODE" }, \ - { CURSEG_WARM_NODE, "Warm NODE" }, \ - { CURSEG_COLD_NODE, "Cold NODE" }, \ - { NO_CHECK_TYPE, "No TYPE" }) - -#define show_gc_type(type) \ - __print_symbolic(type, \ - { FG_GC, "Foreground GC" }, \ - { BG_GC, "Background GC" }) - -#define show_alloc_mode(type) \ - __print_symbolic(type, \ - { LFS, "LFS-mode" }, \ - { SSR, "SSR-mode" }) - -#define show_victim_policy(type) \ - __print_symbolic(type, \ - { GC_GREEDY, "Greedy" }, \ - { GC_CB, "Cost-Benefit" }) - -struct victim_sel_policy; - -DECLARE_EVENT_CLASS(f2fs__inode, - - TP_PROTO(struct inode *inode), - - TP_ARGS(inode), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(ino_t, pino) - __field(umode_t, mode) - __field(loff_t, size) - __field(unsigned int, nlink) - __field(blkcnt_t, blocks) - __field(__u8, advise) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pino = F2FS_I(inode)->i_pino; - __entry->mode = inode->i_mode; - __entry->nlink = inode->i_nlink; - __entry->size = inode->i_size; - __entry->blocks = inode->i_blocks; - __entry->advise = F2FS_I(inode)->i_advise; - ), - - TP_printk("dev = (%d,%d), ino = %lu, pino = %lu, i_mode = 0x%hx, " - "i_size = %lld, i_nlink = %u, i_blocks = %llu, i_advise = 0x%x", - show_dev_ino(__entry), - (unsigned long)__entry->pino, - __entry->mode, - __entry->size, - (unsigned int)__entry->nlink, - (unsigned long long)__entry->blocks, - (unsigned char)__entry->advise) -); - -DECLARE_EVENT_CLASS(f2fs__inode_exit, - - TP_PROTO(struct inode *inode, int ret), - - TP_ARGS(inode, ret), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(int, ret) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->ret = ret; - ), - - TP_printk("dev = (%d,%d), ino = %lu, ret = %d", - show_dev_ino(__entry), - __entry->ret) -); - -DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, - - TP_PROTO(struct inode *inode), - - TP_ARGS(inode) -); - -TRACE_EVENT(f2fs_sync_file_exit, - - TP_PROTO(struct inode *inode, bool need_cp, int datasync, int ret), - - TP_ARGS(inode, need_cp, datasync, ret), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(bool, need_cp) - __field(int, datasync) - __field(int, ret) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->need_cp = need_cp; - __entry->datasync = datasync; - __entry->ret = 
ret; - ), - - TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, " - "datasync = %d, ret = %d", - show_dev_ino(__entry), - __entry->need_cp ? "needed" : "not needed", - __entry->datasync, - __entry->ret) -); - -TRACE_EVENT(f2fs_sync_fs, - - TP_PROTO(struct super_block *sb, int wait), - - TP_ARGS(sb, wait), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(int, dirty) - __field(int, wait) - ), - - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->dirty = F2FS_SB(sb)->s_dirty; - __entry->wait = wait; - ), - - TP_printk("dev = (%d,%d), superblock is %s, wait = %d", - show_dev(__entry), - __entry->dirty ? "dirty" : "not dirty", - __entry->wait) -); - -DEFINE_EVENT(f2fs__inode, f2fs_iget, - - TP_PROTO(struct inode *inode), - - TP_ARGS(inode) -); - -DEFINE_EVENT(f2fs__inode_exit, f2fs_iget_exit, - - TP_PROTO(struct inode *inode, int ret), - - TP_ARGS(inode, ret) -); - -DEFINE_EVENT(f2fs__inode, f2fs_evict_inode, - - TP_PROTO(struct inode *inode), - - TP_ARGS(inode) -); - -DEFINE_EVENT(f2fs__inode_exit, f2fs_new_inode, - - TP_PROTO(struct inode *inode, int ret), - - TP_ARGS(inode, ret) -); - -TRACE_EVENT(f2fs_unlink_enter, - - TP_PROTO(struct inode *dir, struct dentry *dentry), - - TP_ARGS(dir, dentry), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(loff_t, size) - __field(blkcnt_t, blocks) - __field(const char *, name) - ), - - TP_fast_assign( - __entry->dev = dir->i_sb->s_dev; - __entry->ino = dir->i_ino; - __entry->size = dir->i_size; - __entry->blocks = dir->i_blocks; - __entry->name = dentry->d_name.name; - ), - - TP_printk("dev = (%d,%d), dir ino = %lu, i_size = %lld, " - "i_blocks = %llu, name = %s", - show_dev_ino(__entry), - __entry->size, - (unsigned long long)__entry->blocks, - __entry->name) -); - -DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit, - - TP_PROTO(struct inode *inode, int ret), - - TP_ARGS(inode, ret) -); - -DEFINE_EVENT(f2fs__inode, f2fs_truncate, - - TP_PROTO(struct inode *inode), - - TP_ARGS(inode) -); - -TRACE_EVENT(f2fs_truncate_data_blocks_range, - - TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs, int free), - - TP_ARGS(inode, nid, ofs, free), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(nid_t, nid) - __field(unsigned int, ofs) - __field(int, free) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->nid = nid; - __entry->ofs = ofs; - __entry->free = free; - ), - - TP_printk("dev = (%d,%d), ino = %lu, nid = %u, offset = %u, freed = %d", - show_dev_ino(__entry), - (unsigned int)__entry->nid, - __entry->ofs, - __entry->free) -); - -DECLARE_EVENT_CLASS(f2fs__truncate_op, - - TP_PROTO(struct inode *inode, u64 from), - - TP_ARGS(inode, from), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(loff_t, size) - __field(blkcnt_t, blocks) - __field(u64, from) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->size = inode->i_size; - __entry->blocks = inode->i_blocks; - __entry->from = from; - ), - - TP_printk("dev = (%d,%d), ino = %lu, i_size = %lld, i_blocks = %llu, " - "start file offset = %llu", - show_dev_ino(__entry), - __entry->size, - (unsigned long long)__entry->blocks, - (unsigned long long)__entry->from) -); - -DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_blocks_enter, - - TP_PROTO(struct inode *inode, u64 from), - - TP_ARGS(inode, from) -); - -DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_blocks_exit, - - TP_PROTO(struct inode *inode, int ret), - - 
TP_ARGS(inode, ret) -); - -DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_inode_blocks_enter, - - TP_PROTO(struct inode *inode, u64 from), - - TP_ARGS(inode, from) -); - -DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_inode_blocks_exit, - - TP_PROTO(struct inode *inode, int ret), - - TP_ARGS(inode, ret) -); - -DECLARE_EVENT_CLASS(f2fs__truncate_node, - - TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), - - TP_ARGS(inode, nid, blk_addr), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(nid_t, nid) - __field(block_t, blk_addr) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->nid = nid; - __entry->blk_addr = blk_addr; - ), - - TP_printk("dev = (%d,%d), ino = %lu, nid = %u, block_address = 0x%llx", - show_dev_ino(__entry), - (unsigned int)__entry->nid, - (unsigned long long)__entry->blk_addr) -); - -DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_nodes_enter, - - TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), - - TP_ARGS(inode, nid, blk_addr) -); - -DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_nodes_exit, - - TP_PROTO(struct inode *inode, int ret), - - TP_ARGS(inode, ret) -); - -DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node, - - TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), - - TP_ARGS(inode, nid, blk_addr) -); - -TRACE_EVENT(f2fs_truncate_partial_nodes, - - TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err), - - TP_ARGS(inode, nid, depth, err), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(nid_t, nid[3]) - __field(int, depth) - __field(int, err) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->nid[0] = nid[0]; - __entry->nid[1] = nid[1]; - __entry->nid[2] = nid[2]; - __entry->depth = depth; - __entry->err = err; - ), - - TP_printk("dev = (%d,%d), ino = %lu, " - "nid[0] = %u, nid[1] = %u, nid[2] = %u, depth = %d, err = %d", - show_dev_ino(__entry), - (unsigned int)__entry->nid[0], - (unsigned int)__entry->nid[1], - (unsigned int)__entry->nid[2], - __entry->depth, - __entry->err) -); - -TRACE_EVENT_CONDITION(f2fs_readpage, - - TP_PROTO(struct page *page, sector_t blkaddr, int type), - - TP_ARGS(page, blkaddr, type), - - TP_CONDITION(page->mapping), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(pgoff_t, index) - __field(sector_t, blkaddr) - __field(int, type) - ), - - TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; - __entry->index = page->index; - __entry->blkaddr = blkaddr; - __entry->type = type; - ), - - TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "blkaddr = 0x%llx, bio_type = %s", - show_dev_ino(__entry), - (unsigned long)__entry->index, - (unsigned long long)__entry->blkaddr, - show_bio_type(__entry->type)) -); - -TRACE_EVENT(f2fs_get_data_block, - TP_PROTO(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int ret), - - TP_ARGS(inode, iblock, bh, ret), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(sector_t, iblock) - __field(sector_t, bh_start) - __field(size_t, bh_size) - __field(int, ret) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->iblock = iblock; - __entry->bh_start = bh->b_blocknr; - __entry->bh_size = bh->b_size; - __entry->ret = ret; - ), - - TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " - "start blkaddr = 0x%llx, len = 0x%llx bytes, err = 
%d", - show_dev_ino(__entry), - (unsigned long long)__entry->iblock, - (unsigned long long)__entry->bh_start, - (unsigned long long)__entry->bh_size, - __entry->ret) -); - -TRACE_EVENT(f2fs_get_victim, - - TP_PROTO(struct super_block *sb, int type, int gc_type, - struct victim_sel_policy *p, unsigned int pre_victim, - unsigned int prefree, unsigned int free), - - TP_ARGS(sb, type, gc_type, p, pre_victim, prefree, free), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(int, type) - __field(int, gc_type) - __field(int, alloc_mode) - __field(int, gc_mode) - __field(unsigned int, victim) - __field(unsigned int, ofs_unit) - __field(unsigned int, pre_victim) - __field(unsigned int, prefree) - __field(unsigned int, free) - ), - - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->type = type; - __entry->gc_type = gc_type; - __entry->alloc_mode = p->alloc_mode; - __entry->gc_mode = p->gc_mode; - __entry->victim = p->min_segno; - __entry->ofs_unit = p->ofs_unit; - __entry->pre_victim = pre_victim; - __entry->prefree = prefree; - __entry->free = free; - ), - - TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u " - "ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u", - show_dev(__entry), - show_data_type(__entry->type), - show_gc_type(__entry->gc_type), - show_alloc_mode(__entry->alloc_mode), - show_victim_policy(__entry->gc_mode), - __entry->victim, - __entry->ofs_unit, - (int)__entry->pre_victim, - __entry->prefree, - __entry->free) -); - -TRACE_EVENT(f2fs_fallocate, - - TP_PROTO(struct inode *inode, int mode, - loff_t offset, loff_t len, int ret), - - TP_ARGS(inode, mode, offset, len, ret), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(int, mode) - __field(loff_t, offset) - __field(loff_t, len) - __field(loff_t, size) - __field(blkcnt_t, blocks) - __field(int, ret) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->mode = mode; - __entry->offset = offset; - __entry->len = len; - __entry->size = inode->i_size; - __entry->blocks = inode->i_blocks; - __entry->ret = ret; - ), - - TP_printk("dev = (%d,%d), ino = %lu, mode = %x, offset = %lld, " - "len = %lld, i_size = %lld, i_blocks = %llu, ret = %d", - show_dev_ino(__entry), - __entry->mode, - (unsigned long long)__entry->offset, - (unsigned long long)__entry->len, - (unsigned long long)__entry->size, - (unsigned long long)__entry->blocks, - __entry->ret) -); - -TRACE_EVENT(f2fs_reserve_new_block, - - TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), - - TP_ARGS(inode, nid, ofs_in_node), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(nid_t, nid) - __field(unsigned int, ofs_in_node) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->nid = nid; - __entry->ofs_in_node = ofs_in_node; - ), - - TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u", - show_dev(__entry), - (unsigned int)__entry->nid, - __entry->ofs_in_node) -); - -TRACE_EVENT(f2fs_do_submit_bio, - - TP_PROTO(struct super_block *sb, int btype, bool sync, struct bio *bio), - - TP_ARGS(sb, btype, sync, bio), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(int, btype) - __field(bool, sync) - __field(sector_t, sector) - __field(unsigned int, size) - ), - - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->btype = btype; - __entry->sync = sync; - __entry->sector = bio->bi_sector; - __entry->size = bio->bi_size; - ), - - TP_printk("dev = (%d,%d), type = %s, io = %s, sector = %lld, size = %u", - show_dev(__entry), - 
show_block_type(__entry->btype), - __entry->sync ? "sync" : "no sync", - (unsigned long long)__entry->sector, - __entry->size) -); - -TRACE_EVENT(f2fs_submit_write_page, - - TP_PROTO(struct page *page, block_t blk_addr, int type), - - TP_ARGS(page, blk_addr, type), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(int, type) - __field(pgoff_t, index) - __field(block_t, block) - ), - - TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; - __entry->type = type; - __entry->index = page->index; - __entry->block = blk_addr; - ), - - TP_printk("dev = (%d,%d), ino = %lu, %s, index = %lu, blkaddr = 0x%llx", - show_dev_ino(__entry), - show_block_type(__entry->type), - (unsigned long)__entry->index, - (unsigned long long)__entry->block) -); - -TRACE_EVENT(f2fs_write_checkpoint, - - TP_PROTO(struct super_block *sb, bool is_umount, char *msg), - - TP_ARGS(sb, is_umount, msg), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(bool, is_umount) - __field(char *, msg) - ), - - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->is_umount = is_umount; - __entry->msg = msg; - ), - - TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", - show_dev(__entry), - __entry->is_umount ? "clean umount" : "consistency", - __entry->msg) -); - -#endif /* _TRACE_F2FS_H */ - - /* This part must be outside protection */ -#include diff --git a/security/security.c b/security/security.c index 0dc000974ec..420198e5f32 100644 --- a/security/security.c +++ b/security/security.c @@ -18,8 +18,6 @@ #include #include -#define MAX_LSM_XATTR 1 - /* Boot-time LSM user choice */ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = CONFIG_DEFAULT_SECURITY; @@ -371,37 +369,6 @@ int security_inode_init_security(struct inode *inode, struct inode *dir, } EXPORT_SYMBOL(security_inode_init_security); -int security_new_inode_init_security(struct inode *inode, struct inode *dir, - const struct qstr *qstr, - const initxattrs initxattrs, void *fs_data) -{ - struct xattr new_xattrs[MAX_LSM_XATTR + 1]; - struct xattr *lsm_xattr; - int ret; - - if (unlikely(IS_PRIVATE(inode))) - return -EOPNOTSUPP; - - memset(new_xattrs, 0, sizeof new_xattrs); - if (!initxattrs) - return security_ops->inode_init_security(inode, dir, qstr, - NULL, NULL, NULL); - lsm_xattr = new_xattrs; - ret = security_ops->inode_init_security(inode, dir, qstr, - &lsm_xattr->name, - &lsm_xattr->value, - &lsm_xattr->value_len); - if (ret) - goto out; - ret = initxattrs(inode, new_xattrs, fs_data); -out: - kfree(lsm_xattr->name); - kfree(lsm_xattr->value); - - return (ret == -EOPNOTSUPP) ? 
0 : ret; -} -EXPORT_SYMBOL(security_new_inode_init_security); - #ifdef CONFIG_SECURITY_PATH int security_path_mknod(struct path *dir, struct dentry *dentry, int mode, unsigned int dev) From 5a1316961d2b3ffe75056161718b3de36e3bc734 Mon Sep 17 00:00:00 2001 From: klozz Date: Sat, 1 Mar 2014 02:02:14 -0600 Subject: [PATCH 618/678] F2FS: Flash-Friendly File System Conflicts: arch/arm/configs/metallice_grouper_defconfig --- Documentation/filesystems/00-INDEX | 2 + Documentation/filesystems/f2fs.txt | 502 +++++ arch/arm/configs/metallice_grouper_defconfig | 3 + fs/Kconfig | 1 + fs/Makefile | 1 + fs/dcache.c | 4 +- fs/f2fs/Kconfig | 66 + fs/f2fs/Makefile | 8 + fs/f2fs/acl.c | 425 ++++ fs/f2fs/acl.h | 60 + fs/f2fs/checkpoint.c | 862 ++++++++ fs/f2fs/data.c | 792 ++++++++ fs/f2fs/debug.c | 354 ++++ fs/f2fs/dir.c | 716 +++++++ fs/f2fs/f2fs.h | 1292 ++++++++++++ fs/f2fs/file.c | 727 +++++++ fs/f2fs/gc.c | 740 +++++++ fs/f2fs/gc.h | 112 ++ fs/f2fs/hash.c | 103 + fs/f2fs/inode.c | 275 +++ fs/f2fs/namei.c | 559 ++++++ fs/f2fs/node.c | 1861 ++++++++++++++++++ fs/f2fs/node.h | 347 ++++ fs/f2fs/recovery.c | 504 +++++ fs/f2fs/segment.c | 1789 +++++++++++++++++ fs/f2fs/segment.h | 639 ++++++ fs/f2fs/super.c | 1156 +++++++++++ fs/f2fs/xattr.c | 602 ++++++ fs/f2fs/xattr.h | 154 ++ include/linux/dcache.h | 1 + include/linux/f2fs_fs.h | 426 ++++ include/linux/fs.h | 13 + include/linux/magic.h | 1 + include/linux/security.h | 20 +- include/linux/xattr.h | 6 + include/trace/events/f2fs.h | 682 +++++++ security/security.c | 34 + 37 files changed, 15836 insertions(+), 3 deletions(-) create mode 100644 Documentation/filesystems/f2fs.txt create mode 100644 fs/f2fs/Kconfig create mode 100644 fs/f2fs/Makefile create mode 100644 fs/f2fs/acl.c create mode 100644 fs/f2fs/acl.h create mode 100644 fs/f2fs/checkpoint.c create mode 100644 fs/f2fs/data.c create mode 100644 fs/f2fs/debug.c create mode 100644 fs/f2fs/dir.c create mode 100644 fs/f2fs/f2fs.h create mode 100644 fs/f2fs/file.c create mode 100644 fs/f2fs/gc.c create mode 100644 fs/f2fs/gc.h create mode 100644 fs/f2fs/hash.c create mode 100644 fs/f2fs/inode.c create mode 100644 fs/f2fs/namei.c create mode 100644 fs/f2fs/node.c create mode 100644 fs/f2fs/node.h create mode 100644 fs/f2fs/recovery.c create mode 100644 fs/f2fs/segment.c create mode 100644 fs/f2fs/segment.h create mode 100644 fs/f2fs/super.c create mode 100644 fs/f2fs/xattr.c create mode 100644 fs/f2fs/xattr.h create mode 100644 include/linux/f2fs_fs.h create mode 100644 include/trace/events/f2fs.h diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 8c624a18f67..ce5fd467791 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -48,6 +48,8 @@ ext4.txt - info, mount options and specifications for the Ext4 filesystem. files.txt - info on file management in the Linux kernel. +f2fs.txt + - info and mount options for the F2FS filesystem. fuse.txt - info on the Filesystem in User SpacE including mount options. gfs2.txt diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt new file mode 100644 index 00000000000..a3fe811bbdb --- /dev/null +++ b/Documentation/filesystems/f2fs.txt @@ -0,0 +1,502 @@ +================================================================================ +WHAT IS Flash-Friendly File System (F2FS)? 
+================================================================================ + +NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have +been equipped on a variety of systems ranging from mobile to server systems. Since +they are known to have different characteristics from the conventional rotating +disks, a file system, an upper layer to the storage device, should adapt to the +changes from scratch at the design level. + +F2FS is a file system exploiting NAND flash memory-based storage devices, which +is based on Log-structured File System (LFS). The design has been focused on +addressing the fundamental issues in LFS, which are the snowball effect of wandering +tree and high cleaning overhead. + +Since a NAND flash memory-based storage device shows different characteristics +according to its internal geometry or flash memory management scheme, namely FTL, +F2FS and its tools support various parameters not only for configuring on-disk +layout, but also for selecting allocation and cleaning algorithms. + +The following git tree provides the file system formatting tool (mkfs.f2fs), +a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). +>> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git + +For reporting bugs and sending patches, please use the following mailing list: +>> linux-f2fs-devel@lists.sourceforge.net + +================================================================================ +BACKGROUND AND DESIGN ISSUES +================================================================================ + +Log-structured File System (LFS) +-------------------------------- +"A log-structured file system writes all modifications to disk sequentially in +a log-like structure, thereby speeding up both file writing and crash recovery. +The log is the only structure on disk; it contains indexing information so that +files can be read back from the log efficiently. In order to maintain large free +areas on disk for fast writing, we divide the log into segments and use a +segment cleaner to compress the live information from heavily fragmented +segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and +implementation of a log-structured file system", ACM Trans. Computer Systems +10, 1, 26-52. + +Wandering Tree Problem +---------------------- +In LFS, when file data is updated and written to the end of the log, its direct +pointer block is updated due to the changed location. Then the indirect pointer +block is also updated due to the direct pointer block update. In this manner, +the upper index structures such as inode, inode map, and checkpoint block are +also updated recursively. This problem is called the wandering tree problem [1], +and in order to enhance the performance, it should eliminate or relax the update +propagation as much as possible. + +[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/ + +Cleaning Overhead +----------------- +Since LFS is based on out-of-place writes, it produces so many obsolete blocks +scattered across the whole storage. In order to serve new empty log space, it +needs to reclaim these obsolete blocks seamlessly to users. This job is called +a cleaning process. + +The process consists of four operations as follows. +1. A victim segment is selected through referencing the segment usage table. +2. It loads parent index structures of all the data in the victim identified by + segment summary blocks. +3. 
It checks the cross-reference between the data and its parent index structure. +4. It moves valid data selectively. + +This cleaning job may cause unexpected long delays, so the most important goal +is to hide the latencies from users. It should also reduce the +amount of valid data to be moved, and move it quickly as well. + +================================================================================ +KEY FEATURES +================================================================================ + +Flash Awareness +--------------- +- Enlarge the random write area for better performance, but provide the high + spatial locality +- Align FS data structures to the operational units in FTL as best efforts + +Wandering Tree Problem +---------------------- +- Use a term, "node", that represents inodes as well as various pointer blocks +- Introduce Node Address Table (NAT) containing the locations of all the "node" + blocks; this will cut off the update propagation. + +Cleaning Overhead +----------------- +- Support a background cleaning process +- Support greedy and cost-benefit algorithms for victim selection policies +- Support multi-head logs for static/dynamic hot and cold data separation +- Introduce adaptive logging for efficient block allocation + +================================================================================ +MOUNT OPTIONS +================================================================================ + +background_gc=%s Turn on/off cleaning operations, namely garbage + collection, triggered in background when I/O subsystem is + idle. If background_gc=on, it will turn on the garbage + collection and if background_gc=off, garbage collection + will be turned off. + Default value for this option is on. So garbage + collection is on by default. +disable_roll_forward Disable the roll-forward recovery routine +discard Issue discard/TRIM commands when a segment is cleaned. +no_heap Disable heap-style segment allocation which finds free + segments for data from the beginning of main area, while + for node from the end of main area. +nouser_xattr Disable Extended User Attributes. Note: xattr is enabled + by default if CONFIG_F2FS_FS_XATTR is selected. +noacl Disable POSIX Access Control List. Note: acl is enabled + by default if CONFIG_F2FS_FS_POSIX_ACL is selected. +active_logs=%u Support configuring the number of active logs. In the + current design, f2fs supports only 2, 4, and 6 logs. + Default number is 6. +disable_ext_identify Disable the extension list configured by mkfs, so f2fs + is not aware of cold files such as media files. +inline_xattr Enable the inline xattrs feature. + +================================================================================ +DEBUGFS ENTRIES +================================================================================ + +/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as +f2fs. Each file shows the whole f2fs information. + +/sys/kernel/debug/f2fs/status includes: + - major file system information managed by f2fs currently + - average SIT information about whole segments + - current memory footprint consumed by f2fs. + +================================================================================ +SYSFS ENTRIES +================================================================================ + +Information about mounted f2fs file systems can be found in +/sys/fs/f2fs. 
Each mounted filesystem will have a directory in
+/sys/fs/f2fs based on its device name (e.g., /sys/fs/f2fs/sda).
+The files in each per-device directory are shown in the table below.
+
+Files in /sys/fs/f2fs/<devname>
+(see also Documentation/ABI/testing/sysfs-fs-f2fs)
+..............................................................................
+ File                         Content
+
+ gc_max_sleep_time            This tuning parameter controls the maximum sleep
+                              time for the garbage collection thread. Time is
+                              in milliseconds.
+
+ gc_min_sleep_time            This tuning parameter controls the minimum sleep
+                              time for the garbage collection thread. Time is
+                              in milliseconds.
+
+ gc_no_gc_sleep_time          This tuning parameter controls the default sleep
+                              time for the garbage collection thread. Time is
+                              in milliseconds.
+
+ gc_idle                      This parameter controls the selection of the
+                              victim policy for garbage collection. Setting
+                              gc_idle = 0 (default) disables this option.
+                              Setting gc_idle = 1 selects the cost-benefit
+                              approach, and gc_idle = 2 selects the greedy
+                              approach.
+
+ reclaim_segments             This parameter controls the number of prefree
+                              segments to be reclaimed. If the number of prefree
+                              segments is larger than this number, f2fs tries to
+                              conduct a checkpoint to reclaim the prefree
+                              segments as free segments. The default is 100
+                              segments, i.e. 200MB.
+
+================================================================================
+USAGE
+================================================================================
+
+1. Download the userland tools and compile them.
+
+2. Skip this step if f2fs was compiled statically into the kernel.
+   Otherwise, insert the f2fs.ko module.
+   # insmod f2fs.ko
+
+3. Create a directory to be used as the mount point.
+   # mkdir /mnt/f2fs
+
+4. Format the block device, and then mount it as f2fs.
+   # mkfs.f2fs -l label /dev/block_device
+   # mount -t f2fs /dev/block_device /mnt/f2fs
+
+mkfs.f2fs
+---------
+The mkfs.f2fs tool formats a partition as the f2fs filesystem, building a basic
+on-disk layout.
+
+The options consist of:
+-l [label]   : Give a volume label, up to 512 unicode characters.
+-a [0 or 1]  : Split the start location of each area for heap-based allocation.
+               1 is set by default, which performs this split.
+-o [int]     : Set the overprovision ratio in percent over the volume size.
+               5 is set by default.
+-s [int]     : Set the number of segments per section.
+               1 is set by default.
+-z [int]     : Set the number of sections per zone.
+               1 is set by default.
+-e [str]     : Set the basic extension list, e.g. "mp3,gif,mov".
+-t [0 or 1]  : Enable or disable the discard command.
+               1 is set by default, which issues discard.
+
+fsck.f2fs
+---------
+The fsck.f2fs tool checks the consistency of an f2fs-formatted partition; it
+examines whether the filesystem metadata and user data are cross-referenced
+correctly or not.
+Note that the initial version of the tool does not fix any inconsistencies.
+
+The options consist of:
+  -d debug level [default:0]
+
+dump.f2fs
+---------
+The dump.f2fs tool shows the information of a specific inode and dumps the SSA
+and SIT areas to the files dump_ssa and dump_sit, respectively.
+
+It is used to debug the on-disk data structures of the f2fs filesystem. It
+shows the on-disk inode information recognized by a given inode number, and is
+able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
+./dump_sit respectively.
+ +The options consist of: + -d debug level [default:0] + -i inode no (hex) + -s [SIT dump segno from #1~#2 (decimal), for all 0~-1] + -a [SSA dump segno from #1~#2 (decimal), for all 0~-1] + +Examples: +# dump.f2fs -i [ino] /dev/sdx +# dump.f2fs -s 0~-1 /dev/sdx (SIT dump) +# dump.f2fs -a 0~-1 /dev/sdx (SSA dump) + +================================================================================ +DESIGN +================================================================================ + +On-disk Layout +-------------- + +F2FS divides the whole volume into a number of segments, each of which is fixed +to 2MB in size. A section is composed of consecutive segments, and a zone +consists of a set of sections. By default, section and zone sizes are set to one +segment size identically, but users can easily modify the sizes by mkfs. + +F2FS splits the entire volume into six areas, and all the areas except superblock +consists of multiple segments as described below. + + align with the zone size <-| + |-> align with the segment size + _________________________________________________________________________ + | | | Segment | Node | Segment | | + | Superblock | Checkpoint | Info. | Address | Summary | Main | + | (SB) | (CP) | Table (SIT) | Table (NAT) | Area (SSA) | | + |____________|_____2______|______N______|______N______|______N_____|__N___| + . . + . . + . . + ._________________________________________. + |_Segment_|_..._|_Segment_|_..._|_Segment_| + . . + ._________._________ + |_section_|__...__|_ + . . + .________. + |__zone__| + +- Superblock (SB) + : It is located at the beginning of the partition, and there exist two copies + to avoid file system crash. It contains basic partition information and some + default parameters of f2fs. + +- Checkpoint (CP) + : It contains file system information, bitmaps for valid NAT/SIT sets, orphan + inode lists, and summary entries of current active segments. + +- Segment Information Table (SIT) + : It contains segment information such as valid block count and bitmap for the + validity of all the blocks. + +- Node Address Table (NAT) + : It is composed of a block address table for all the node blocks stored in + Main area. + +- Segment Summary Area (SSA) + : It contains summary entries which contains the owner information of all the + data and node blocks stored in Main area. + +- Main Area + : It contains file and directory data including their indices. + +In order to avoid misalignment between file system and flash-based storage, F2FS +aligns the start block address of CP with the segment size. Also, it aligns the +start block address of Main area with the zone size by reserving some segments +in SSA area. + +Reference the following survey for additional technical details. +https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey + +File System Metadata Structure +------------------------------ + +F2FS adopts the checkpointing scheme to maintain file system consistency. At +mount time, F2FS first tries to find the last valid checkpoint data by scanning +CP area. In order to reduce the scanning time, F2FS uses only two copies of CP. +One of them always indicates the last valid data, which is called as shadow copy +mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism. + +For file system consistency, each CP points to which NAT and SIT copies are +valid, as shown as below. + + +--------+----------+---------+ + | CP | SIT | NAT | + +--------+----------+---------+ + . . . . + . . . . + . . . . 
+ +-------+-------+--------+--------+--------+--------+ + | CP #0 | CP #1 | SIT #0 | SIT #1 | NAT #0 | NAT #1 | + +-------+-------+--------+--------+--------+--------+ + | ^ ^ + | | | + `----------------------------------------' + +Index Structure +--------------- + +The key data structure to manage the data locations is a "node". Similar to +traditional file structures, F2FS has three types of node: inode, direct node, +indirect node. F2FS assigns 4KB to an inode block which contains 923 data block +indices, two direct node pointers, two indirect node pointers, and one double +indirect node pointer as described below. One direct node block contains 1018 +data blocks, and one indirect node block contains also 1018 node blocks. Thus, +one inode block (i.e., a file) covers: + + 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB. + + Inode block (4KB) + |- data (923) + |- direct node (2) + | `- data (1018) + |- indirect node (2) + | `- direct node (1018) + | `- data (1018) + `- double indirect node (1) + `- indirect node (1018) + `- direct node (1018) + `- data (1018) + +Note that, all the node blocks are mapped by NAT which means the location of +each node is translated by the NAT table. In the consideration of the wandering +tree problem, F2FS is able to cut off the propagation of node updates caused by +leaf data writes. + +Directory Structure +------------------- + +A directory entry occupies 11 bytes, which consists of the following attributes. + +- hash hash value of the file name +- ino inode number +- len the length of file name +- type file type such as directory, symlink, etc + +A dentry block consists of 214 dentry slots and file names. Therein a bitmap is +used to represent whether each dentry is valid or not. A dentry block occupies +4KB with the following composition. + + Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) + + dentries(11 * 214 bytes) + file name (8 * 214 bytes) + + [Bucket] + +--------------------------------+ + |dentry block 1 | dentry block 2 | + +--------------------------------+ + . . + . . + . [Dentry Block Structure: 4KB] . + +--------+----------+----------+------------+ + | bitmap | reserved | dentries | file names | + +--------+----------+----------+------------+ + [Dentry Block: 4KB] . . + . . + . . + +------+------+-----+------+ + | hash | ino | len | type | + +------+------+-----+------+ + [Dentry Structure: 11 bytes] + +F2FS implements multi-level hash tables for directory structure. Each level has +a hash table with dedicated number of hash buckets as shown below. Note that +"A(2B)" means a bucket includes 2 data blocks. + +---------------------- +A : bucket +B : block +N : MAX_DIR_HASH_DEPTH +---------------------- + +level #0 | A(2B) + | +level #1 | A(2B) - A(2B) + | +level #2 | A(2B) - A(2B) - A(2B) - A(2B) + . | . . . . +level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) + . | . . . . +level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) + +The number of blocks and buckets are determined by, + + ,- 2, if n < MAX_DIR_HASH_DEPTH / 2, + # of blocks in level #n = | + `- 4, Otherwise + + ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2, + # of buckets in level #n = | + `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise + +When F2FS finds a file name in a directory, at first a hash value of the file +name is calculated. Then, F2FS scans the hash table in level #0 to find the +dentry consisting of the file name and its inode number. If not found, F2FS +scans the next hash table in level #1. 
In this way, F2FS scans hash tables in
+each level incrementally from 1 to N. In each level F2FS needs to scan only
+one bucket determined by the following equation, which shows O(log(# of files))
+complexity.
+
+  bucket number to scan in level #n = (hash value) % (# of buckets in level #n)
+
+In the case of file creation, F2FS finds empty consecutive slots that cover the
+file name. F2FS searches for the empty slots in the hash tables of all levels
+from 1 to N in the same way as the lookup operation.
+
+The following figure shows an example of two cases holding children.
+       --------------> Dir <--------------
+       |                                  |
+    child                              child
+
+    child - child [hole] - child
+
+    child - child - child [hole] - [hole] - child
+
+    Case 1:                           Case 2:
+    Number of children = 6,           Number of children = 3,
+    File size = 7                     File size = 7
+
+Default Block Allocation
+------------------------
+
+At runtime, F2FS manages six active logs inside the "Main" area: Hot/Warm/Cold
+node and Hot/Warm/Cold data.
+
+- Hot node contains direct node blocks of directories.
+- Warm node contains direct node blocks except hot node blocks.
+- Cold node contains indirect node blocks.
+- Hot data contains dentry blocks.
+- Warm data contains data blocks except hot and cold data blocks.
+- Cold data contains multimedia data or migrated data blocks.
+
+LFS has two schemes for free space management: threaded log and
+copy-and-compaction. The copy-and-compaction scheme, which is known as
+cleaning, is well-suited for devices showing very good sequential write
+performance, since free segments are served all the time for writing new data.
+However, it suffers from cleaning overhead under high utilization. In contrast,
+the threaded log scheme suffers from random writes, but no cleaning process is
+needed. F2FS adopts a hybrid scheme where the copy-and-compaction scheme is
+used by default, but the policy is dynamically changed to the threaded log
+scheme according to the file system status.
+
+In order to align F2FS with the underlying flash-based storage, F2FS allocates
+a segment in the unit of a section. F2FS expects the section size to be the
+same as the garbage collection unit size in the FTL. Furthermore, with respect
+to the mapping granularity in the FTL, F2FS allocates each section of the
+active logs from different zones as much as possible, since the FTL can write
+the data in the active logs into one allocation unit according to its mapping
+granularity.
+
+Cleaning process
+----------------
+
+F2FS does cleaning both on demand and in the background. On-demand cleaning is
+triggered when there are not enough free segments to serve VFS calls. The
+background cleaner is operated by a kernel thread and triggers the cleaning job
+when the system is idle.
+
+F2FS supports two victim selection policies: the greedy and cost-benefit
+algorithms. In the greedy algorithm, F2FS selects the victim segment having the
+smallest number of valid blocks. In the cost-benefit algorithm, F2FS selects a
+victim segment according to the segment age and the number of valid blocks, in
+order to address the log block thrashing problem of the greedy algorithm. F2FS
+adopts the greedy algorithm for the on-demand cleaner, while the background
+cleaner adopts the cost-benefit algorithm. A simplified illustration of both
+policies is given at the end of this document.
+
+In order to identify whether the data in the victim segment are valid or not,
+F2FS manages a bitmap. Each bit represents the validity of a block, and the
+bitmap is composed of a bit stream covering whole blocks in main area.
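+
+To illustrate the two victim selection policies, the following sketch scores
+candidate segments using only their valid block counts and modification times.
+It is a minimal example, not the actual f2fs implementation (the real code
+lives in fs/f2fs/gc.c, added by this series): the seg_info structure and helper
+functions here are hypothetical, and the cost-benefit score is the classic LFS
+formula age * (1 - utilization) / (1 + utilization), scaled to integer
+arithmetic.
+
+    #include <limits.h>
+
+    struct seg_info {
+        unsigned int valid_blocks;   /* taken from the SIT */
+        unsigned long mtime;         /* last modification time of the segment */
+    };
+
+    /* Greedy policy: pick the segment with the fewest valid blocks. */
+    static int pick_victim_greedy(const struct seg_info *seg, int nsegs)
+    {
+        unsigned int min_valid = UINT_MAX;
+        int i, victim = -1;
+
+        for (i = 0; i < nsegs; i++) {
+            if (seg[i].valid_blocks < min_valid) {
+                min_valid = seg[i].valid_blocks;
+                victim = i;
+            }
+        }
+        return victim;
+    }
+
+    /* Cost-benefit policy: prefer older segments with fewer valid blocks. */
+    static int pick_victim_cost_benefit(const struct seg_info *seg, int nsegs,
+                                        unsigned int blocks_per_seg,
+                                        unsigned long now)
+    {
+        unsigned long best = 0;
+        int i, victim = -1;
+
+        for (i = 0; i < nsegs; i++) {
+            unsigned long age = now - seg[i].mtime;
+            unsigned long free = blocks_per_seg - seg[i].valid_blocks;
+            /* age * (1 - u) / (1 + u), where u = valid_blocks / blocks_per_seg */
+            unsigned long score = (age * free) /
+                                  (blocks_per_seg + seg[i].valid_blocks);
+
+            if (score > best) {
+                best = score;
+                victim = i;
+            }
+        }
+        return victim;
+    }
+
+In the real code the scores are computed per candidate inside the GC path, and
+the policy is chosen according to whether cleaning runs on demand or in the
+background, as described above.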
diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 241c0b045cd..7381367c327 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -3027,6 +3027,9 @@ CONFIG_PROC_PAGE_MONITOR=y CONFIG_REPORT_PRESENT_CPUS=y CONFIG_SYSFS=y CONFIG_TMPFS=y +CONFIG_F2FS_FS=y +CONFIG_F2FS_FS_XATTR=y +CONFIG_F2FS_FS_SECURITY=y # CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_TMPFS_XATTR is not set # CONFIG_HUGETLB_PAGE is not set diff --git a/fs/Kconfig b/fs/Kconfig index 3130a45eafa..aebcee21e5d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -219,6 +219,7 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" +source "fs/f2fs/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/Makefile b/fs/Makefile index cd17b767c56..63e532972b3 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -120,6 +120,7 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ +obj-$(CONFIG_F2FS_FS) += f2fs/ obj-$(CONFIG_EXOFS_FS) += exofs/ obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ diff --git a/fs/dcache.c b/fs/dcache.c index 8b732a205d5..239f5e664aa 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1438,7 +1438,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode) return alias; } -static struct dentry * d_find_any_alias(struct inode *inode) +struct dentry * d_find_any_alias(struct inode *inode) { struct dentry *de; @@ -1447,7 +1447,7 @@ static struct dentry * d_find_any_alias(struct inode *inode) spin_unlock(&inode->i_lock); return de; } - +EXPORT_SYMBOL(d_find_any_alias); /** * d_obtain_alias - find or allocate a dentry for a given inode diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig new file mode 100644 index 00000000000..e5ad9500e6a --- /dev/null +++ b/fs/f2fs/Kconfig @@ -0,0 +1,66 @@ +config F2FS_FS + tristate "F2FS filesystem support (EXPERIMENTAL)" + depends on BLOCK + help + F2FS is based on Log-structured File System (LFS), which supports + versatile "flash-friendly" features. The design has been focused on + addressing the fundamental issues in LFS, which are snowball effect + of wandering tree and high cleaning overhead. + + Since flash-based storages show different characteristics according to + the internal geometry or flash memory management schemes aka FTL, F2FS + and tools support various parameters not only for configuring on-disk + layout, but also for selecting allocation and cleaning algorithms. + + If unsure, say N. + +config F2FS_STAT_FS + bool "F2FS Status Information" + depends on F2FS_FS && DEBUG_FS + default y + help + /sys/kernel/debug/f2fs/ contains information about all the partitions + mounted as f2fs. Each file shows the whole f2fs information. + + /sys/kernel/debug/f2fs/status includes: + - major file system information managed by f2fs currently + - average SIT information about whole segments + - current memory footprint consumed by f2fs. + +config F2FS_FS_XATTR + bool "F2FS extended attributes" + depends on F2FS_FS + default y + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + for details). + + If unsure, say N. + +config F2FS_FS_POSIX_ACL + bool "F2FS Access Control Lists" + depends on F2FS_FS_XATTR + select FS_POSIX_ACL + default y + help + Posix Access Control Lists (ACLs) support permissions for users and + gourps beyond the owner/group/world scheme. 
+ + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website . + + If you don't know what Access Control Lists are, say N + +config F2FS_FS_SECURITY + bool "F2FS Security Labels" + depends on F2FS_FS_XATTR + help + Security labels provide an access control facility to support Linux + Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO + Linux. This option enables an extended attribute handler for file + security labels in the f2fs filesystem, so that it requires enabling + the extended attribute support in advance. + + If you are not using a security module, say N. + diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile new file mode 100644 index 00000000000..bcc0bc34559 --- /dev/null +++ b/fs/f2fs/Makefile @@ -0,0 +1,8 @@ +obj-$(CONFIG_F2FS_FS) += f2fs.o + +f2fs-y := dir.o file.o inode.o namei.o hash.o super.o +f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o +f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o +f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o +f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o + diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c new file mode 100644 index 00000000000..a989e40ad9b --- /dev/null +++ b/fs/f2fs/acl.c @@ -0,0 +1,425 @@ +/* + * fs/f2fs/acl.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Copyright (c) 2014 XPerience(R) Project + * + * Portions of this code from linux/fs/ext2/acl.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include "f2fs.h" +#include "xattr.h" +#include "acl.h" + +#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? 
\ + (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) + +static inline size_t f2fs_acl_size(int count) +{ + if (count <= 4) { + return sizeof(struct f2fs_acl_header) + + count * sizeof(struct f2fs_acl_entry_short); + } else { + return sizeof(struct f2fs_acl_header) + + 4 * sizeof(struct f2fs_acl_entry_short) + + (count - 4) * sizeof(struct f2fs_acl_entry); + } +} + +static inline int f2fs_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(struct f2fs_acl_header); + s = size - 4 * sizeof(struct f2fs_acl_entry_short); + if (s < 0) { + if (size % sizeof(struct f2fs_acl_entry_short)) + return -1; + return size / sizeof(struct f2fs_acl_entry_short); + } else { + if (s % sizeof(struct f2fs_acl_entry)) + return -1; + return s / sizeof(struct f2fs_acl_entry) + 4; + } +} + +static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) +{ + int i, count; + struct posix_acl *acl; + struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value; + struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); + const char *end = value + size; + + if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) + return ERR_PTR(-EINVAL); + + count = f2fs_acl_count(size); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + + if ((char *)entry > end) + goto fail; + + acl->a_entries[i].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm); + + switch (acl->a_entries[i].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry_short)); + break; + + case ACL_USER: + case ACL_GROUP: + acl->a_entries[i].e_id = le32_to_cpu(entry->e_id); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + default: + goto fail; + } + } + if ((char *)entry != end) + goto fail; + return acl; +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) +{ + struct f2fs_acl_header *f2fs_acl; + struct f2fs_acl_entry *entry; + int i; + + f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * + sizeof(struct f2fs_acl_entry), GFP_KERNEL); + if (!f2fs_acl) + return ERR_PTR(-ENOMEM); + + f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION); + entry = (struct f2fs_acl_entry *)(f2fs_acl + 1); + + for (i = 0; i < acl->a_count; i++) { + + entry->e_tag = cpu_to_le16(acl->a_entries[i].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm); + + switch (acl->a_entries[i].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = cpu_to_le32(acl->a_entries[i].e_id); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry_short)); + break; + default: + goto fail; + } + } + *size = f2fs_acl_size(acl->a_count); + return (void *)f2fs_acl; + +fail: + kfree(f2fs_acl); + return ERR_PTR(-EINVAL); +} + +struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; + void *value = NULL; + struct posix_acl *acl; + int retval; + + if (!test_opt(sbi, POSIX_ACL)) + return NULL; + + acl = get_cached_acl(inode, type); + if (acl 
!= ACL_NOT_CACHED) + return acl; + + if (type == ACL_TYPE_ACCESS) + name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; + + retval = f2fs_getxattr(inode, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + retval = f2fs_getxattr(inode, name_index, "", value, retval); + } + + if (retval > 0) + acl = f2fs_acl_from_disk(value, retval); + else if (retval == -ENODATA) + acl = NULL; + else + acl = ERR_PTR(retval); + kfree(value); + + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); + + return acl; +} + +static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + int name_index; + void *value = NULL; + size_t size = 0; + int error; + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + error = posix_acl_equiv_mode(acl, &inode->i_mode); + if (error < 0) + return error; + set_acl_inode(fi, inode->i_mode); + if (error == 0) + acl = NULL; + } + break; + + case ACL_TYPE_DEFAULT: + name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + + default: + return -EINVAL; + } + + if (acl) { + value = f2fs_acl_to_disk(acl, &size); + if (IS_ERR(value)) { + cond_clear_inode_flag(fi, FI_ACL_MODE); + return (int)PTR_ERR(value); + } + } + + error = f2fs_setxattr(inode, name_index, "", value, size, NULL); + + kfree(value); + if (!error) + set_cached_acl(inode, type, acl); + + cond_clear_inode_flag(fi, FI_ACL_MODE); + return error; +} + +int f2fs_init_acl(struct inode *inode, struct inode *dir) +{ + struct posix_acl *acl = NULL; + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + int error = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (test_opt(sbi, POSIX_ACL)) { + acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl && !(test_opt(sbi, ANDROID_EMU) && + F2FS_I(inode)->i_advise & FADVISE_ANDROID_EMU)) + inode->i_mode &= ~current_umask(); + } + + if (test_opt(sbi, POSIX_ACL) && acl) { + + if (S_ISDIR(inode->i_mode)) { + error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto cleanup; + } + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + if (error < 0) + return error; + if (error > 0) + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); + } +cleanup: + posix_acl_release(acl); + return error; +} + +int f2fs_acl_chmod(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct posix_acl *acl; + int error; + mode_t mode = get_inode_mode(inode); + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(mode)) + return -EOPNOTSUPP; + + acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + error = posix_acl_chmod(&acl, GFP_KERNEL, mode); + if (error) + return error; + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); + posix_acl_release(acl); + return error; +} + +int f2fs_android_emu(struct f2fs_sb_info *sbi, struct inode *inode, + u32 *uid, u32 *gid, umode_t *mode) +{ + F2FS_I(inode)->i_advise |= FADVISE_ANDROID_EMU; + + if (uid) + *uid = sbi->android_emu_uid; + if (gid) + *gid = sbi->android_emu_gid; + if (mode) { + *mode = (*mode & ~S_IRWXUGO) | sbi->android_emu_mode; + if (F2FS_I(inode)->i_advise & FADVISE_ANDROID_EMU_ROOT) + *mode &= ~S_IRWXO; + if (S_ISDIR(*mode)) { + 
if (*mode & S_IRUSR) + *mode |= S_IXUSR; + if (*mode & S_IRGRP) + *mode |= S_IXGRP; + if (*mode & S_IROTH) + *mode |= S_IXOTH; + } + } + + return 0; +} + +static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + const char *xname = POSIX_ACL_XATTR_DEFAULT; + size_t size; + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + + if (type == ACL_TYPE_ACCESS) + xname = POSIX_ACL_XATTR_ACCESS; + + size = strlen(xname) + 1; + if (list && size <= list_size) + memcpy(list, xname, size); + return size; +} + +static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct posix_acl *acl; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + + acl = f2fs_get_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (!acl) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct inode *inode = dentry->d_inode; + struct posix_acl *acl = NULL; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + if (!inode_owner_or_capable(inode)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + error = posix_acl_valid(acl); + if (error) + goto release_and_out; + } + } else { + acl = NULL; + } + + error = f2fs_set_acl(inode, type, acl); + +release_and_out: + posix_acl_release(acl); + return error; +} + +const struct xattr_handler f2fs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; + +const struct xattr_handler f2fs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h new file mode 100644 index 00000000000..813f545475b --- /dev/null +++ b/fs/f2fs/acl.h @@ -0,0 +1,60 @@ +/* + * fs/f2fs/acl.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Copyright (c) 2014 XPerience(R) Project + * + * + * Portions of this code from linux/fs/ext2/acl.h + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __F2FS_ACL_H__ +#define __F2FS_ACL_H__ + +#include + +#define F2FS_ACL_VERSION 0x0001 + +struct f2fs_acl_entry { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +}; + +struct f2fs_acl_entry_short { + __le16 e_tag; + __le16 e_perm; +}; + +struct f2fs_acl_header { + __le32 a_version; +}; + +#ifdef CONFIG_F2FS_FS_POSIX_ACL + +extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type); +extern int f2fs_acl_chmod(struct inode *inode); +extern int f2fs_init_acl(struct inode *inode, struct inode *dir); +#else +#define f2fs_check_acl NULL +#define f2fs_get_acl NULL +#define f2fs_set_acl NULL + +static inline int f2fs_acl_chmod(struct inode *inode) +{ + return 0; +} + +static inline int f2fs_init_acl(struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif +#endif /* __F2FS_ACL_H__ */ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c new file mode 100644 index 00000000000..58ebfee6e59 --- /dev/null +++ b/fs/f2fs/checkpoint.c @@ -0,0 +1,862 @@ +/* + * fs/f2fs/checkpoint.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Copyright (c) 2014 XPerience(R) Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include + +static struct kmem_cache *orphan_entry_slab; +static struct kmem_cache *inode_entry_slab; + +/* + * We guarantee no failure on the returned page. + */ +struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct page *page = NULL; +repeat: + page = grab_cache_page(mapping, index); + if (!page) { + cond_resched(); + goto repeat; + } + + /* We wait writeback only inside grab_meta_page() */ + wait_on_page_writeback(page); + SetPageUptodate(page); + return page; +} + +/* + * We guarantee no failure on the returned page. 
+ */ +struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct page *page; +repeat: + page = grab_cache_page(mapping, index); + if (!page) { + cond_resched(); + goto repeat; + } + if (PageUptodate(page)) + goto out; + + if (f2fs_readpage(sbi, page, index, READ_SYNC)) + goto repeat; + + lock_page(page); + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } +out: + mark_page_accessed(page); + return page; +} + +static int f2fs_write_meta_page(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + /* Should not write any meta pages, if any IO error was occurred */ + if (wbc->for_reclaim || sbi->por_doing || + is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) { + dec_page_count(sbi, F2FS_DIRTY_META); + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; + } + + wait_on_page_writeback(page); + + write_meta_page(sbi, page); + dec_page_count(sbi, F2FS_DIRTY_META); + unlock_page(page); + return 0; +} + +static int f2fs_write_meta_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + struct block_device *bdev = sbi->sb->s_bdev; + long written; + + if (wbc->for_kupdate) + return 0; + + if (get_pages(sbi, F2FS_DIRTY_META) == 0) + return 0; + + /* if mounting is failed, skip writing node pages */ + mutex_lock(&sbi->cp_mutex); + written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev)); + mutex_unlock(&sbi->cp_mutex); + wbc->nr_to_write -= written; + return 0; +} + +long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, + long nr_to_write) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + pgoff_t index = 0, end = LONG_MAX; + struct pagevec pvec; + long nwritten = 0; + struct writeback_control wbc = { + .for_reclaim = 0, + }; + + pagevec_init(&pvec, 0); + + while (index <= end) { + int i, nr_pages; + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + lock_page(page); + BUG_ON(page->mapping != mapping); + BUG_ON(!PageDirty(page)); + clear_page_dirty_for_io(page); + if (f2fs_write_meta_page(page, &wbc)) { + unlock_page(page); + break; + } + if (nwritten++ >= nr_to_write) + break; + } + pagevec_release(&pvec); + cond_resched(); + } + + if (nwritten) + f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX); + + return nwritten; +} + +static int f2fs_set_meta_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + inc_page_count(sbi, F2FS_DIRTY_META); + return 1; + } + return 0; +} + +const struct address_space_operations f2fs_meta_aops = { + .writepage = f2fs_write_meta_page, + .writepages = f2fs_write_meta_pages, + .set_page_dirty = f2fs_set_meta_page_dirty, +}; + +int acquire_orphan_inode(struct f2fs_sb_info *sbi) +{ + unsigned int max_orphans; + int err = 0; + + /* + * considering 512 blocks in a segment 5 blocks are needed for cp + * and log segment summaries. 
Remaining blocks are used to keep + * orphan entries with the limitation one reserved segment + * for cp pack we can have max 1020*507 orphan entries + */ + max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK; + mutex_lock(&sbi->orphan_inode_mutex); + if (sbi->n_orphans >= max_orphans) + err = -ENOSPC; + else + sbi->n_orphans++; + mutex_unlock(&sbi->orphan_inode_mutex); + return err; +} + +void release_orphan_inode(struct f2fs_sb_info *sbi) +{ + mutex_lock(&sbi->orphan_inode_mutex); + if (sbi->n_orphans == 0) { + f2fs_msg(sbi->sb, KERN_ERR, "releasing " + "unacquired orphan inode"); + f2fs_handle_error(sbi); + } else + sbi->n_orphans--; + mutex_unlock(&sbi->orphan_inode_mutex); +} + +void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head, *this; + struct orphan_inode_entry *new = NULL, *orphan = NULL; + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + list_for_each(this, head) { + orphan = list_entry(this, struct orphan_inode_entry, list); + if (orphan->ino == ino) + goto out; + if (orphan->ino > ino) + break; + orphan = NULL; + } +retry: + new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); + if (!new) { + cond_resched(); + goto retry; + } + new->ino = ino; + + /* add new_oentry into list which is sorted by inode number */ + if (orphan) + list_add(&new->list, this->prev); + else + list_add_tail(&new->list, head); +out: + mutex_unlock(&sbi->orphan_inode_mutex); +} + +void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head; + struct orphan_inode_entry *orphan; + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + list_for_each_entry(orphan, head, list) { + if (orphan->ino == ino) { + list_del(&orphan->list); + kmem_cache_free(orphan_entry_slab, orphan); + if (sbi->n_orphans == 0) { + f2fs_msg(sbi->sb, KERN_ERR, "removing " + "unacquired orphan inode %d", + ino); + f2fs_handle_error(sbi); + } else + sbi->n_orphans--; + break; + } + } + mutex_unlock(&sbi->orphan_inode_mutex); +} + +static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct inode *inode = f2fs_iget(sbi->sb, ino); + if (IS_ERR(inode)) { + f2fs_msg(sbi->sb, KERN_ERR, "unable to recover orphan inode %d", + ino); + f2fs_handle_error(sbi); + return; + } + clear_nlink(inode); + + /* truncate all the data during iput */ + iput(inode); +} + +int recover_orphan_inodes(struct f2fs_sb_info *sbi) +{ + block_t start_blk, orphan_blkaddr, i, j; + + if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) + return 0; + + sbi->por_doing = 1; + start_blk = __start_cp_addr(sbi) + 1; + orphan_blkaddr = __start_sum_addr(sbi) - 1; + + for (i = 0; i < orphan_blkaddr; i++) { + struct page *page = get_meta_page(sbi, start_blk + i); + struct f2fs_orphan_block *orphan_blk; + + orphan_blk = (struct f2fs_orphan_block *)page_address(page); + for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { + nid_t ino = le32_to_cpu(orphan_blk->ino[j]); + recover_orphan_inode(sbi, ino); + } + f2fs_put_page(page, 1); + } + /* clear Orphan Flag */ + clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); + sbi->por_doing = 0; + return 0; +} + +static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) +{ + struct list_head *head, *this, *next; + struct f2fs_orphan_block *orphan_blk = NULL; + struct page *page = NULL; + unsigned int nentries = 0; + unsigned short index = 1; + unsigned short orphan_blocks; + + orphan_blocks = (unsigned short)((sbi->n_orphans + + (F2FS_ORPHANS_PER_BLOCK 
- 1)) / F2FS_ORPHANS_PER_BLOCK); + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + + /* loop for each orphan inode entry and write them in Jornal block */ + list_for_each_safe(this, next, head) { + struct orphan_inode_entry *orphan; + + orphan = list_entry(this, struct orphan_inode_entry, list); + + if (nentries == F2FS_ORPHANS_PER_BLOCK) { + /* + * an orphan block is full of 1020 entries, + * then we need to flush current orphan blocks + * and bring another one in memory + */ + orphan_blk->blk_addr = cpu_to_le16(index); + orphan_blk->blk_count = cpu_to_le16(orphan_blocks); + orphan_blk->entry_count = cpu_to_le32(nentries); + set_page_dirty(page); + f2fs_put_page(page, 1); + index++; + start_blk++; + nentries = 0; + page = NULL; + } + if (page) + goto page_exist; + + page = grab_meta_page(sbi, start_blk); + orphan_blk = (struct f2fs_orphan_block *)page_address(page); + memset(orphan_blk, 0, sizeof(*orphan_blk)); +page_exist: + orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); + } + if (!page) + goto end; + + orphan_blk->blk_addr = cpu_to_le16(index); + orphan_blk->blk_count = cpu_to_le16(orphan_blocks); + orphan_blk->entry_count = cpu_to_le32(nentries); + set_page_dirty(page); + f2fs_put_page(page, 1); +end: + mutex_unlock(&sbi->orphan_inode_mutex); +} + +static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, + block_t cp_addr, unsigned long long *version) +{ + struct page *cp_page_1, *cp_page_2 = NULL; + unsigned long blk_size = sbi->blocksize; + struct f2fs_checkpoint *cp_block; + unsigned long long cur_version = 0, pre_version = 0; + size_t crc_offset; + __u32 crc = 0; + + /* Read the 1st cp block in this CP pack */ + cp_page_1 = get_meta_page(sbi, cp_addr); + + /* get the version number */ + cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1); + crc_offset = le32_to_cpu(cp_block->checksum_offset); + if (crc_offset >= blk_size) + goto invalid_cp1; + + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + if (!f2fs_crc_valid(crc, cp_block, crc_offset)) + goto invalid_cp1; + + pre_version = cur_cp_version(cp_block); + + /* Read the 2nd cp block in this CP pack */ + cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; + cp_page_2 = get_meta_page(sbi, cp_addr); + + cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2); + crc_offset = le32_to_cpu(cp_block->checksum_offset); + if (crc_offset >= blk_size) + goto invalid_cp2; + + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + if (!f2fs_crc_valid(crc, cp_block, crc_offset)) + goto invalid_cp2; + + cur_version = cur_cp_version(cp_block); + + if (cur_version == pre_version) { + *version = cur_version; + f2fs_put_page(cp_page_2, 1); + return cp_page_1; + } +invalid_cp2: + f2fs_put_page(cp_page_2, 1); +invalid_cp1: + f2fs_put_page(cp_page_1, 1); + return NULL; +} + +int get_valid_checkpoint(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *cp_block; + struct f2fs_super_block *fsb = sbi->raw_super; + struct page *cp1, *cp2, *cur_page; + unsigned long blk_size = sbi->blocksize; + unsigned long long cp1_version = 0, cp2_version = 0; + unsigned long long cp_start_blk_no; + + sbi->ckpt = kzalloc(blk_size, GFP_KERNEL); + if (!sbi->ckpt) + return -ENOMEM; + /* + * Finding out valid cp block involves read both + * sets( cp pack1 and cp pack 2) + */ + cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr); + cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); + + /* The second checkpoint pack should start at the next segment */ + 
cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); + cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version); + + if (cp1 && cp2) { + if (ver_after(cp2_version, cp1_version)) + cur_page = cp2; + else + cur_page = cp1; + } else if (cp1) { + cur_page = cp1; + } else if (cp2) { + cur_page = cp2; + } else { + goto fail_no_cp; + } + + cp_block = (struct f2fs_checkpoint *)page_address(cur_page); + memcpy(sbi->ckpt, cp_block, blk_size); + + f2fs_put_page(cp1, 1); + f2fs_put_page(cp2, 1); + return 0; + +fail_no_cp: + kfree(sbi->ckpt); + return -EINVAL; +} + +static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode == inode) + return -EEXIST; + } + list_add_tail(&new->list, head); +#ifdef CONFIG_F2FS_STAT_FS + sbi->n_dirty_dirs++; +#endif + return 0; +} + +void set_dirty_dir_page(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; + + if (!S_ISDIR(inode->i_mode)) + return; +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } + new->inode = inode; + INIT_LIST_HEAD(&new->list); + + spin_lock(&sbi->dir_inode_lock); + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); + + inc_page_count(sbi, F2FS_DIRTY_DENTS); + inode_inc_dirty_dents(inode); + SetPagePrivate(page); + spin_unlock(&sbi->dir_inode_lock); +} + +void add_dirty_dir_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } + new->inode = inode; + INIT_LIST_HEAD(&new->list); + + spin_lock(&sbi->dir_inode_lock); + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); + spin_unlock(&sbi->dir_inode_lock); +} + +void remove_dirty_dir_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + + if (!S_ISDIR(inode->i_mode)) + return; + + spin_lock(&sbi->dir_inode_lock); + if (atomic_read(&F2FS_I(inode)->dirty_dents)) { + spin_unlock(&sbi->dir_inode_lock); + return; + } + + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode == inode) { + list_del(&entry->list); + kmem_cache_free(inode_entry_slab, entry); +#ifdef CONFIG_F2FS_STAT_FS + sbi->n_dirty_dirs--; +#endif + break; + } + } + spin_unlock(&sbi->dir_inode_lock); + + /* Only from the recovery routine */ + if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { + clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); + iput(inode); + } +} + +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + struct inode *inode = NULL; + + spin_lock(&sbi->dir_inode_lock); + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode->i_ino == ino) { + inode = entry->inode; + break; + } + } + spin_unlock(&sbi->dir_inode_lock); + return inode; +} + +void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) +{ + struct list_head 
*head = &sbi->dir_inode_list; + struct dir_inode_entry *entry; + struct inode *inode; +retry: + spin_lock(&sbi->dir_inode_lock); + if (list_empty(head)) { + spin_unlock(&sbi->dir_inode_lock); + return; + } + entry = list_entry(head->next, struct dir_inode_entry, list); + inode = igrab(entry->inode); + spin_unlock(&sbi->dir_inode_lock); + if (inode) { + filemap_flush(inode->i_mapping); + iput(inode); + } else { + /* + * We should submit bio, since it exists several + * wribacking dentry pages in the freeing inode. + */ + f2fs_submit_bio(sbi, DATA, true); + } + goto retry; +} + +/* + * Freeze all the FS-operations for checkpoint. + */ +static void block_operations(struct f2fs_sb_info *sbi) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + struct blk_plug plug; + + blk_start_plug(&plug); + +retry_flush_dents: + mutex_lock_all(sbi); + + /* write all the dirty dentry pages */ + if (get_pages(sbi, F2FS_DIRTY_DENTS)) { + mutex_unlock_all(sbi); + sync_dirty_dir_inodes(sbi); + goto retry_flush_dents; + } + + /* + * POR: we should ensure that there is no dirty node pages + * until finishing nat/sit flush. + */ +retry_flush_nodes: + mutex_lock(&sbi->node_write); + + if (get_pages(sbi, F2FS_DIRTY_NODES)) { + mutex_unlock(&sbi->node_write); + sync_node_pages(sbi, 0, &wbc); + goto retry_flush_nodes; + } + blk_finish_plug(&plug); +} + +static void unblock_operations(struct f2fs_sb_info *sbi) +{ + mutex_unlock(&sbi->node_write); + mutex_unlock_all(sbi); +} + +static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + nid_t last_nid = 0; + block_t start_blk; + struct page *cp_page; + unsigned int data_sum_blocks, orphan_blocks; + __u32 crc32 = 0; + void *kaddr; + int i; + + /* Flush all the NAT/SIT pages */ + while (get_pages(sbi, F2FS_DIRTY_META)) + sync_meta_pages(sbi, META, LONG_MAX); + + next_free_nid(sbi, &last_nid); + + /* + * modify checkpoint + * version number is already updated + */ + ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); + ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); + ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); + for (i = 0; i < 3; i++) { + ckpt->cur_node_segno[i] = + cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); + ckpt->cur_node_blkoff[i] = + cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE)); + ckpt->alloc_type[i + CURSEG_HOT_NODE] = + curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); + } + for (i = 0; i < 3; i++) { + ckpt->cur_data_segno[i] = + cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); + ckpt->cur_data_blkoff[i] = + cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA)); + ckpt->alloc_type[i + CURSEG_HOT_DATA] = + curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); + } + + ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); + ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); + ckpt->next_free_nid = cpu_to_le32(last_nid); + + /* 2 cp + n data seg summary + orphan inode blocks */ + data_sum_blocks = npages_for_summary_flush(sbi); + if (data_sum_blocks < 3) + set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); + else + clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); + + orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) + / F2FS_ORPHANS_PER_BLOCK; + ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); + + if (is_umount) { + set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); + } else { + 
clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + data_sum_blocks + orphan_blocks); + } + + if (sbi->n_orphans) + set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + else + clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + + /* update SIT/NAT bitmap */ + get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); + get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); + + crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); + *((__le32 *)((unsigned char *)ckpt + + le32_to_cpu(ckpt->checksum_offset))) + = cpu_to_le32(crc32); + + start_blk = __start_cp_addr(sbi); + + /* write out checkpoint buffer at block 0 */ + cp_page = grab_meta_page(sbi, start_blk++); + kaddr = page_address(cp_page); + memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + + if (sbi->n_orphans) { + write_orphan_inodes(sbi, start_blk); + start_blk += orphan_blocks; + } + + write_data_summaries(sbi, start_blk); + start_blk += data_sum_blocks; + if (is_umount) { + write_node_summaries(sbi, start_blk); + start_blk += NR_CURSEG_NODE_TYPE; + } + + /* writeout checkpoint block */ + cp_page = grab_meta_page(sbi, start_blk); + kaddr = page_address(cp_page); + memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + + /* wait for previous submitted node/meta pages writeback */ + while (get_pages(sbi, F2FS_WRITEBACK)) + congestion_wait(BLK_RW_ASYNC, HZ / 50); + + filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); + filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); + + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + sbi->alloc_valid_block_count = 0; + + /* Here, we only have one bio having CP pack */ + sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + + if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { + clear_prefree_segments(sbi); + F2FS_RESET_SB_DIRT(sbi); + } +} + +/* + * We guarantee that this checkpoint procedure should not fail. 
+ */ +void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long long ckpt_ver; + + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); + + mutex_lock(&sbi->cp_mutex); + block_operations(sbi); + + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); + + f2fs_submit_bio(sbi, DATA, true); + f2fs_submit_bio(sbi, NODE, true); + f2fs_submit_bio(sbi, META, true); + + /* + * update checkpoint pack index + * Increase the version number so that + * SIT entries and seg summaries are written at correct place + */ + ckpt_ver = cur_cp_version(ckpt); + ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); + + /* write cached NAT/SIT entries to NAT/SIT area */ + flush_nat_entries(sbi); + flush_sit_entries(sbi); + + /* unlock all the fs_lock[] in do_checkpoint() */ + do_checkpoint(sbi, is_umount); + + unblock_operations(sbi); + mutex_unlock(&sbi->cp_mutex); + + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); +} + +void init_orphan_info(struct f2fs_sb_info *sbi) +{ + mutex_init(&sbi->orphan_inode_mutex); + INIT_LIST_HEAD(&sbi->orphan_inode_list); + sbi->n_orphans = 0; +} + +int __init create_checkpoint_caches(void) +{ + orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", + sizeof(struct orphan_inode_entry), NULL); + if (unlikely(!orphan_entry_slab)) + return -ENOMEM; + inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", + sizeof(struct dir_inode_entry), NULL); + if (unlikely(!inode_entry_slab)) { + kmem_cache_destroy(orphan_entry_slab); + return -ENOMEM; + } + return 0; +} + +void destroy_checkpoint_caches(void) +{ + kmem_cache_destroy(orphan_entry_slab); + kmem_cache_destroy(inode_entry_slab); +} diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c new file mode 100644 index 00000000000..f47a72cfa6e --- /dev/null +++ b/fs/f2fs/data.c @@ -0,0 +1,792 @@ +/* + * fs/f2fs/data.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Copyright (c) 2014 XPerience(R) Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include + +/* + * Lock ordering for the change of data block address: + * ->data_page + * ->node_page + * update block addresses in the node page + */ +static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) +{ + struct f2fs_node *rn; + __le32 *addr_array; + struct page *node_page = dn->node_page; + unsigned int ofs_in_node = dn->ofs_in_node; + + f2fs_wait_on_page_writeback(node_page, NODE, false); + + rn = F2FS_NODE(node_page); + + /* Get physical address of data block */ + addr_array = blkaddr_in_node(rn); + addr_array[ofs_in_node] = cpu_to_le32(new_addr); + set_page_dirty(node_page); +} + +int reserve_new_block(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + + if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) + return -EPERM; + if (!inc_valid_block_count(sbi, dn->inode, 1)) + return -ENOSPC; + + trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); + + __set_data_blkaddr(dn, NEW_ADDR); + dn->data_blkaddr = NEW_ADDR; + sync_inode_page(dn); + return 0; +} + +static int check_extent_cache(struct inode *inode, pgoff_t pgofs, + struct buffer_head *bh_result) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); +#ifdef CONFIG_F2FS_STAT_FS + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +#endif + pgoff_t start_fofs, end_fofs; + block_t start_blkaddr; + + read_lock(&fi->ext.ext_lock); + if (fi->ext.len == 0) { + read_unlock(&fi->ext.ext_lock); + return 0; + } + +#ifdef CONFIG_F2FS_STAT_FS + sbi->total_hit_ext++; +#endif + start_fofs = fi->ext.fofs; + end_fofs = fi->ext.fofs + fi->ext.len - 1; + start_blkaddr = fi->ext.blk_addr; + + if (pgofs >= start_fofs && pgofs <= end_fofs) { + unsigned int blkbits = inode->i_sb->s_blocksize_bits; + size_t count; + + clear_buffer_new(bh_result); + map_bh(bh_result, inode->i_sb, + start_blkaddr + pgofs - start_fofs); + count = end_fofs - pgofs + 1; + if (count < (UINT_MAX >> blkbits)) + bh_result->b_size = (count << blkbits); + else + bh_result->b_size = UINT_MAX; + +#ifdef CONFIG_F2FS_STAT_FS + sbi->read_hit_ext++; +#endif + read_unlock(&fi->ext.ext_lock); + return 1; + } + read_unlock(&fi->ext.ext_lock); + return 0; +} + +void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) +{ + struct f2fs_inode_info *fi = F2FS_I(dn->inode); + pgoff_t fofs, start_fofs, end_fofs; + block_t start_blkaddr, end_blkaddr; + + BUG_ON(blk_addr == NEW_ADDR); + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; + + /* Update the page address in the parent node */ + __set_data_blkaddr(dn, blk_addr); + + write_lock(&fi->ext.ext_lock); + + start_fofs = fi->ext.fofs; + end_fofs = fi->ext.fofs + fi->ext.len - 1; + start_blkaddr = fi->ext.blk_addr; + end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; + + /* Drop and initialize the matched extent */ + if (fi->ext.len == 1 && fofs == start_fofs) + fi->ext.len = 0; + + /* Initial extent */ + if (fi->ext.len == 0) { + if (blk_addr != NULL_ADDR) { + fi->ext.fofs = fofs; + fi->ext.blk_addr = blk_addr; + fi->ext.len = 1; + } + goto end_update; + } + + /* Front merge */ + if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { + fi->ext.fofs--; + fi->ext.blk_addr--; + fi->ext.len++; + goto end_update; + } + + /* Back merge */ + if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { + fi->ext.len++; + goto end_update; + } + + /* Split the existing extent */ + if 
(fi->ext.len > 1 && + fofs >= start_fofs && fofs <= end_fofs) { + if ((end_fofs - fofs) < (fi->ext.len >> 1)) { + fi->ext.len = fofs - start_fofs; + } else { + fi->ext.fofs = fofs + 1; + fi->ext.blk_addr = start_blkaddr + + fofs - start_fofs + 1; + fi->ext.len -= fofs - start_fofs + 1; + } + goto end_update; + } + write_unlock(&fi->ext.ext_lock); + return; + +end_update: + write_unlock(&fi->ext.ext_lock); + sync_inode_page(dn); +} + +struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct dnode_of_data dn; + struct page *page; + int err; + + page = find_get_page(mapping, index); + if (page && PageUptodate(page)) + return page; + f2fs_put_page(page, 0); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) + return ERR_PTR(err); + f2fs_put_dnode(&dn); + + if (dn.data_blkaddr == NULL_ADDR) + return ERR_PTR(-ENOENT); + + /* By fallocate(), there is no cached page, but with NEW_ADDR */ + if (dn.data_blkaddr == NEW_ADDR) + return ERR_PTR(-EINVAL); + + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + if (!page) + return ERR_PTR(-ENOMEM); + + if (PageUptodate(page)) { + unlock_page(page); + return page; + } + + err = f2fs_readpage(sbi, page, dn.data_blkaddr, + sync ? READ_SYNC : READA); + if (sync) { + wait_on_page_locked(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 0); + return ERR_PTR(-EIO); + } + } + return page; +} + +/* + * If it tries to access a hole, return an error. + * Because, the callers, functions in dir.c and GC, should be able to know + * whether this page exists or not. + */ +struct page *get_lock_data_page(struct inode *inode, pgoff_t index) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct dnode_of_data dn; + struct page *page; + int err; + +repeat: + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + if (!page) + return ERR_PTR(-ENOMEM); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) { + f2fs_put_page(page, 1); + return ERR_PTR(err); + } + f2fs_put_dnode(&dn); + + if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_page(page, 1); + return ERR_PTR(-ENOENT); + } + + if (PageUptodate(page)) + return page; + + /* + * A new dentry page is allocated but not able to be written, since its + * new inode page couldn't be allocated due to -ENOSPC. + * In such the case, its blkaddr can be remained as NEW_ADDR. + * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata. + */ + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + return page; + } + + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) + return ERR_PTR(err); + + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } + return page; +} + +/* + * Caller ensures that this data page is never allocated. + * A new zero-filled data page is allocated in the page cache. + * + * Also, caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). + * Note that, npage is set only by make_empty_dir. 
+ */ +struct page *get_new_data_page(struct inode *inode, + struct page *npage, pgoff_t index, bool new_i_size) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct page *page; + struct dnode_of_data dn; + int err; + + set_new_dnode(&dn, inode, npage, npage, 0); + err = get_dnode_of_data(&dn, index, ALLOC_NODE); + if (err) + return ERR_PTR(err); + + if (dn.data_blkaddr == NULL_ADDR) { + if (reserve_new_block(&dn)) { + if (!npage) + f2fs_put_dnode(&dn); + return ERR_PTR(-ENOSPC); + } + } + if (!npage) + f2fs_put_dnode(&dn); +repeat: + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); + + if (PageUptodate(page)) + return page; + + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + } else { + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) + return ERR_PTR(err); + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } + } + + if (new_i_size && + i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { + i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); + /* Only the directory inode sets new_i_size */ + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); + mark_inode_dirty_sync(inode); + } + return page; +} + +static void read_end_io(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + SetPageUptodate(page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + unlock_page(page); + } while (bvec >= bio->bi_io_vec); + bio_put(bio); +} + +/* + * Fill the locked page with data located in the block address. + * Return unlocked page. + */ +int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, + block_t blk_addr, int type) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct bio *bio; + + trace_f2fs_readpage(page, blk_addr, type); + + down_read(&sbi->bio_sem); + + /* Allocate a new bio */ + bio = f2fs_bio_alloc(bdev, 1); + + /* Initialize the bio */ + bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + bio->bi_end_io = read_end_io; + + if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { + bio_put(bio); + up_read(&sbi->bio_sem); + f2fs_put_page(page, 1); + return -EFAULT; + } + + submit_bio(type, bio); + up_read(&sbi->bio_sem); + return 0; +} + +/* + * This function should be used by the data read flow only where it + * does not check the "create" flag that indicates block allocation. + * The reason for this special functionality is to exploit VFS readahead + * mechanism. 
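get_data_block_ro() below converts the VFS logical block number into a page offset and uses bh->b_size to tell the caller how many consecutive blocks may be mapped for readahead. A small stand-alone sketch of that arithmetic, assuming 4 KB pages and 4 KB filesystem blocks (the usual f2fs configuration; with equal sizes the shift is zero, but it is kept explicit here):

#include <stdio.h>

#define BLKBITS			12	/* assumed 4 KB filesystem block */
#define PAGE_CACHE_SHIFT	12	/* assumed 4 KB page */

int main(void)
{
	unsigned long long iblock = 1234;	/* logical block from the VFS        */
	unsigned int b_size = 16 * 4096;	/* caller allows 16 blocks of readahead */

	unsigned long long pgofs = iblock >> (PAGE_CACHE_SHIFT - BLKBITS);
	unsigned int maxblocks = b_size >> BLKBITS;

	printf("iblock %llu -> page offset %llu, up to %u blocks mapped\n",
	       iblock, pgofs, maxblocks);
	return 0;
}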
+ */ +static int get_data_block_ro(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + unsigned int blkbits = inode->i_sb->s_blocksize_bits; + unsigned maxblocks = bh_result->b_size >> blkbits; + struct dnode_of_data dn; + pgoff_t pgofs; + int err; + + /* Get the page offset from the block offset(iblock) */ + pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); + + if (check_extent_cache(inode, pgofs, bh_result)) { + trace_f2fs_get_data_block(inode, iblock, bh_result, 0); + return 0; + } + + /* When reading holes, we need its node page */ + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); + if (err) { + trace_f2fs_get_data_block(inode, iblock, bh_result, err); + return (err == -ENOENT) ? 0 : err; + } + + /* It does not support data allocation */ + BUG_ON(create); + + if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { + int i; + unsigned int end_offset; + + end_offset = IS_INODE(dn.node_page) ? + ADDRS_PER_INODE(F2FS_I(inode)) : + ADDRS_PER_BLOCK; + + clear_buffer_new(bh_result); + + /* Give more consecutive addresses for the read ahead */ + for (i = 0; i < end_offset - dn.ofs_in_node; i++) + if (((datablock_addr(dn.node_page, + dn.ofs_in_node + i)) + != (dn.data_blkaddr + i)) || maxblocks == i) + break; + map_bh(bh_result, inode->i_sb, dn.data_blkaddr); + bh_result->b_size = (i << blkbits); + } + f2fs_put_dnode(&dn); + trace_f2fs_get_data_block(inode, iblock, bh_result, 0); + return 0; +} + +static int f2fs_read_data_page(struct file *file, struct page *page) +{ + return mpage_readpage(page, get_data_block_ro); +} + +static int f2fs_read_data_pages(struct file *file, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); +} + +int do_write_data_page(struct page *page) +{ + struct inode *inode = page->mapping->host; + block_t old_blk_addr, new_blk_addr; + struct dnode_of_data dn; + int err = 0; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + if (err) + return err; + + old_blk_addr = dn.data_blkaddr; + + /* This page is already truncated */ + if (old_blk_addr == NULL_ADDR) + goto out_writepage; + + set_page_writeback(page); + + /* + * If current allocation needs SSR, + * it had better in-place writes for updated data. + */ + if (unlikely(old_blk_addr != NEW_ADDR && + !is_cold_data(page) && + need_inplace_update(inode))) { + rewrite_data_page(F2FS_SB(inode->i_sb), page, + old_blk_addr); + } else { + write_data_page(inode, page, &dn, + old_blk_addr, &new_blk_addr); + update_extent_cache(new_blk_addr, &dn); + } +out_writepage: + f2fs_put_dnode(&dn); + return err; +} + +static int f2fs_write_data_page(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = ((unsigned long long) i_size) + >> PAGE_CACHE_SHIFT; + unsigned offset; + bool need_balance_fs = false; + int err = 0; + + if (page->index < end_index) + goto write; + + /* + * If the offset is out-of-range of file size, + * this page does not have to be written to disk. 
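A worked example of the end-of-file handling in f2fs_write_data_page() around this point, as a stand-alone sketch; the 4 KB page size and the sample i_size are assumptions for illustration, and the code only models the decision made by the surrounding function.

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long long i_size = 10000;		/* example file size           */
	unsigned long end_index = i_size >> 12;		/* first page touching EOF     */
	unsigned long offset = i_size & (PAGE_SIZE - 1);/* valid bytes in that page    */

	if (end_index)
		printf("pages 0..%lu are written out in full\n", end_index - 1);
	if (offset)
		printf("page %lu is zeroed from byte %lu to %lu, then written\n",
		       end_index, offset, PAGE_SIZE - 1);
	else
		printf("page %lu and beyond lie past EOF and are skipped\n", end_index);
	return 0;
}

For i_size = 10000 this reports pages 0 and 1 written in full, page 2 zeroed from byte 1808 onward before writing, and later pages skipped, matching the goto write / skip / zero_user_segment branches here.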
+ */ + offset = i_size & (PAGE_CACHE_SIZE - 1); + if ((page->index >= end_index + 1) || !offset) { + if (S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + goto out; + } + + zero_user_segment(page, offset, PAGE_CACHE_SIZE); +write: + if (sbi->por_doing) { + err = AOP_WRITEPAGE_ACTIVATE; + goto redirty_out; + } + + /* Dentry blocks are controlled by checkpoint */ + if (S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + err = do_write_data_page(page); + } else { + int ilock = mutex_lock_op(sbi); + err = do_write_data_page(page); + mutex_unlock_op(sbi, ilock); + need_balance_fs = true; + } + if (err == -ENOENT) + goto out; + else if (err) + goto redirty_out; + + if (wbc->for_reclaim) + f2fs_submit_bio(sbi, DATA, true); + + clear_cold_data(page); +out: + unlock_page(page); + if (need_balance_fs) + f2fs_balance_fs(sbi); + return 0; + +redirty_out: + wbc->pages_skipped++; + set_page_dirty(page); + return err; +} + +#define MAX_DESIRED_PAGES_WP 4096 + +static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, + void *data) +{ + struct address_space *mapping = data; + int ret = mapping->a_ops->writepage(page, wbc); + mapping_set_error(mapping, ret); + return ret; +} + +static int f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + bool locked = false; + int ret; + long excess_nrtw = 0, desired_nrtw; + + /* deal with chardevs and other special file */ + if (!mapping->a_ops->writepage) + return 0; + + if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { + desired_nrtw = MAX_DESIRED_PAGES_WP; + excess_nrtw = desired_nrtw - wbc->nr_to_write; + wbc->nr_to_write = desired_nrtw; + } + + if (!S_ISDIR(inode->i_mode)) { + mutex_lock(&sbi->writepages); + locked = true; + } + ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); + if (locked) + mutex_unlock(&sbi->writepages); + f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); + + remove_dirty_dir_inode(inode); + + wbc->nr_to_write -= excess_nrtw; + return ret; +} + +static int f2fs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; + struct dnode_of_data dn; + int err = 0; + int ilock; + + f2fs_balance_fs(sbi); +repeat: + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + return -ENOMEM; + *pagep = page; + + ilock = mutex_lock_op(sbi); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, ALLOC_NODE); + if (err) + goto err; + + if (dn.data_blkaddr == NULL_ADDR) + err = reserve_new_block(&dn); + + f2fs_put_dnode(&dn); + if (err) + goto err; + + mutex_unlock_op(sbi, ilock); + + if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) + return 0; + + if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { + unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned end = start + len; + + /* Reading beyond i_size is simple: memset to zero */ + zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); + goto out; + } + + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + } else { + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) + 
return err; + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return -EIO; + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } + } +out: + SetPageUptodate(page); + clear_cold_data(page); + return 0; + +err: + mutex_unlock_op(sbi, ilock); + f2fs_put_page(page, 1); + return err; +} + +static int f2fs_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + + SetPageUptodate(page); + set_page_dirty(page); + + if (pos + copied > i_size_read(inode)) { + i_size_write(inode, pos + copied); + mark_inode_dirty(inode); + update_inode_page(inode); + } + + unlock_page(page); + page_cache_release(page); + return copied; +} + +static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + + if (rw == WRITE) + return 0; + + /* Needs synchronization with the cleaner */ + return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + get_data_block_ro); +} + +static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (S_ISDIR(inode->i_mode) && PageDirty(page)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + ClearPagePrivate(page); +} + +static int f2fs_release_data_page(struct page *page, gfp_t wait) +{ + ClearPagePrivate(page); + return 1; +} + +static int f2fs_set_data_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + set_dirty_dir_page(inode, page); + return 1; + } + return 0; +} + +static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) +{ + return generic_block_bmap(mapping, block, get_data_block_ro); +} + +const struct address_space_operations f2fs_dblock_aops = { + .readpage = f2fs_read_data_page, + .readpages = f2fs_read_data_pages, + .writepage = f2fs_write_data_page, + .writepages = f2fs_write_data_pages, + .write_begin = f2fs_write_begin, + .write_end = f2fs_write_end, + .set_page_dirty = f2fs_set_data_page_dirty, + .invalidatepage = f2fs_invalidate_data_page, + .releasepage = f2fs_release_data_page, + .direct_IO = f2fs_direct_IO, + .bmap = f2fs_bmap, +}; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c new file mode 100644 index 00000000000..07d58ea6b65 --- /dev/null +++ b/fs/f2fs/debug.c @@ -0,0 +1,354 @@ +/* + * f2fs debugging statistics + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * Copyright (c) 2012 Linux Foundation + * Copyright (c) 2012 Greg Kroah-Hartman + * Copyright (c) 2014 XPerience(R) Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
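The write_begin/write_end pair above splits a buffered write at byte position pos into a page index plus an in-page byte range, and write_end grows i_size when the copied bytes land past the current end of file. A stand-alone sketch of that bookkeeping, with 4 KB pages and the sample values as assumptions:

#include <stdio.h>

#define PAGE_CACHE_SIZE  4096UL
#define PAGE_CACHE_SHIFT 12

int main(void)
{
	unsigned long long pos = 12345, i_size = 12000;
	unsigned int len = 700, copied = 700;

	unsigned long index = pos >> PAGE_CACHE_SHIFT;		/* page to grab   */
	unsigned int start = pos & (PAGE_CACHE_SIZE - 1);	/* first byte     */
	unsigned int end = start + len;				/* last byte + 1  */

	printf("write hits page %lu, bytes %u..%u\n", index, start, end - 1);
	if (pos + copied > i_size)
		printf("i_size grows from %llu to %llu\n", i_size, pos + copied);
	return 0;
}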
+ */ + +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "gc.h" + +static LIST_HEAD(f2fs_stat_list); +static struct dentry *debugfs_root; +static DEFINE_MUTEX(f2fs_stat_mutex); + +static void update_general_status(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + int i; + + /* valid check of the segment numbers */ + si->hit_ext = sbi->read_hit_ext; + si->total_ext = sbi->total_hit_ext; + si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); + si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); + si->ndirty_dirs = sbi->n_dirty_dirs; + si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; + si->rsvd_segs = reserved_segments(sbi); + si->overp_segs = overprovision_segments(sbi); + si->valid_count = valid_user_blocks(sbi); + si->valid_node_count = valid_node_count(sbi); + si->valid_inode_count = valid_inode_count(sbi); + si->utilization = utilization(sbi); + + si->free_segs = free_segments(sbi); + si->free_secs = free_sections(sbi); + si->prefree_count = prefree_segments(sbi); + si->dirty_count = dirty_segments(sbi); + si->node_pages = sbi->node_inode->i_mapping->nrpages; + si->meta_pages = sbi->meta_inode->i_mapping->nrpages; + si->nats = NM_I(sbi)->nat_cnt; + si->sits = SIT_I(sbi)->dirty_sentries; + si->fnids = NM_I(sbi)->fcnt; + si->bg_gc = sbi->bg_gc; + si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) + * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) + / 2; + si->util_valid = (int)(written_block_count(sbi) >> + sbi->log_blocks_per_seg) + * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) + / 2; + si->util_invalid = 50 - si->util_free - si->util_valid; + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + si->curseg[i] = curseg->segno; + si->cursec[i] = curseg->segno / sbi->segs_per_sec; + si->curzone[i] = si->cursec[i] / sbi->secs_per_zone; + } + + for (i = 0; i < 2; i++) { + si->segment_count[i] = sbi->segment_count[i]; + si->block_count[i] = sbi->block_count[i]; + } +} + +/* + * This function calculates BDF of every segments + */ +static void update_sit_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; + struct sit_info *sit_i = SIT_I(sbi); + unsigned int segno, vblocks; + int ndirty = 0; + + bimodal = 0; + total_vblocks = 0; + blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); + hblks_per_sec = blks_per_sec / 2; + mutex_lock(&sit_i->sentry_lock); + for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { + vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + dist = abs(vblocks - hblks_per_sec); + bimodal += dist * dist; + + if (vblocks > 0 && vblocks < blks_per_sec) { + total_vblocks += vblocks; + ndirty++; + } + } + mutex_unlock(&sit_i->sentry_lock); + dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; + si->bimodal = bimodal / dist; + if (si->dirty_count) + si->avg_vblocks = total_vblocks / ndirty; + else + si->avg_vblocks = 0; +} + +/* + * This function calculates memory footprint. 
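update_sit_info() above reports a bimodal degree (the "BDF" line in the status output): how far each section's valid-block count sits from the half-full point, squared, summed over sections, and scaled to a 0..100 range. A user-space rendition of that calculation with a made-up section size and sample data:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned int blks_per_sec = 1024;		/* assumed section size        */
	unsigned int hblks = blks_per_sec / 2;
	unsigned int valid[] = { 0, 1024, 100, 900, 512 };/* valid blocks per section  */
	unsigned int nsecs = sizeof(valid) / sizeof(valid[0]);
	unsigned long long bimodal = 0, total_vblocks = 0;
	unsigned int i, ndirty = 0, dist;

	for (i = 0; i < nsecs; i++) {
		dist = abs((int)valid[i] - (int)hblks);
		bimodal += (unsigned long long)dist * dist;
		if (valid[i] > 0 && valid[i] < blks_per_sec) {	/* dirty section */
			total_vblocks += valid[i];
			ndirty++;
		}
	}
	/* normalise: fully bimodal (every section empty or full) gives 100 */
	bimodal = bimodal / ((unsigned long long)nsecs * hblks * hblks / 100);
	printf("BDF: %llu, avg. vblocks in dirty sections: %llu\n",
	       bimodal, ndirty ? total_vblocks / ndirty : 0);
	return 0;
}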
+ */ +static void update_mem_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned npages; + + if (si->base_mem) + goto get_cache; + + si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; + si->base_mem += 2 * sizeof(struct f2fs_inode_info); + si->base_mem += sizeof(*sbi->ckpt); + + /* build sm */ + si->base_mem += sizeof(struct f2fs_sm_info); + + /* build sit */ + si->base_mem += sizeof(struct sit_info); + si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry); + si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); + if (sbi->segs_per_sec > 1) + si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); + si->base_mem += __bitmap_size(sbi, SIT_BITMAP); + + /* build free segmap */ + si->base_mem += sizeof(struct free_segmap_info); + si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); + + /* build curseg */ + si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; + si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE; + + /* build dirty segmap */ + si->base_mem += sizeof(struct dirty_seglist_info); + si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); + + /* buld nm */ + si->base_mem += sizeof(struct f2fs_nm_info); + si->base_mem += __bitmap_size(sbi, NAT_BITMAP); + + /* build gc */ + si->base_mem += sizeof(struct f2fs_gc_kthread); + +get_cache: + /* free nids */ + si->cache_mem = NM_I(sbi)->fcnt; + si->cache_mem += NM_I(sbi)->nat_cnt; + npages = sbi->node_inode->i_mapping->nrpages; + si->cache_mem += npages << PAGE_CACHE_SHIFT; + npages = sbi->meta_inode->i_mapping->nrpages; + si->cache_mem += npages << PAGE_CACHE_SHIFT; + si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); + si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); +} + +static int stat_show(struct seq_file *s, void *v) +{ + struct f2fs_stat_info *si; + int i = 0; + int j; + + mutex_lock(&f2fs_stat_mutex); + list_for_each_entry(si, &f2fs_stat_list, stat_list) { + char devname[BDEVNAME_SIZE]; + + update_general_status(si->sbi); + + seq_printf(s, "\n=====[ partition info(%s). 
#%d ]=====\n", + bdevname(si->sbi->sb->s_bdev, devname), i++); + seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", + si->sit_area_segs, si->nat_area_segs); + seq_printf(s, "[SSA: %d] [MAIN: %d", + si->ssa_area_segs, si->main_area_segs); + seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", + si->overp_segs, si->rsvd_segs); + seq_printf(s, "Utilization: %d%% (%d valid blocks)\n", + si->utilization, si->valid_count); + seq_printf(s, " - Node: %u (Inode: %u, ", + si->valid_node_count, si->valid_inode_count); + seq_printf(s, "Other: %u)\n - Data: %u\n", + si->valid_node_count - si->valid_inode_count, + si->valid_count - si->valid_node_count); + seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", + si->main_area_segs, si->main_area_sections, + si->main_area_zones); + seq_printf(s, " - COLD data: %d, %d, %d\n", + si->curseg[CURSEG_COLD_DATA], + si->cursec[CURSEG_COLD_DATA], + si->curzone[CURSEG_COLD_DATA]); + seq_printf(s, " - WARM data: %d, %d, %d\n", + si->curseg[CURSEG_WARM_DATA], + si->cursec[CURSEG_WARM_DATA], + si->curzone[CURSEG_WARM_DATA]); + seq_printf(s, " - HOT data: %d, %d, %d\n", + si->curseg[CURSEG_HOT_DATA], + si->cursec[CURSEG_HOT_DATA], + si->curzone[CURSEG_HOT_DATA]); + seq_printf(s, " - Dir dnode: %d, %d, %d\n", + si->curseg[CURSEG_HOT_NODE], + si->cursec[CURSEG_HOT_NODE], + si->curzone[CURSEG_HOT_NODE]); + seq_printf(s, " - File dnode: %d, %d, %d\n", + si->curseg[CURSEG_WARM_NODE], + si->cursec[CURSEG_WARM_NODE], + si->curzone[CURSEG_WARM_NODE]); + seq_printf(s, " - Indir nodes: %d, %d, %d\n", + si->curseg[CURSEG_COLD_NODE], + si->cursec[CURSEG_COLD_NODE], + si->curzone[CURSEG_COLD_NODE]); + seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n", + si->main_area_segs - si->dirty_count - + si->prefree_count - si->free_segs, + si->dirty_count); + seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", + si->prefree_count, si->free_segs, si->free_secs); + seq_printf(s, "GC calls: %d (BG: %d)\n", + si->call_count, si->bg_gc); + seq_printf(s, " - data segments : %d\n", si->data_segs); + seq_printf(s, " - node segments : %d\n", si->node_segs); + seq_printf(s, "Try to move %d blocks\n", si->tot_blks); + seq_printf(s, " - data blocks : %d\n", si->data_blks); + seq_printf(s, " - node blocks : %d\n", si->node_blks); + seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", + si->hit_ext, si->total_ext); + seq_printf(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - nodes %4d in %4d\n", + si->ndirty_node, si->node_pages); + seq_printf(s, " - dents %4d in dirs:%4d\n", + si->ndirty_dent, si->ndirty_dirs); + seq_printf(s, " - meta %4d in %4d\n", + si->ndirty_meta, si->meta_pages); + seq_printf(s, " - NATs %5d > %lu\n", + si->nats, NM_WOUT_THRESHOLD); + seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", + si->sits, si->fnids); + seq_puts(s, "\nDistribution of User Blocks:"); + seq_puts(s, " [ valid | invalid | free ]\n"); + seq_puts(s, " ["); + + for (j = 0; j < si->util_valid; j++) + seq_putc(s, '-'); + seq_putc(s, '|'); + + for (j = 0; j < si->util_invalid; j++) + seq_putc(s, '-'); + seq_putc(s, '|'); + + for (j = 0; j < si->util_free; j++) + seq_putc(s, '-'); + seq_puts(s, "]\n\n"); + seq_printf(s, "SSR: %u blocks in %u segments\n", + si->block_count[SSR], si->segment_count[SSR]); + seq_printf(s, "LFS: %u blocks in %u segments\n", + si->block_count[LFS], si->segment_count[LFS]); + + /* segment usage info */ + update_sit_info(si->sbi); + seq_printf(s, "\nBDF: %u, avg. 
vblocks: %u\n", + si->bimodal, si->avg_vblocks); + + /* memory footprint */ + update_mem_info(si->sbi); + seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", + (si->base_mem + si->cache_mem) >> 10, + si->base_mem >> 10, si->cache_mem >> 10); + } + mutex_unlock(&f2fs_stat_mutex); + return 0; +} + +static int stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, stat_show, inode->i_private); +} + +static const struct file_operations stat_fops = { + .open = stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int f2fs_build_stats(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_stat_info *si; + + si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL); + if (!si) + return -ENOMEM; + + si->all_area_segs = le32_to_cpu(raw_super->segment_count); + si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); + si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); + si->ssa_area_segs = le32_to_cpu(raw_super->segment_count_ssa); + si->main_area_segs = le32_to_cpu(raw_super->segment_count_main); + si->main_area_sections = le32_to_cpu(raw_super->section_count); + si->main_area_zones = si->main_area_sections / + le32_to_cpu(raw_super->secs_per_zone); + si->sbi = sbi; + sbi->stat_info = si; + + mutex_lock(&f2fs_stat_mutex); + list_add_tail(&si->stat_list, &f2fs_stat_list); + mutex_unlock(&f2fs_stat_mutex); + + return 0; +} + +void f2fs_destroy_stats(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + mutex_lock(&f2fs_stat_mutex); + list_del(&si->stat_list); + mutex_unlock(&f2fs_stat_mutex); + + kfree(si); +} + +void __init f2fs_create_root_stats(void) +{ + debugfs_root = debugfs_create_dir("f2fs", NULL); + if (debugfs_root) + debugfs_create_file("status", S_IRUGO, debugfs_root, + NULL, &stat_fops); +} + +void f2fs_destroy_root_stats(void) +{ + debugfs_remove_recursive(debugfs_root); + debugfs_root = NULL; +} diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c new file mode 100644 index 00000000000..be6cd8a704b --- /dev/null +++ b/fs/f2fs/dir.c @@ -0,0 +1,716 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/dir.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
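dir.c below looks names up in a multi-level hash table stored inside the directory file: level L holds dir_buckets(L) buckets of bucket_blocks(L) dentry blocks each, and dir_block_index() turns (level, bucket) into a block offset within the directory. The stand-alone sketch below mirrors those three helpers; MAX_DIR_HASH_DEPTH is taken as 63 and the hash value is arbitrary, both assumptions for illustration rather than quotes from this hunk.

#include <stdio.h>

#define MAX_DIR_HASH_DEPTH 63

static unsigned int dir_buckets(unsigned int level)
{
	if (level < MAX_DIR_HASH_DEPTH / 2)
		return 1u << level;
	return 1u << (MAX_DIR_HASH_DEPTH / 2 - 1);
}

static unsigned int bucket_blocks(unsigned int level)
{
	return level < MAX_DIR_HASH_DEPTH / 2 ? 2 : 4;
}

static unsigned long dir_block_index(unsigned int level, unsigned int idx)
{
	unsigned long bidx = 0;
	unsigned int i;

	for (i = 0; i < level; i++)
		bidx += dir_buckets(i) * bucket_blocks(i);
	return bidx + (unsigned long)idx * bucket_blocks(level);
}

int main(void)
{
	unsigned int level, hash = 0xdeadbeef;	/* stand-in for f2fs_dentry_hash() */

	for (level = 0; level < 4; level++) {
		unsigned int bucket = hash % dir_buckets(level);
		printf("level %u: bucket %u -> blocks %lu..%lu\n",
		       level, bucket, dir_block_index(level, bucket),
		       dir_block_index(level, bucket) + bucket_blocks(level) - 1);
	}
	return 0;
}

Because each level doubles the bucket count, a lookup that misses at level 0 only has to scan a handful of blocks per level rather than the whole directory.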
+ */ +#include +#include +#include "f2fs.h" +#include "node.h" +#include "acl.h" +#include "xattr.h" + +static unsigned long dir_blocks(struct inode *inode) +{ + return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1)) + >> PAGE_CACHE_SHIFT; +} + +static unsigned int dir_buckets(unsigned int level) +{ + if (level < MAX_DIR_HASH_DEPTH / 2) + return 1 << level; + else + return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); +} + +static unsigned int bucket_blocks(unsigned int level) +{ + if (level < MAX_DIR_HASH_DEPTH / 2) + return 2; + else + return 4; +} + +static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { + [F2FS_FT_UNKNOWN] = DT_UNKNOWN, + [F2FS_FT_REG_FILE] = DT_REG, + [F2FS_FT_DIR] = DT_DIR, + [F2FS_FT_CHRDEV] = DT_CHR, + [F2FS_FT_BLKDEV] = DT_BLK, + [F2FS_FT_FIFO] = DT_FIFO, + [F2FS_FT_SOCK] = DT_SOCK, + [F2FS_FT_SYMLINK] = DT_LNK, +}; + +#define S_SHIFT 12 +static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, + [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK, + [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, +}; + +static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +{ + mode_t mode = inode->i_mode; + de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; +} + +static unsigned long dir_block_index(unsigned int level, unsigned int idx) +{ + unsigned long i; + unsigned long bidx = 0; + + for (i = 0; i < level; i++) + bidx += dir_buckets(i) * bucket_blocks(i); + bidx += idx * bucket_blocks(level); + return bidx; +} + +static bool early_match_name(const char *name, size_t namelen, + f2fs_hash_t namehash, struct f2fs_dir_entry *de) +{ + if (le16_to_cpu(de->name_len) != namelen) + return false; + + if (de->hash_code != namehash) + return false; + + return true; +} + +static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, + const char *name, size_t namelen, int *max_slots, + f2fs_hash_t namehash, struct page **res_page, + bool nocase) +{ + struct f2fs_dir_entry *de; + unsigned long bit_pos, end_pos, next_pos; + struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); + int slots; + + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, 0); + while (bit_pos < NR_DENTRY_IN_BLOCK) { + de = &dentry_blk->dentry[bit_pos]; + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + + if (nocase) { + if ((le16_to_cpu(de->name_len) == namelen) && + !strncasecmp(dentry_blk->filename[bit_pos], + name, namelen)) { + *res_page = dentry_page; + goto found; + } + } else if (early_match_name(name, namelen, namehash, de)) { + if (!memcmp(dentry_blk->filename[bit_pos], + name, namelen)) { + *res_page = dentry_page; + goto found; + } + } + next_pos = bit_pos + slots; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, next_pos); + if (bit_pos >= NR_DENTRY_IN_BLOCK) + end_pos = NR_DENTRY_IN_BLOCK; + else + end_pos = bit_pos; + if (*max_slots < end_pos - next_pos) + *max_slots = end_pos - next_pos; + } + + de = NULL; + kunmap(dentry_page); +found: + return de; +} + +static struct f2fs_dir_entry *find_in_level(struct inode *dir, + unsigned int level, const char *name, size_t namelen, + f2fs_hash_t namehash, struct page **res_page) +{ + int s = GET_DENTRY_SLOTS(namelen); + unsigned int nbucket, nblock; + unsigned int bidx, end_block; + struct page *dentry_page; + struct f2fs_dir_entry *de = NULL; + struct f2fs_sb_info 
*sbi = F2FS_SB(dir->i_sb); + bool room = false; + int max_slots = 0; + + BUG_ON(level > MAX_DIR_HASH_DEPTH); + + nbucket = dir_buckets(level); + nblock = bucket_blocks(level); + + bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); + end_block = bidx + nblock; + + for (; bidx < end_block; bidx++) { + bool nocase = false; + + /* no need to allocate new dentry pages to all the indices */ + dentry_page = find_data_page(dir, bidx, true); + if (IS_ERR(dentry_page)) { + room = true; + continue; + } + + if (test_opt(sbi, ANDROID_EMU) && + (sbi->android_emu_flags & F2FS_ANDROID_EMU_NOCASE) && + F2FS_I(dir)->i_advise & FADVISE_ANDROID_EMU) + nocase = true; + + de = find_in_block(dentry_page, name, namelen, + &max_slots, namehash, res_page, + nocase); + if (de) + break; + + if (max_slots >= s) + room = true; + f2fs_put_page(dentry_page, 0); + } + + if (!de && room && F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; + } + + return de; +} + +/* + * Find an entry in the specified directory with the wanted name. + * It returns the page where the entry was found (as a parameter - res_page), + * and the entry itself. Page is returned mapped and unlocked. + * Entry is guaranteed to be valid. + */ +struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, + struct qstr *child, struct page **res_page) +{ + const char *name = child->name; + size_t namelen = child->len; + unsigned long npages = dir_blocks(dir); + struct f2fs_dir_entry *de = NULL; + f2fs_hash_t name_hash; + unsigned int max_depth; + unsigned int level; + + if (namelen > F2FS_NAME_LEN) + return NULL; + + if (npages == 0) + return NULL; + + *res_page = NULL; + + name_hash = f2fs_dentry_hash(name, namelen); + max_depth = F2FS_I(dir)->i_current_depth; + + for (level = 0; level < max_depth; level++) { + de = find_in_level(dir, level, name, + namelen, name_hash, res_page); + if (de) + break; + } + if (!de && F2FS_I(dir)->chash != name_hash) { + F2FS_I(dir)->chash = name_hash; + F2FS_I(dir)->clevel = level - 1; + } + return de; +} + +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) +{ + struct page *page; + struct f2fs_dir_entry *de; + struct f2fs_dentry_block *dentry_blk; + + page = get_lock_data_page(dir, 0); + if (IS_ERR(page)) + return NULL; + + dentry_blk = kmap(page); + de = &dentry_blk->dentry[1]; + *p = page; + unlock_page(page); + return de; +} + +ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) +{ + ino_t res = 0; + struct f2fs_dir_entry *de; + struct page *page; + + de = f2fs_find_entry(dir, qstr, &page); + if (de) { + res = le32_to_cpu(de->ino); + kunmap(page); + f2fs_put_page(page, 0); + } + + return res; +} + +void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, + struct page *page, struct inode *inode) +{ + lock_page(page); + wait_on_page_writeback(page); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + kunmap(page); + set_page_dirty(page); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + mark_inode_dirty(dir); + + /* update parent inode number before releasing dentry page */ + F2FS_I(inode)->i_pino = dir->i_ino; + + f2fs_put_page(page, 1); +} + +static void init_dent_inode(const struct qstr *name, struct page *ipage) +{ + struct f2fs_node *rn; + + /* copy name info. 
to this inode page */ + rn = F2FS_NODE(ipage); + rn->i.i_namelen = cpu_to_le32(name->len); + memcpy(rn->i.i_name, name->name, name->len); + set_page_dirty(ipage); +} + +int update_dent_inode(struct inode *inode, const struct qstr *name) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + init_dent_inode(name, page); + f2fs_put_page(page, 1); + + return 0; +} + +static int make_empty_dir(struct inode *inode, + struct inode *parent, struct page *page) +{ + struct page *dentry_page; + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dir_entry *de; + void *kaddr; + + dentry_page = get_new_data_page(inode, page, 0, true); + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); + + kaddr = kmap_atomic(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)kaddr; + + de = &dentry_blk->dentry[0]; + de->name_len = cpu_to_le16(1); + de->hash_code = 0; + de->ino = cpu_to_le32(inode->i_ino); + memcpy(dentry_blk->filename[0], ".", 1); + set_de_type(de, inode); + + de = &dentry_blk->dentry[1]; + de->hash_code = 0; + de->name_len = cpu_to_le16(2); + de->ino = cpu_to_le32(parent->i_ino); + memcpy(dentry_blk->filename[1], "..", 2); + set_de_type(de, inode); + + test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); + test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + kunmap_atomic(kaddr); + + set_page_dirty(dentry_page); + f2fs_put_page(dentry_page, 1); + return 0; +} + +static struct page *init_inode_metadata(struct inode *inode, + struct inode *dir, const struct qstr *name) +{ + struct page *page; + int err; + + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + page = new_inode_page(inode, name); + if (IS_ERR(page)) + return page; + + if (S_ISDIR(inode->i_mode)) { + err = make_empty_dir(inode, dir, page); + if (err) + goto error; + } + + err = f2fs_init_acl(inode, dir); + if (err) + goto error; + + err = f2fs_init_security(inode, dir, name, page); + if (err) + goto error; + + wait_on_page_writeback(page); + } else { + page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); + if (IS_ERR(page)) + return page; + + wait_on_page_writeback(page); + set_cold_node(inode, page); + } + + init_dent_inode(name, page); + + /* + * This file should be checkpointed during fsync. + * We lost i_pino from now on. 
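make_empty_dir() above and __f2fs_add_link() below account for names in fixed-size slots tracked by a per-block bitmap, via GET_DENTRY_SLOTS() and room_for_filename(). A small sketch of that slot accounting; the 8-byte slot size and 214 slots per dentry block are taken from the f2fs on-disk format, whose constants are defined outside this hunk, so treat them as assumptions here.

#include <stdio.h>
#include <string.h>

#define F2FS_SLOT_LEN		8
#define NR_DENTRY_IN_BLOCK	214

static unsigned int dentry_slots(size_t namelen)
{
	return (namelen + F2FS_SLOT_LEN - 1) / F2FS_SLOT_LEN;
}

int main(void)
{
	const char *names[] = { ".", "..", "lost+found", "a-rather-long-filename.txt" };
	unsigned int i, used = 0;

	for (i = 0; i < 4; i++) {
		unsigned int s = dentry_slots(strlen(names[i]));
		printf("%-28s needs %u slot(s)\n", names[i], s);
		used += s;
	}
	printf("%u of %u slots used in this dentry block\n", used, NR_DENTRY_IN_BLOCK);
	return 0;
}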
+ */ + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + file_lost_pino(inode); + inc_nlink(inode); + } + return page; + +error: + f2fs_put_page(page, 1); + remove_inode_page(inode); + return ERR_PTR(err); +} + +static void update_parent_metadata(struct inode *dir, struct inode *inode, + unsigned int current_depth) +{ + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (S_ISDIR(inode->i_mode)) { + inc_nlink(dir); + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); + } + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + if (F2FS_I(dir)->i_current_depth != current_depth) { + F2FS_I(dir)->i_current_depth = current_depth; + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) + update_inode_page(dir); + else + mark_inode_dirty(dir); + + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); +} + +static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) +{ + int bit_start = 0; + int zero_start, zero_end; +next: + zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_start); + if (zero_start >= NR_DENTRY_IN_BLOCK) + return NR_DENTRY_IN_BLOCK; + + zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + zero_start); + if (zero_end - zero_start >= slots) + return zero_start; + + bit_start = zero_end + 1; + + if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) + return NR_DENTRY_IN_BLOCK; + goto next; +} + +/* + * Caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). + */ +int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) +{ + unsigned int bit_pos; + unsigned int level; + unsigned int current_depth; + unsigned long bidx, block; + f2fs_hash_t dentry_hash; + struct f2fs_dir_entry *de; + unsigned int nbucket, nblock; + size_t namelen = name->len; + struct page *dentry_page = NULL; + struct f2fs_dentry_block *dentry_blk = NULL; + int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; + int err = 0; + int i; + + dentry_hash = f2fs_dentry_hash(name->name, name->len); + level = 0; + current_depth = F2FS_I(dir)->i_current_depth; + if (F2FS_I(dir)->chash == dentry_hash) { + level = F2FS_I(dir)->clevel; + F2FS_I(dir)->chash = 0; + } + +start: + if (current_depth == MAX_DIR_HASH_DEPTH) + return -ENOSPC; + + /* Increase the depth, if required */ + if (level == current_depth) + ++current_depth; + + nbucket = dir_buckets(level); + nblock = bucket_blocks(level); + + bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); + + for (block = bidx; block <= (bidx + nblock - 1); block++) { + dentry_page = get_new_data_page(dir, NULL, block, true); + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); + + dentry_blk = kmap(dentry_page); + bit_pos = room_for_filename(dentry_blk, slots); + if (bit_pos < NR_DENTRY_IN_BLOCK) + goto add_dentry; + + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + } + + /* Move to next level to find the empty slot for new dentry */ + ++level; + goto start; +add_dentry: + wait_on_page_writeback(dentry_page); + + page = init_inode_metadata(inode, dir, name); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } + de = &dentry_blk->dentry[bit_pos]; + de->hash_code = dentry_hash; + de->name_len = cpu_to_le16(namelen); + memcpy(dentry_blk->filename[bit_pos], name->name, name->len); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + for (i = 0; i < slots; 
i++) + test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + set_page_dirty(dentry_page); + + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, current_depth); +fail: + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + return err; +} + +/* + * It only removes the dentry from the dentry page,corresponding name + * entry in name page does not need to be touched during deletion. + */ +void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *inode) +{ + struct f2fs_dentry_block *dentry_blk; + unsigned int bit_pos; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + void *kaddr = page_address(page); + int i; + + lock_page(page); + wait_on_page_writeback(page); + + dentry_blk = (struct f2fs_dentry_block *)kaddr; + bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; + for (i = 0; i < slots; i++) + test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + + /* Let's check and deallocate this dentry page */ + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + 0); + kunmap(page); /* kunmap - pair of f2fs_find_entry */ + set_page_dirty(page); + + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + if (inode && S_ISDIR(inode->i_mode)) { + drop_nlink(dir); + update_inode_page(dir); + } else { + mark_inode_dirty(dir); + } + + if (inode) { + inode->i_ctime = CURRENT_TIME; + drop_nlink(inode); + if (S_ISDIR(inode->i_mode)) { + drop_nlink(inode); + i_size_write(inode, 0); + } + update_inode_page(inode); + + if (inode->i_nlink == 0) + add_orphan_inode(sbi, inode->i_ino); + else + release_orphan_inode(sbi); + } + + if (bit_pos == NR_DENTRY_IN_BLOCK) { + truncate_hole(dir, page->index, page->index + 1); + clear_page_dirty_for_io(page); + ClearPageUptodate(page); + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(dir); + } + f2fs_put_page(page, 1); +} + +bool f2fs_empty_dir(struct inode *dir) +{ + unsigned long bidx; + struct page *dentry_page; + unsigned int bit_pos; + struct f2fs_dentry_block *dentry_blk; + unsigned long nblock = dir_blocks(dir); + + for (bidx = 0; bidx < nblock; bidx++) { + void *kaddr; + dentry_page = get_lock_data_page(dir, bidx); + if (IS_ERR(dentry_page)) { + if (PTR_ERR(dentry_page) == -ENOENT) + continue; + else + return false; + } + + kaddr = kmap_atomic(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)kaddr; + if (bidx == 0) + bit_pos = 2; + else + bit_pos = 0; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_pos); + kunmap_atomic(kaddr); + + f2fs_put_page(dentry_page, 1); + + if (bit_pos < NR_DENTRY_IN_BLOCK) + return false; + } + return true; +} + +static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + unsigned long pos = file->f_pos; + struct inode *inode = file->f_dentry->d_inode; + unsigned long npages = dir_blocks(inode); + unsigned char *types = NULL; + unsigned int bit_pos = 0, start_bit_pos = 0; + int over = 0; + struct f2fs_dentry_block *dentry_blk = NULL; + struct f2fs_dir_entry *de = NULL; + struct page *dentry_page = NULL; + unsigned int n = 0; + unsigned char d_type = DT_UNKNOWN; + int slots; + + types = f2fs_filetype_table; + bit_pos = (pos % NR_DENTRY_IN_BLOCK); + n = (pos / 
NR_DENTRY_IN_BLOCK); + + for ( ; n < npages; n++) { + dentry_page = get_lock_data_page(inode, n); + if (IS_ERR(dentry_page)) + continue; + + start_bit_pos = bit_pos; + dentry_blk = kmap(dentry_page); + while (bit_pos < NR_DENTRY_IN_BLOCK) { + d_type = DT_UNKNOWN; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_pos); + if (bit_pos >= NR_DENTRY_IN_BLOCK) + break; + + de = &dentry_blk->dentry[bit_pos]; + if (types && de->file_type < F2FS_FT_MAX) + d_type = types[de->file_type]; + + over = filldir(dirent, + dentry_blk->filename[bit_pos], + le16_to_cpu(de->name_len), + (n * NR_DENTRY_IN_BLOCK) + bit_pos, + le32_to_cpu(de->ino), d_type); + if (over) { + file->f_pos += bit_pos - start_bit_pos; + goto success; + } + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + bit_pos += slots; + } + bit_pos = 0; + file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK; + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + dentry_page = NULL; + } +success: + if (dentry_page && !IS_ERR(dentry_page)) { + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + } + + return 0; +} + +const struct file_operations f2fs_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = f2fs_readdir, + .fsync = f2fs_sync_file, + .unlocked_ioctl = f2fs_ioctl, +}; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h new file mode 100644 index 00000000000..e0558ca8250 --- /dev/null +++ b/fs/f2fs/f2fs.h @@ -0,0 +1,1292 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/f2fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_F2FS_H +#define _LINUX_F2FS_H + +#include +#include +#include +#include +#include +#include +#include + +/* + * For mount options + */ +#define F2FS_MOUNT_BG_GC 0x00000001 +#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 +#define F2FS_MOUNT_DISCARD 0x00000004 +#define F2FS_MOUNT_NOHEAP 0x00000008 +#define F2FS_MOUNT_XATTR_USER 0x00000010 +#define F2FS_MOUNT_POSIX_ACL 0x00000020 +#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 +#define F2FS_MOUNT_INLINE_XATTR 0x00000080 +#define F2FS_MOUNT_ANDROID_EMU 0x00001000 +#define F2FS_MOUNT_ERRORS_PANIC 0x00002000 +#define F2FS_MOUNT_ERRORS_RECOVER 0x00004000 + +#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option) + +#define ver_after(a, b) (typecheck(unsigned long long, a) && \ + typecheck(unsigned long long, b) && \ + ((long long)((a) - (b)) > 0)) + +typedef u32 block_t; /* + * should not change u32, since it is the on-disk block + * address format, __le32. + */ +typedef u32 nid_t; + +struct f2fs_mount_info { + unsigned int opt; +}; + +#define CRCPOLY_LE 0xedb88320 + +static inline __u32 f2fs_crc32(void *buf, size_t len) +{ + unsigned char *p = (unsigned char *)buf; + __u32 crc = F2FS_SUPER_MAGIC; + int i; + + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 
CRCPOLY_LE : 0); + } + return crc; +} + +static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size) +{ + return f2fs_crc32(buf, buf_size) == blk_crc; +} + +/* + * For checkpoint manager + */ +enum { + NAT_BITMAP, + SIT_BITMAP +}; + +/* for the list of orphan inodes */ +struct orphan_inode_entry { + struct list_head list; /* list head */ + nid_t ino; /* inode number */ +}; + +/* for the list of directory inodes */ +struct dir_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ +}; + +/* for the list of fsync inodes, used only during recovery */ +struct fsync_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ + block_t blkaddr; /* block address locating the last inode */ +}; + +#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) +#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits)) + +#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne) +#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid) +#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) +#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) + +static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = nats_in_cursum(rs); + rs->n_nats = cpu_to_le16(before + i); + return before; +} + +static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = sits_in_cursum(rs); + rs->n_sits = cpu_to_le16(before + i); + return before; +} + +/* + * ioctl commands + */ +#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS +#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS + +#if defined(__KERNEL__) && defined(CONFIG_COMPAT) +/* + * ioctl commands in 32 bit emulation + */ +#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#endif + +/* + * For INODE and NODE manager + */ +/* + * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 + * as its node offset to distinguish from index node blocks. + * But some bits are used to mark the node block. + */ +#define XATTR_NODE_OFFSET ((((unsigned int)-1) << OFFSET_BIT_SHIFT) \ + >> OFFSET_BIT_SHIFT) +enum { + ALLOC_NODE, /* allocate a new node page if needed */ + LOOKUP_NODE, /* look up a node without readahead */ + LOOKUP_NODE_RA, /* + * look up a node with readahead called + * by get_datablock_ro. + */ +}; + +#define F2FS_LINK_MAX 32000 /* maximum link count per file */ + +/* for in-memory extent cache entry */ +struct extent_info { + rwlock_t ext_lock; /* rwlock for consistency */ + unsigned int fofs; /* start offset in a file */ + u32 blk_addr; /* start block address of the extent */ + unsigned int len; /* length of the extent */ +}; + +/* + * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. 
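f2fs_crc32() above is a bitwise little-endian CRC-32 (polynomial 0xedb88320) over checkpoint data, seeded with the superblock magic rather than the conventional ~0. A stand-alone copy for experimentation; the 0xF2F52010 magic is not shown in this hunk and is an assumption here.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define F2FS_SUPER_MAGIC 0xF2F52010u
#define CRCPOLY_LE	 0xedb88320u

static uint32_t f2fs_crc32(const void *buf, size_t len)
{
	const unsigned char *p = buf;
	uint32_t crc = F2FS_SUPER_MAGIC;	/* seed with the assumed magic */
	int i;

	while (len--) {
		crc ^= *p++;
		for (i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
	}
	return crc;
}

int main(void)
{
	const char *cp = "checkpoint payload";

	printf("crc = 0x%08x\n", (unsigned int)f2fs_crc32(cp, strlen(cp)));
	return 0;
}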
+ */ +#define FADVISE_COLD_BIT 0x01 +#define FADVISE_LOST_PINO_BIT 0x02 +#define FADVISE_ANDROID_EMU 0x10 +#define FADVISE_ANDROID_EMU_ROOT 0x20 + +struct f2fs_inode_info { + struct inode vfs_inode; /* serve a vfs inode */ + unsigned long i_flags; /* keep an inode flags for ioctl */ + unsigned char i_advise; /* use to give file attribute hints */ + unsigned int i_current_depth; /* use only in directory structure */ + unsigned int i_pino; /* parent inode number */ + umode_t i_acl_mode; /* keep file acl mode temporarily */ + + /* Use below internally in f2fs*/ + unsigned long flags; /* use to pass per-file flags */ + atomic_t dirty_dents; /* # of dirty dentry pages */ + f2fs_hash_t chash; /* hash value of given file name */ + unsigned int clevel; /* maximum level of given file name */ + nid_t i_xattr_nid; /* node id that contains xattrs */ + unsigned long long xattr_ver; /* cp version of xattr modification */ + struct extent_info ext; /* in-memory extent cache entry */ +}; + +static inline void get_extent_info(struct extent_info *ext, + struct f2fs_extent i_ext) +{ + write_lock(&ext->ext_lock); + ext->fofs = le32_to_cpu(i_ext.fofs); + ext->blk_addr = le32_to_cpu(i_ext.blk_addr); + ext->len = le32_to_cpu(i_ext.len); + write_unlock(&ext->ext_lock); +} + +static inline void set_raw_extent(struct extent_info *ext, + struct f2fs_extent *i_ext) +{ + read_lock(&ext->ext_lock); + i_ext->fofs = cpu_to_le32(ext->fofs); + i_ext->blk_addr = cpu_to_le32(ext->blk_addr); + i_ext->len = cpu_to_le32(ext->len); + read_unlock(&ext->ext_lock); +} + +struct f2fs_nm_info { + block_t nat_blkaddr; /* base disk address of NAT */ + nid_t max_nid; /* maximum possible node ids */ + nid_t next_scan_nid; /* the next nid to be scanned */ + + /* NAT cache management */ + struct radix_tree_root nat_root;/* root of the nat entry cache */ + rwlock_t nat_tree_lock; /* protect nat_tree_lock */ + unsigned int nat_cnt; /* the # of cached nat entries */ + struct list_head nat_entries; /* cached nat entry list (clean) */ + struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ + + /* free node ids management */ + struct list_head free_nid_list; /* a list for free nids */ + spinlock_t free_nid_list_lock; /* protect free nid list */ + unsigned int fcnt; /* the number of free node id */ + struct mutex build_lock; /* lock for build free nids */ + + /* for checkpoint */ + char *nat_bitmap; /* NAT bitmap pointer */ + int bitmap_size; /* bitmap size */ +}; + +/* + * this structure is used as one of function parameters. + * all the information are dedicated to a given direct node block determined + * by the data offset in a file. + */ +struct dnode_of_data { + struct inode *inode; /* vfs inode pointer */ + struct page *inode_page; /* its inode page, NULL is possible */ + struct page *node_page; /* cached direct node page */ + nid_t nid; /* node id of the direct node block */ + unsigned int ofs_in_node; /* data offset in the node page */ + bool inode_page_locked; /* inode page is locked or not */ + block_t data_blkaddr; /* block address of the node block */ +}; + +static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, + struct page *ipage, struct page *npage, nid_t nid) +{ + memset(dn, 0, sizeof(*dn)); + dn->inode = inode; + dn->inode_page = ipage; + dn->node_page = npage; + dn->nid = nid; +} + +/* + * For SIT manager + * + * By default, there are 6 active log areas across the whole main area. 
+ * When considering hot and cold data separation to reduce cleaning overhead, + * we split 3 for data logs and 3 for node logs as hot, warm, and cold types, + * respectively. + * In the current design, you should not change the numbers intentionally. + * Instead, as a mount option such as active_logs=x, you can use 2, 4, and 6 + * logs individually according to the underlying devices. (default: 6) + * Just in case, on-disk layout covers maximum 16 logs that consist of 8 for + * data and 8 for node logs. + */ +#define NR_CURSEG_DATA_TYPE (3) +#define NR_CURSEG_NODE_TYPE (3) +#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) + +enum { + CURSEG_HOT_DATA = 0, /* directory entry blocks */ + CURSEG_WARM_DATA, /* data blocks */ + CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ + CURSEG_HOT_NODE, /* direct node blocks of directory files */ + CURSEG_WARM_NODE, /* direct node blocks of normal files */ + CURSEG_COLD_NODE, /* indirect node blocks */ + NO_CHECK_TYPE +}; + +struct f2fs_sm_info { + struct sit_info *sit_info; /* whole segment information */ + struct free_segmap_info *free_info; /* free segment information */ + struct dirty_seglist_info *dirty_info; /* dirty segment information */ + struct curseg_info *curseg_array; /* active segment information */ + + struct list_head wblist_head; /* list of under-writeback pages */ + spinlock_t wblist_lock; /* lock for checkpoint */ + + block_t seg0_blkaddr; /* block address of 0'th segment */ + block_t main_blkaddr; /* start block address of main area */ + block_t ssa_blkaddr; /* start block address of SSA area */ + + unsigned int segment_count; /* total # of segments */ + unsigned int main_segments; /* # of segments in main area */ + unsigned int reserved_segments; /* # of reserved segments */ + unsigned int ovp_segments; /* # of overprovision segments */ +}; + +/* + * For superblock + */ +/* + * COUNT_TYPE for monitoring + * + * f2fs monitors the number of several block types such as on-writeback, + * dirty dentry blocks, dirty node blocks, and dirty meta blocks. + */ +enum count_type { + F2FS_WRITEBACK, + F2FS_DIRTY_DENTS, + F2FS_DIRTY_NODES, + F2FS_DIRTY_META, + NR_COUNT_TYPE, +}; + +/* + * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS]. + * The checkpoint procedure blocks all the locks in this fs_lock array. + * Some FS operations grab free locks, and if there is no free lock, + * then wait to grab a lock in a round-robin manner. + */ +#define NR_GLOBAL_LOCKS 8 + +/* + * The below are the page types of bios used in submti_bio(). + * The available types are: + * DATA User data pages. It operates as async mode. + * NODE Node pages. It operates as async mode. + * META FS metadata pages such as SIT, NAT, CP. + * NR_PAGE_TYPE The number of page types. + * META_FLUSH Make sure the previous pages are written + * with waiting the bio's completion + * ... Only can be used with META. 
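The fs_lock[] scheme described above, and implemented by mutex_lock_op()/mutex_unlock_op() further down, lets an ordinary operation grab any currently free lock and fall back to a round-robin slot when all are busy, while the checkpoint path takes every slot at once. A user-space model using POSIX mutexes as stand-ins for struct mutex; build with -pthread.

#include <pthread.h>
#include <stdio.h>

#define NR_GLOBAL_LOCKS 8

static pthread_mutex_t fs_lock[NR_GLOBAL_LOCKS];
static unsigned char next_lock_num;

static int lock_op(void)
{
	int next = next_lock_num % NR_GLOBAL_LOCKS;
	int i;

	for (i = 0; i < NR_GLOBAL_LOCKS; i++)
		if (pthread_mutex_trylock(&fs_lock[i]) == 0)
			return i;			/* found a free slot */

	pthread_mutex_lock(&fs_lock[next]);		/* all busy: wait round-robin */
	next_lock_num++;
	return next;
}

static void unlock_op(int ilock)
{
	pthread_mutex_unlock(&fs_lock[ilock]);
}

int main(void)
{
	int i, ilock;

	for (i = 0; i < NR_GLOBAL_LOCKS; i++)
		pthread_mutex_init(&fs_lock[i], NULL);

	ilock = lock_op();		/* an FS operation would run here */
	printf("operation ran under fs_lock[%d]\n", ilock);
	unlock_op(ilock);
	return 0;
}

Spreading blocked waiters across the slots keeps unrelated operations from piling up on a single mutex while still giving the checkpoint a simple way to exclude them all.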
+ */ +enum page_type { + DATA, + NODE, + META, + NR_PAGE_TYPE, + META_FLUSH, +}; + +/* + * Android sdcard emulation flags + */ +#define F2FS_ANDROID_EMU_NOCASE 0x00000001 + +struct f2fs_sb_info { + struct super_block *sb; /* pointer to VFS super block */ + struct proc_dir_entry *s_proc; /* proc entry */ + struct buffer_head *raw_super_buf; /* buffer head of raw sb */ + struct f2fs_super_block *raw_super; /* raw super block pointer */ + int s_dirty; /* dirty flag for checkpoint */ + + /* for node-related operations */ + struct f2fs_nm_info *nm_info; /* node manager */ + struct inode *node_inode; /* cache node blocks */ + + /* for segment-related operations */ + struct f2fs_sm_info *sm_info; /* segment manager */ + struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ + sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ + struct rw_semaphore bio_sem; /* IO semaphore */ + + /* for checkpoint */ + struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ + struct inode *meta_inode; /* cache meta blocks */ + struct mutex cp_mutex; /* checkpoint procedure lock */ + struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */ + struct mutex node_write; /* locking node writes */ + struct mutex writepages; /* mutex for writepages() */ + unsigned char next_lock_num; /* round-robin global locks */ + int por_doing; /* recovery is doing or not */ + int on_build_free_nids; /* build_free_nids is doing */ + + /* for orphan inode management */ + struct list_head orphan_inode_list; /* orphan inode list */ + struct mutex orphan_inode_mutex; /* for orphan inode list */ + unsigned int n_orphans; /* # of orphan inodes */ + + /* for directory inode management */ + struct list_head dir_inode_list; /* dir inode list */ + spinlock_t dir_inode_lock; /* for dir inode list lock */ + + /* basic file system units */ + unsigned int log_sectors_per_block; /* log2 sectors per block */ + unsigned int log_blocksize; /* log2 block size */ + unsigned int blocksize; /* block size */ + unsigned int root_ino_num; /* root inode number*/ + unsigned int node_ino_num; /* node inode number*/ + unsigned int meta_ino_num; /* meta inode number*/ + unsigned int log_blocks_per_seg; /* log2 blocks per segment */ + unsigned int blocks_per_seg; /* blocks per segment */ + unsigned int segs_per_sec; /* segments per section */ + unsigned int secs_per_zone; /* sections per zone */ + unsigned int total_sections; /* total section count */ + unsigned int total_node_count; /* total node block count */ + unsigned int total_valid_node_count; /* valid node block count */ + unsigned int total_valid_inode_count; /* valid inode count */ + int active_logs; /* # of active logs */ + + block_t user_block_count; /* # of user blocks */ + block_t total_valid_block_count; /* # of valid blocks */ + block_t alloc_valid_block_count; /* # of allocated blocks */ + block_t last_valid_block_count; /* for recovery */ + u32 s_next_generation; /* for NFS support */ + atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ + + struct f2fs_mount_info mount_opt; /* mount options */ + + /* for cleaning operations */ + struct mutex gc_mutex; /* mutex for GC */ + struct f2fs_gc_kthread *gc_thread; /* GC thread */ + unsigned int cur_victim_sec; /* current victim section num */ + + /* + * for stat information. + * one is for the LFS mode, and the other is for the SSR mode. 
+ */ +#ifdef CONFIG_F2FS_STAT_FS + struct f2fs_stat_info *stat_info; /* FS status information */ + unsigned int segment_count[2]; /* # of allocated segments */ + unsigned int block_count[2]; /* # of allocated blocks */ + int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ + int bg_gc; /* background gc calls */ + unsigned int n_dirty_dirs; /* # of dir inodes */ +#endif + unsigned int last_victim[2]; /* last victim segment # */ + spinlock_t stat_lock; /* lock for stat operations */ + + /* For sysfs suppport */ + struct kobject s_kobj; + struct completion s_kobj_unregister; + + /* For Android sdcard emulation */ + u32 android_emu_uid; + u32 android_emu_gid; + umode_t android_emu_mode; + int android_emu_flags; +}; + +/* + * Inline functions + */ +static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) +{ + return container_of(inode, struct f2fs_inode_info, vfs_inode); +} + +static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_super_block *)(sbi->raw_super); +} + +static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_checkpoint *)(sbi->ckpt); +} + +static inline struct f2fs_node *F2FS_NODE(struct page *page) +{ + return (struct f2fs_node *)page_address(page); +} + +static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_nm_info *)(sbi->nm_info); +} + +static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_sm_info *)(sbi->sm_info); +} + +static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi) +{ + return (struct sit_info *)(SM_I(sbi)->sit_info); +} + +static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi) +{ + return (struct free_segmap_info *)(SM_I(sbi)->free_info); +} + +static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi) +{ + return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); +} + +static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 1; +} + +static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 0; +} + +static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) +{ + return le64_to_cpu(cp->checkpoint_ver); +} + +static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + return ckpt_flags & f; +} + +static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + ckpt_flags |= f; + cp->ckpt_flags = cpu_to_le32(ckpt_flags); +} + +static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + ckpt_flags &= (~f); + cp->ckpt_flags = cpu_to_le32(ckpt_flags); +} + +static inline void mutex_lock_all(struct f2fs_sb_info *sbi) +{ + int i; + + for (i = 0; i < NR_GLOBAL_LOCKS; i++) { + /* + * This is the only time we take multiple fs_lock[] + * instances; the order is immaterial since we + * always hold cp_mutex, which serializes multiple + * such operations. 
+ */ + mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); + } +} + +static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) +{ + int i = 0; + for (; i < NR_GLOBAL_LOCKS; i++) + mutex_unlock(&sbi->fs_lock[i]); +} + +static inline int mutex_lock_op(struct f2fs_sb_info *sbi) +{ + unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS; + int i = 0; + + for (; i < NR_GLOBAL_LOCKS; i++) + if (mutex_trylock(&sbi->fs_lock[i])) + return i; + + mutex_lock(&sbi->fs_lock[next_lock]); + sbi->next_lock_num++; + return next_lock; +} + +static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock) +{ + if (ilock < 0) + return; + BUG_ON(ilock >= NR_GLOBAL_LOCKS); + mutex_unlock(&sbi->fs_lock[ilock]); +} + +/* + * Check whether the given nid is within node id range. + */ +static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) +{ + WARN_ON((nid >= NM_I(sbi)->max_nid)); + if (nid >= NM_I(sbi)->max_nid) + return -EINVAL; + return 0; +} + +#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 + +/* + * Check whether the inode has blocks or not + */ +static inline int F2FS_HAS_BLOCKS(struct inode *inode) +{ + if (F2FS_I(inode)->i_xattr_nid) + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); + else + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); +} + +static inline int f2fs_handle_error(struct f2fs_sb_info *sbi) +{ + if (test_opt(sbi, ERRORS_PANIC)) + BUG(); + if (test_opt(sbi, ERRORS_RECOVER)) + return 1; + return 0; +} + +static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, blkcnt_t count) +{ + block_t valid_block_count; + + spin_lock(&sbi->stat_lock); + valid_block_count = + sbi->total_valid_block_count + (block_t)count; + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + inode->i_blocks += count; + sbi->total_valid_block_count = valid_block_count; + sbi->alloc_valid_block_count += (block_t)count; + spin_unlock(&sbi->stat_lock); + return true; +} + +static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, + blkcnt_t count) +{ + spin_lock(&sbi->stat_lock); + + if (sbi->total_valid_block_count < (block_t)count) { + pr_crit("F2FS-fs (%s): block accounting error: %u < %llu\n", + sbi->sb->s_id, sbi->total_valid_block_count, count); + f2fs_handle_error(sbi); + sbi->total_valid_block_count = count; + } + if (inode->i_blocks < count) { + pr_crit("F2FS-fs (%s): inode accounting error: %llu < %llu\n", + sbi->sb->s_id, inode->i_blocks, count); + f2fs_handle_error(sbi); + inode->i_blocks = count; + } + + inode->i_blocks -= count; + sbi->total_valid_block_count -= (block_t)count; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_inc(&sbi->nr_pages[count_type]); + F2FS_SET_SB_DIRT(sbi); +} + +static inline void inode_inc_dirty_dents(struct inode *inode) +{ + atomic_inc(&F2FS_I(inode)->dirty_dents); +} + +static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_dec(&sbi->nr_pages[count_type]); +} + +static inline void inode_dec_dirty_dents(struct inode *inode) +{ + atomic_dec(&F2FS_I(inode)->dirty_dents); +} + +static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) +{ + return atomic_read(&sbi->nr_pages[count_type]); +} + +static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) +{ + unsigned int pages_per_sec = sbi->segs_per_sec * + (1 << sbi->log_blocks_per_seg); + return ((get_pages(sbi, 
block_type) + pages_per_sec - 1) + >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; +} + +static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) +{ + block_t ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_block_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + + /* return NAT or SIT bitmap */ + if (flag == NAT_BITMAP) + return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); + else if (flag == SIT_BITMAP) + return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); + + return 0; +} + +static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + int offset = (flag == NAT_BITMAP) ? + le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; + return &ckpt->sit_nat_version_bitmap + offset; +} + +static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) +{ + block_t start_addr; + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long long ckpt_version = cur_cp_version(ckpt); + + start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); + + /* + * odd numbered checkpoint should at cp segment 0 + * and even segent must be at cp segment 1 + */ + if (!(ckpt_version & 1)) + start_addr += sbi->blocks_per_seg; + + return start_addr; +} + +static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) +{ + return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + block_t valid_block_count; + unsigned int valid_node_count; + + spin_lock(&sbi->stat_lock); + + valid_block_count = sbi->total_valid_block_count + (block_t)count; + valid_node_count = sbi->total_valid_node_count + count; + + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if (valid_node_count > sbi->total_node_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if (inode) + inode->i_blocks += count; + sbi->alloc_valid_block_count += (block_t)count; + sbi->total_valid_node_count = valid_node_count; + sbi->total_valid_block_count = valid_block_count; + spin_unlock(&sbi->stat_lock); + + return true; +} + +static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + spin_lock(&sbi->stat_lock); + + if (sbi->total_valid_block_count < count) { + pr_crit("F2FS-fs (%s): block accounting error: %u < %u\n", + sbi->sb->s_id, sbi->total_valid_block_count, count); + f2fs_handle_error(sbi); + sbi->total_valid_block_count = count; + } + if (sbi->total_valid_node_count < count) { + pr_crit("F2FS-fs (%s): node accounting error: %u < %u\n", + sbi->sb->s_id, sbi->total_valid_node_count, count); + f2fs_handle_error(sbi); + sbi->total_valid_node_count = count; + } + if (inode->i_blocks < count) { + pr_crit("F2FS-fs (%s): inode accounting error: %llu < %u\n", + sbi->sb->s_id, inode->i_blocks, count); + f2fs_handle_error(sbi); + inode->i_blocks = count; + } + + inode->i_blocks -= count; + sbi->total_valid_node_count -= count; + sbi->total_valid_block_count -= (block_t)count; + + spin_unlock(&sbi->stat_lock); +} + +static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_node_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) +{ + 
spin_lock(&sbi->stat_lock); + BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); + sbi->total_valid_inode_count++; + spin_unlock(&sbi->stat_lock); +} + +static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(!sbi->total_valid_inode_count); + sbi->total_valid_inode_count--; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_inode_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void f2fs_put_page(struct page *page, int unlock) +{ + if (!page || IS_ERR(page)) + return; + + if (unlock) { + BUG_ON(!PageLocked(page)); + unlock_page(page); + } + page_cache_release(page); +} + +static inline void f2fs_put_dnode(struct dnode_of_data *dn) +{ + if (dn->node_page) + f2fs_put_page(dn->node_page, 1); + if (dn->inode_page && dn->node_page != dn->inode_page) + f2fs_put_page(dn->inode_page, 0); + dn->node_page = NULL; + dn->inode_page = NULL; +} + +static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, + size_t size, void (*ctor)(void *)) +{ + return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); +} + +#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) + +static inline bool IS_INODE(struct page *page) +{ + struct f2fs_node *p = F2FS_NODE(page); + return RAW_IS_INODE(p); +} + +static inline __le32 *blkaddr_in_node(struct f2fs_node *node) +{ + return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr; +} + +static inline block_t datablock_addr(struct page *node_page, + unsigned int offset) +{ + struct f2fs_node *raw_node; + __le32 *addr_array; + raw_node = F2FS_NODE(node_page); + addr_array = blkaddr_in_node(raw_node); + return le32_to_cpu(addr_array[offset]); +} + +static inline int f2fs_test_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + return mask & *addr; +} + +static inline int f2fs_set_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr |= mask; + return ret; +} + +static inline int f2fs_clear_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr &= ~mask; + return ret; +} + +/* used for f2fs_inode_info->flags */ +enum { + FI_NEW_INODE, /* indicate newly allocated inode */ + FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_INC_LINK, /* need to increment i_nlink */ + FI_ACL_MODE, /* indicate acl mode */ + FI_NO_ALLOC, /* should not allocate any blocks */ + FI_UPDATE_DIR, /* should update inode block for consistency */ + FI_DELAY_IPUT, /* used for the recovery */ + FI_INLINE_XATTR, /* used for inline xattr */ +}; + +static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + set_bit(flag, &fi->flags); +} + +static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) +{ + return test_bit(flag, &fi->flags); +} + +static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + clear_bit(flag, &fi->flags); +} + +static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) +{ + fi->i_acl_mode = mode; + set_inode_flag(fi, FI_ACL_MODE); +} + +static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + if (is_inode_flag_set(fi, FI_ACL_MODE)) { + clear_inode_flag(fi, FI_ACL_MODE); + return 1; + } + return 
0; +} + +int f2fs_android_emu(struct f2fs_sb_info *, struct inode *, u32 *, u32 *, + umode_t *); + +#define IS_ANDROID_EMU(sbi, fi, pfi) \ + (test_opt((sbi), ANDROID_EMU) && \ + (((fi)->i_advise & FADVISE_ANDROID_EMU) || \ + ((pfi)->i_advise & FADVISE_ANDROID_EMU))) + +static inline void get_inline_info(struct f2fs_inode_info *fi, + struct f2fs_inode *ri) +{ + if (ri->i_inline & F2FS_INLINE_XATTR) + set_inode_flag(fi, FI_INLINE_XATTR); +} + +static inline void set_raw_inline(struct f2fs_inode_info *fi, + struct f2fs_inode *ri) +{ + ri->i_inline = 0; + + if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + ri->i_inline |= F2FS_INLINE_XATTR; +} + +static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) +{ + if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; + return DEF_ADDRS_PER_INODE; +} + +static inline void *inline_xattr_addr(struct page *page) +{ + struct f2fs_inode *ri; + ri = (struct f2fs_inode *)page_address(page); + return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - + F2FS_INLINE_XATTR_ADDRS]); +} + +static inline int inline_xattr_size(struct inode *inode) +{ + if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) + return F2FS_INLINE_XATTR_ADDRS << 2; + else + return 0; +} + +static inline int f2fs_readonly(struct super_block *sb) +{ + return sb->s_flags & MS_RDONLY; +} + +/* + * file.c + */ +int f2fs_sync_file(struct file *, loff_t, loff_t, int); +void truncate_data_blocks(struct dnode_of_data *); +void f2fs_truncate(struct inode *); +int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +int f2fs_setattr(struct dentry *, struct iattr *); +int truncate_hole(struct inode *, pgoff_t, pgoff_t); +int truncate_data_blocks_range(struct dnode_of_data *, int); +long f2fs_ioctl(struct file *, unsigned int, unsigned long); +long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); + +/* + * inode.c + */ +void f2fs_set_inode_flags(struct inode *); +struct inode *f2fs_iget(struct super_block *, unsigned long); +void update_inode(struct inode *, struct page *); +int update_inode_page(struct inode *); +int f2fs_write_inode(struct inode *, struct writeback_control *); +void f2fs_evict_inode(struct inode *); + +/* + * namei.c + */ +struct dentry *f2fs_get_parent(struct dentry *child); + +/* + * dir.c + */ +struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, + struct page **); +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); +ino_t f2fs_inode_by_name(struct inode *, struct qstr *); +void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, + struct page *, struct inode *); +int update_dent_inode(struct inode *, const struct qstr *); +int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +int f2fs_make_empty(struct inode *, struct inode *); +bool f2fs_empty_dir(struct inode *); + +static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) +{ + return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, + inode); +} + +/* + * super.c + */ +int f2fs_sync_fs(struct super_block *, int); +extern __printf(3, 4) +void f2fs_msg(struct super_block *, const char *, const char *, ...); + +/* + * hash.c + */ +f2fs_hash_t f2fs_dentry_hash(const char *, size_t); + +/* + * node.c + */ +struct dnode_of_data; +struct node_info; + +int is_checkpointed_node(struct f2fs_sb_info *, nid_t); +void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); +int 
get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); +int truncate_inode_blocks(struct inode *, pgoff_t); +int truncate_xattr_node(struct inode *, struct page *); +int remove_inode_page(struct inode *); +struct page *new_inode_page(struct inode *, const struct qstr *); +struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); +void ra_node_page(struct f2fs_sb_info *, nid_t); +struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_node_page_ra(struct page *, int); +void sync_inode_page(struct dnode_of_data *); +int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); +bool alloc_nid(struct f2fs_sb_info *, nid_t *); +void alloc_nid_done(struct f2fs_sb_info *, nid_t); +void alloc_nid_failed(struct f2fs_sb_info *, nid_t); +void recover_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, struct node_info *, block_t); +int recover_inode_page(struct f2fs_sb_info *, struct page *); +int restore_node_summary(struct f2fs_sb_info *, unsigned int, + struct f2fs_summary_block *); +void flush_nat_entries(struct f2fs_sb_info *); +int build_node_manager(struct f2fs_sb_info *); +void destroy_node_manager(struct f2fs_sb_info *); +int __init create_node_manager_caches(void); +void destroy_node_manager_caches(void); + +/* + * segment.c + */ +void f2fs_balance_fs(struct f2fs_sb_info *); +void invalidate_blocks(struct f2fs_sb_info *, block_t); +void clear_prefree_segments(struct f2fs_sb_info *); +int npages_for_summary_flush(struct f2fs_sb_info *); +void allocate_new_segments(struct f2fs_sb_info *); +struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); +struct bio *f2fs_bio_alloc(struct block_device *, int); +void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool); +void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); +void write_meta_page(struct f2fs_sb_info *, struct page *); +void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, + block_t, block_t *); +void write_data_page(struct inode *, struct page *, struct dnode_of_data*, + block_t, block_t *); +void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); +void recover_data_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void rewrite_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void write_data_summaries(struct f2fs_sb_info *, block_t); +void write_node_summaries(struct f2fs_sb_info *, block_t); +int lookup_journal_in_cursum(struct f2fs_summary_block *, + int, unsigned int, int); +void flush_sit_entries(struct f2fs_sb_info *); +int build_segment_manager(struct f2fs_sb_info *); +void destroy_segment_manager(struct f2fs_sb_info *); + +/* + * checkpoint.c + */ +struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); +long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); +int acquire_orphan_inode(struct f2fs_sb_info *); +void release_orphan_inode(struct f2fs_sb_info *); +void add_orphan_inode(struct f2fs_sb_info *, nid_t); +void remove_orphan_inode(struct f2fs_sb_info *, nid_t); +int recover_orphan_inodes(struct f2fs_sb_info *); +int get_valid_checkpoint(struct f2fs_sb_info *); +void set_dirty_dir_page(struct inode *, struct page *); +void add_dirty_dir_inode(struct inode *); +void remove_dirty_dir_inode(struct inode *); +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); +void sync_dirty_dir_inodes(struct f2fs_sb_info 
*); +void write_checkpoint(struct f2fs_sb_info *, bool); +void init_orphan_info(struct f2fs_sb_info *); +int __init create_checkpoint_caches(void); +void destroy_checkpoint_caches(void); + +/* + * data.c + */ +int reserve_new_block(struct dnode_of_data *); +void update_extent_cache(block_t, struct dnode_of_data *); +struct page *find_data_page(struct inode *, pgoff_t, bool); +struct page *get_lock_data_page(struct inode *, pgoff_t); +struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); +int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); +int do_write_data_page(struct page *); + +/* + * gc.c + */ +int start_gc_thread(struct f2fs_sb_info *); +void stop_gc_thread(struct f2fs_sb_info *); +block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); +int f2fs_gc(struct f2fs_sb_info *); +void build_gc_manager(struct f2fs_sb_info *); +int __init create_gc_caches(void); +void destroy_gc_caches(void); + +/* + * recovery.c + */ +int recover_fsync_data(struct f2fs_sb_info *); +bool space_for_roll_forward(struct f2fs_sb_info *); + +/* + * debug.c + */ +#ifdef CONFIG_F2FS_STAT_FS +struct f2fs_stat_info { + struct list_head stat_list; + struct f2fs_sb_info *sbi; + struct mutex stat_lock; + int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; + int main_area_segs, main_area_sections, main_area_zones; + int hit_ext, total_ext; + int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; + int nats, sits, fnids; + int total_count, utilization; + int bg_gc; + unsigned int valid_count, valid_node_count, valid_inode_count; + unsigned int bimodal, avg_vblocks; + int util_free, util_valid, util_invalid; + int rsvd_segs, overp_segs; + int dirty_count, node_pages, meta_pages; + int prefree_count, call_count; + int tot_segs, node_segs, data_segs, free_segs, free_secs; + int tot_blks, data_blks, node_blks; + int curseg[NR_CURSEG_TYPE]; + int cursec[NR_CURSEG_TYPE]; + int curzone[NR_CURSEG_TYPE]; + + unsigned int segment_count[2]; + unsigned int block_count[2]; + unsigned base_mem, cache_mem; +}; + +static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_stat_info*)sbi->stat_info; +} + +#define stat_inc_call_count(si) ((si)->call_count++) + +#define stat_inc_seg_count(sbi, type) \ + do { \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ + (si)->tot_segs++; \ + if (type == SUM_TYPE_DATA) \ + si->data_segs++; \ + else \ + si->node_segs++; \ + } while (0) + +#define stat_inc_tot_blk_count(si, blks) \ + (si->tot_blks += (blks)) + +#define stat_inc_data_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ + stat_inc_tot_blk_count(si, blks); \ + si->data_blks += (blks); \ + } while (0) + +#define stat_inc_node_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ + stat_inc_tot_blk_count(si, blks); \ + si->node_blks += (blks); \ + } while (0) + +int f2fs_build_stats(struct f2fs_sb_info *); +void f2fs_destroy_stats(struct f2fs_sb_info *); +void __init f2fs_create_root_stats(void); +void f2fs_destroy_root_stats(void); +#else +#define stat_inc_call_count(si) +#define stat_inc_seg_count(si, type) +#define stat_inc_tot_blk_count(si, blks) +#define stat_inc_data_blk_count(si, blks) +#define stat_inc_node_blk_count(sbi, blks) + +static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } +static inline void __init f2fs_create_root_stats(void) { } +static inline void 
f2fs_destroy_root_stats(void) { } +#endif + +extern const struct file_operations f2fs_dir_operations; +extern const struct file_operations f2fs_file_operations; +extern const struct inode_operations f2fs_file_inode_operations; +extern const struct address_space_operations f2fs_dblock_aops; +extern const struct address_space_operations f2fs_node_aops; +extern const struct address_space_operations f2fs_meta_aops; +extern const struct inode_operations f2fs_dir_inode_operations; +extern const struct inode_operations f2fs_symlink_inode_operations; +extern const struct inode_operations f2fs_special_inode_operations; +#endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c new file mode 100644 index 00000000000..5249b9fb114 --- /dev/null +++ b/fs/f2fs/file.c @@ -0,0 +1,727 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/file.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "xattr.h" +#include "acl.h" +#include + +static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct page *page = vmf->page; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + block_t old_blk_addr; + struct dnode_of_data dn; + int err, ilock; + + f2fs_balance_fs(sbi); + + /* Wait if fs is frozen. This is racy so we check again later on + * and retry if the fs has been frozen after the page lock has + * been acquired + */ + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + + /* block allocation */ + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); + if (err) { + mutex_unlock_op(sbi, ilock); + goto out; + } + + old_blk_addr = dn.data_blkaddr; + + if (old_blk_addr == NULL_ADDR) { + err = reserve_new_block(&dn); + if (err) { + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + goto out; + } + } + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + + file_update_time(vma->vm_file); + lock_page(page); + if (page->mapping != inode->i_mapping || + page_offset(page) > i_size_read(inode) || + !PageUptodate(page)) { + unlock_page(page); + err = -EFAULT; + goto out; + } + + /* + * check to see if the page is mapped already (no holes) + */ + if (PageMappedToDisk(page)) + goto mapped; + + /* page is wholly or partially inside EOF */ + if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { + unsigned offset; + offset = i_size_read(inode) & ~PAGE_CACHE_MASK; + zero_user_segment(page, offset, PAGE_CACHE_SIZE); + } + set_page_dirty(page); + SetPageUptodate(page); + +mapped: + /* fill the page */ + wait_on_page_writeback(page); +out: + return block_page_mkwrite_return(err); +} + +static const struct vm_operations_struct f2fs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = f2fs_vm_page_mkwrite, +}; + +static int get_parent_ino(struct inode *inode, nid_t *pino) +{ + struct dentry *dentry; + + inode = igrab(inode); + + /* Alex - the following is equivalent to: dentry = d_find_any_alias(inode); */ + dentry = NULL; + spin_lock(&inode->i_lock); + if (!list_empty(&inode->i_dentry)) { + dentry = 
list_first_entry(&inode->i_dentry, + struct dentry, d_alias); + dget(dentry); + } + spin_unlock(&inode->i_lock); + + iput(inode); + if (!dentry) + return 0; + + if (update_dent_inode(inode, &dentry->d_name)) { + dput(dentry); + return 0; + } + + *pino = parent_ino(dentry); + dput(dentry); + return 1; +} + +int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct inode *inode = file->f_mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ret = 0; + bool need_cp = false; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + + if (f2fs_readonly(inode->i_sb)) + return 0; + + trace_f2fs_sync_file_enter(inode); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) { + trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + return ret; + } + + /* guarantee free sections for fsync */ + f2fs_balance_fs(sbi); + + mutex_lock(&inode->i_mutex); + + /* + * Both of fdatasync() and fsync() are able to be recovered from + * sudden-power-off. + */ + if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) + need_cp = true; + else if (file_wrong_pino(inode)) + need_cp = true; + else if (!space_for_roll_forward(sbi)) + need_cp = true; + else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) + need_cp = true; + else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) + need_cp = true; + + if (need_cp) { + nid_t pino; + + F2FS_I(inode)->xattr_ver = 0; + + /* all the dirty node pages should be flushed for POR */ + ret = f2fs_sync_fs(inode->i_sb, 1); + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + F2FS_I(inode)->i_pino = pino; + file_got_pino(inode); + mark_inode_dirty_sync(inode); + ret = f2fs_write_inode(inode, NULL); + if (ret) + goto out; + } + } else { + /* if there is no written node page, write its inode page */ + while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { + mark_inode_dirty_sync(inode); + ret = f2fs_write_inode(inode, NULL); + if (ret) + goto out; + } + filemap_fdatawait_range(sbi->node_inode->i_mapping, + 0, LONG_MAX); + ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + } +out: + mutex_unlock(&inode->i_mutex); + trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + return ret; +} + +static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + file_accessed(file); + vma->vm_ops = &f2fs_file_vm_ops; + return 0; +} + +int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +{ + int nr_free = 0, ofs = dn->ofs_in_node; + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_node *raw_node; + __le32 *addr; + + raw_node = F2FS_NODE(dn->node_page); + addr = blkaddr_in_node(raw_node) + ofs; + + for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { + block_t blkaddr = le32_to_cpu(*addr); + if (blkaddr == NULL_ADDR) + continue; + + update_extent_cache(NULL_ADDR, dn); + invalidate_blocks(sbi, blkaddr); + nr_free++; + } + if (nr_free) { + dec_valid_block_count(sbi, dn->inode, nr_free); + set_page_dirty(dn->node_page); + sync_inode_page(dn); + } + dn->ofs_in_node = ofs; + + trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, + dn->ofs_in_node, nr_free); + return nr_free; +} + +void truncate_data_blocks(struct dnode_of_data *dn) +{ + truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); +} + +static void truncate_partial_data_page(struct inode *inode, u64 from) +{ + unsigned offset = from & (PAGE_CACHE_SIZE - 1); + struct page *page; + + if 
(!offset) + return; + + page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); + if (IS_ERR(page)) + return; + + lock_page(page); + if (page->mapping != inode->i_mapping) { + f2fs_put_page(page, 1); + return; + } + wait_on_page_writeback(page); + zero_user(page, offset, PAGE_CACHE_SIZE - offset); + set_page_dirty(page); + f2fs_put_page(page, 1); +} + +static int truncate_blocks(struct inode *inode, u64 from) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + unsigned int blocksize = inode->i_sb->s_blocksize; + struct dnode_of_data dn; + pgoff_t free_from; + int count = 0, ilock = -1; + int err; + + trace_f2fs_truncate_blocks_enter(inode, from); + + free_from = (pgoff_t) + ((from + blocksize - 1) >> (sbi->log_blocksize)); + + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); + if (err) { + if (err == -ENOENT) + goto free_next; + mutex_unlock_op(sbi, ilock); + trace_f2fs_truncate_blocks_exit(inode, err); + return err; + } + + if (IS_INODE(dn.node_page)) + count = ADDRS_PER_INODE(F2FS_I(inode)); + else + count = ADDRS_PER_BLOCK; + + count -= dn.ofs_in_node; + BUG_ON(count < 0); + + if (dn.ofs_in_node || IS_INODE(dn.node_page)) { + truncate_data_blocks_range(&dn, count); + free_from += count; + } + + f2fs_put_dnode(&dn); +free_next: + err = truncate_inode_blocks(inode, free_from); + mutex_unlock_op(sbi, ilock); + + /* lastly zero out the first data page */ + truncate_partial_data_page(inode, from); + + trace_f2fs_truncate_blocks_exit(inode, err); + return err; +} + +void f2fs_truncate(struct inode *inode) +{ + int err; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + + trace_f2fs_truncate(inode); + + err = truncate_blocks(inode, i_size_read(inode)); + if (err) { + f2fs_msg(inode->i_sb, KERN_ERR, "truncate failed with %d", + err); + f2fs_handle_error(F2FS_SB(inode->i_sb)); + } else { + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } +} + +int f2fs_getattr(struct vfsmount *mnt, + struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + generic_fillattr(inode, stat); + stat->blocks <<= 3; + return 0; +} + +#ifdef CONFIG_F2FS_FS_POSIX_ACL +static void __setattr_copy(struct inode *inode, const struct iattr *attr) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int ia_valid = attr->ia_valid; + + if (ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + if (ia_valid & ATTR_ATIME) + inode->i_atime = timespec_trunc(attr->ia_atime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MTIME) + inode->i_mtime = timespec_trunc(attr->ia_mtime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_CTIME) + inode->i_ctime = timespec_trunc(attr->ia_ctime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + set_acl_inode(fi, mode); + } +} +#else +#define __setattr_copy setattr_copy +#endif + +int f2fs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_inode_info *pfi = F2FS_I(dentry->d_parent->d_inode); + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int err; + + err = inode_change_ok(inode, attr); + if (err) + return err; + + if (IS_ANDROID_EMU(sbi, fi, pfi)) + f2fs_android_emu(sbi, inode, &attr->ia_uid, &attr->ia_gid, 
+ &attr->ia_mode); + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + truncate_setsize(inode, attr->ia_size); + f2fs_truncate(inode); + f2fs_balance_fs(F2FS_SB(inode->i_sb)); + } + + __setattr_copy(inode, attr); + + if (attr->ia_valid & ATTR_MODE) { + err = f2fs_acl_chmod(inode); + if (err || is_inode_flag_set(fi, FI_ACL_MODE)) { + inode->i_mode = fi->i_acl_mode; + clear_inode_flag(fi, FI_ACL_MODE); + } + } + + mark_inode_dirty(inode); + return err; +} + +const struct inode_operations f2fs_file_inode_operations = { + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +static void fill_zero(struct inode *inode, pgoff_t index, + loff_t start, loff_t len) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + int ilock; + + if (!len) + return; + + f2fs_balance_fs(sbi); + + ilock = mutex_lock_op(sbi); + page = get_new_data_page(inode, NULL, index, false); + mutex_unlock_op(sbi, ilock); + + if (!IS_ERR(page)) { + wait_on_page_writeback(page); + zero_user(page, start, len); + set_page_dirty(page); + f2fs_put_page(page, 1); + } +} + +int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) +{ + pgoff_t index; + int err; + + for (index = pg_start; index < pg_end; index++) { + struct dnode_of_data dn; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) { + if (err == -ENOENT) + continue; + return err; + } + + if (dn.data_blkaddr != NULL_ADDR) + truncate_data_blocks_range(&dn, 1); + f2fs_put_dnode(&dn); + } + return 0; +} + +static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) +{ + pgoff_t pg_start, pg_end; + loff_t off_start, off_end; + int ret = 0; + + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + + off_start = offset & (PAGE_CACHE_SIZE - 1); + off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + + if (pg_start == pg_end) { + fill_zero(inode, pg_start, off_start, + off_end - off_start); + } else { + if (off_start) + fill_zero(inode, pg_start++, off_start, + PAGE_CACHE_SIZE - off_start); + if (off_end) + fill_zero(inode, pg_end, 0, off_end); + + if (pg_start < pg_end) { + struct address_space *mapping = inode->i_mapping; + loff_t blk_start, blk_end; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; + + f2fs_balance_fs(sbi); + + blk_start = pg_start << PAGE_CACHE_SHIFT; + blk_end = pg_end << PAGE_CACHE_SHIFT; + truncate_inode_pages_range(mapping, blk_start, + blk_end - 1); + + ilock = mutex_lock_op(sbi); + ret = truncate_hole(inode, pg_start, pg_end); + mutex_unlock_op(sbi, ilock); + } + } + + if (!(mode & FALLOC_FL_KEEP_SIZE) && + i_size_read(inode) <= (offset + len)) { + i_size_write(inode, offset); + mark_inode_dirty(inode); + } + + return ret; +} + +static int expand_inode_data(struct inode *inode, loff_t offset, + loff_t len, int mode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + pgoff_t index, pg_start, pg_end; + loff_t new_size = i_size_read(inode); + loff_t off_start, off_end; + int ret = 0; + + ret = inode_newsize_ok(inode, (len + offset)); + if (ret) + return ret; + + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + + off_start = offset & 
(PAGE_CACHE_SIZE - 1); + off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + + for (index = pg_start; index <= pg_end; index++) { + struct dnode_of_data dn; + int ilock; + + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, index, ALLOC_NODE); + if (ret) { + mutex_unlock_op(sbi, ilock); + break; + } + + if (dn.data_blkaddr == NULL_ADDR) { + ret = reserve_new_block(&dn); + if (ret) { + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + break; + } + } + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + + if (pg_start == pg_end) + new_size = offset + len; + else if (index == pg_start && off_start) + new_size = (index + 1) << PAGE_CACHE_SHIFT; + else if (index == pg_end) + new_size = (index << PAGE_CACHE_SHIFT) + off_end; + else + new_size += PAGE_CACHE_SIZE; + } + + if (!(mode & FALLOC_FL_KEEP_SIZE) && + i_size_read(inode) < new_size) { + i_size_write(inode, new_size); + mark_inode_dirty(inode); + } + + return ret; +} + +static long f2fs_fallocate(struct file *file, int mode, + loff_t offset, loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + long ret; + + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + + if (mode & FALLOC_FL_PUNCH_HOLE) + ret = punch_hole(inode, offset, len, mode); + else + ret = expand_inode_data(inode, offset, len, mode); + + if (!ret) { + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } + trace_f2fs_fallocate(inode, mode, offset, len, ret); + return ret; +} + +#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) +#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) + +static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & F2FS_REG_FLMASK; + else + return flags & F2FS_OTHER_FLMASK; +} + +long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int flags; + int ret; + + switch (cmd) { + case F2FS_IOC_GETFLAGS: + flags = fi->i_flags & FS_FL_USER_VISIBLE; + return put_user(flags, (int __user *) arg); + case F2FS_IOC_SETFLAGS: + { + unsigned int oldflags; + + ret = mnt_want_write(filp->f_path.mnt); + if (ret) + return ret; + + if (!inode_owner_or_capable(inode)) { + ret = -EACCES; + goto out; + } + + if (get_user(flags, (int __user *) arg)) { + ret = -EFAULT; + goto out; + } + + flags = f2fs_mask_flags(inode->i_mode, flags); + + mutex_lock(&inode->i_mutex); + + oldflags = fi->i_flags; + + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + ret = -EPERM; + goto out; + } + } + + flags = flags & FS_FL_USER_MODIFIABLE; + flags |= oldflags & ~FS_FL_USER_MODIFIABLE; + fi->i_flags = flags; + mutex_unlock(&inode->i_mutex); + + f2fs_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); +out: + mnt_drop_write(filp->f_path.mnt); + return ret; + } + default: + return -ENOTTY; + } +} + +#ifdef CONFIG_COMPAT +long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case F2FS_IOC32_GETFLAGS: + cmd = F2FS_IOC_GETFLAGS; + break; + case F2FS_IOC32_SETFLAGS: + cmd = F2FS_IOC_SETFLAGS; + break; + default: + return -ENOIOCTLCMD; + } + return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); +} +#endif + +const struct file_operations f2fs_file_operations = { + .llseek = 
generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .open = generic_file_open, + .mmap = f2fs_file_mmap, + .fsync = f2fs_sync_file, + .fallocate = f2fs_fallocate, + .unlocked_ioctl = f2fs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = f2fs_compat_ioctl, +#endif + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, +}; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c new file mode 100644 index 00000000000..df447eab869 --- /dev/null +++ b/fs/f2fs/gc.c @@ -0,0 +1,740 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/gc.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "gc.h" +#include + +static struct kmem_cache *winode_slab; + +static int gc_thread_func(void *data) +{ + struct f2fs_sb_info *sbi = data; + struct f2fs_gc_kthread *gc_th = sbi->gc_thread; + wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; + long wait_ms; + + wait_ms = gc_th->min_sleep_time; + + do { + if (try_to_freeze()) + continue; + else + wait_event_interruptible_timeout(*wq, + kthread_should_stop(), + msecs_to_jiffies(wait_ms)); + if (kthread_should_stop()) + break; + + /* + * [GC triggering condition] + * 0. GC is not conducted currently. + * 1. There are enough dirty segments. + * 2. IO subsystem is idle by checking the # of writeback pages. + * 3. IO subsystem is idle by checking the # of requests in + * bdev's request list. + * + * Note) We have to avoid triggering GCs too much frequently. + * Because it is possible that some segments can be + * invalidated soon after by user update or deletion. + * So, I'd like to wait some time to collect dirty segments. 
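+ * In practice the wait below moves between min_sleep_time and
+ * max_sleep_time (30s and 60s here): it shrinks while there are
+ * enough invalid blocks to clean, grows when the device is busy
+ * or there is little to reclaim, and falls back to
+ * no_gc_sleep_time (5 min) whenever no victim could be selected.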
+ */ + if (!mutex_trylock(&sbi->gc_mutex)) + continue; + + if (!is_idle(sbi)) { + wait_ms = increase_sleep_time(gc_th, wait_ms); + mutex_unlock(&sbi->gc_mutex); + continue; + } + + if (has_enough_invalid_blocks(sbi)) + wait_ms = decrease_sleep_time(gc_th, wait_ms); + else + wait_ms = increase_sleep_time(gc_th, wait_ms); + +#ifdef CONFIG_F2FS_STAT_FS + sbi->bg_gc++; +#endif + + /* if return value is not zero, no victim was selected */ + if (f2fs_gc(sbi)) + wait_ms = gc_th->no_gc_sleep_time; + } while (!kthread_should_stop()); + return 0; +} + +int start_gc_thread(struct f2fs_sb_info *sbi) +{ + struct f2fs_gc_kthread *gc_th; + dev_t dev = sbi->sb->s_bdev->bd_dev; + int err = 0; + + if (!test_opt(sbi, BG_GC)) + goto out; + gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); + if (!gc_th) { + err = -ENOMEM; + goto out; + } + + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + + gc_th->gc_idle = 0; + + sbi->gc_thread = gc_th; + init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); + sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, + "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(gc_th->f2fs_gc_task)) { + err = PTR_ERR(gc_th->f2fs_gc_task); + kfree(gc_th); + sbi->gc_thread = NULL; + } + +out: + return err; +} + +void stop_gc_thread(struct f2fs_sb_info *sbi) +{ + struct f2fs_gc_kthread *gc_th = sbi->gc_thread; + if (!gc_th) + return; + kthread_stop(gc_th->f2fs_gc_task); + kfree(gc_th); + sbi->gc_thread = NULL; +} + +static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) +{ + int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; + + if (gc_th && gc_th->gc_idle) { + if (gc_th->gc_idle == 1) + gc_mode = GC_CB; + else if (gc_th->gc_idle == 2) + gc_mode = GC_GREEDY; + } + return gc_mode; +} + +static void select_policy(struct f2fs_sb_info *sbi, int gc_type, + int type, struct victim_sel_policy *p) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + if (p->alloc_mode == SSR) { + p->gc_mode = GC_GREEDY; + p->dirty_segmap = dirty_i->dirty_segmap[type]; + p->ofs_unit = 1; + } else { + p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); + p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; + p->ofs_unit = sbi->segs_per_sec; + } + p->offset = sbi->last_victim[p->gc_mode]; +} + +static unsigned int get_max_cost(struct f2fs_sb_info *sbi, + struct victim_sel_policy *p) +{ + /* SSR allocates in a segment unit */ + if (p->alloc_mode == SSR) + return 1 << sbi->log_blocks_per_seg; + if (p->gc_mode == GC_GREEDY) + return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; + else if (p->gc_mode == GC_CB) + return UINT_MAX; + else /* No other gc_mode */ + return 0; +} + +static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int hint = 0; + unsigned int secno; + + /* + * If the gc_type is FG_GC, we can select victim segments + * selected by background GC before. + * Those segments guarantee they have small valid blocks. 
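+ * Background GC only marks such sections in victim_secmap rather
+ * than cleaning them immediately; foreground GC reuses that map
+ * here, clears the bit and returns the section's first segment.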
+ */ +next: + secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++); + if (secno < TOTAL_SECS(sbi)) { + if (sec_usage_check(sbi, secno)) + goto next; + clear_bit(secno, dirty_i->victim_secmap); + return secno * sbi->segs_per_sec; + } + return NULL_SEGNO; +} + +static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int secno = GET_SECNO(sbi, segno); + unsigned int start = secno * sbi->segs_per_sec; + unsigned long long mtime = 0; + unsigned int vblocks; + unsigned char age = 0; + unsigned char u; + unsigned int i; + + for (i = 0; i < sbi->segs_per_sec; i++) + mtime += get_seg_entry(sbi, start + i)->mtime; + vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + + mtime = div_u64(mtime, sbi->segs_per_sec); + vblocks = div_u64(vblocks, sbi->segs_per_sec); + + u = (vblocks * 100) >> sbi->log_blocks_per_seg; + + /* Handle if the system time is changed by user */ + if (mtime < sit_i->min_mtime) + sit_i->min_mtime = mtime; + if (mtime > sit_i->max_mtime) + sit_i->max_mtime = mtime; + if (sit_i->max_mtime != sit_i->min_mtime) + age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime), + sit_i->max_mtime - sit_i->min_mtime); + + return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); +} + +static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, + struct victim_sel_policy *p) +{ + if (p->alloc_mode == SSR) + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + + /* alloc_mode == LFS */ + if (p->gc_mode == GC_GREEDY) + return get_valid_blocks(sbi, segno, sbi->segs_per_sec); + else + return get_cb_cost(sbi, segno); +} + +/* + * This function is called from two paths. + * One is garbage collection and the other is SSR segment selection. + * When it is called during GC, it just gets a victim segment + * and it does not remove it from dirty seglist. + * When it is called from SSR segment selection, it finds a segment + * which has minimum valid blocks and removes it from dirty seglist. 
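+ * The candidate with the minimum cost wins: checkpointed valid
+ * blocks for SSR, the valid block count for greedy mode, and the
+ * age/utilization value from get_cb_cost() for cost-benefit mode.
+ * At most MAX_VICTIM_SEARCH dirty segments are examined per call.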
+ */ +static int get_victim_by_default(struct f2fs_sb_info *sbi, + unsigned int *result, int gc_type, int type, char alloc_mode) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct victim_sel_policy p; + unsigned int secno, max_cost; + int nsearched = 0; + + p.alloc_mode = alloc_mode; + select_policy(sbi, gc_type, type, &p); + + p.min_segno = NULL_SEGNO; + p.min_cost = max_cost = get_max_cost(sbi, &p); + + mutex_lock(&dirty_i->seglist_lock); + + if (p.alloc_mode == LFS && gc_type == FG_GC) { + p.min_segno = check_bg_victims(sbi); + if (p.min_segno != NULL_SEGNO) + goto got_it; + } + + while (1) { + unsigned long cost; + unsigned int segno; + + segno = find_next_bit(p.dirty_segmap, + TOTAL_SEGS(sbi), p.offset); + if (segno >= TOTAL_SEGS(sbi)) { + if (sbi->last_victim[p.gc_mode]) { + sbi->last_victim[p.gc_mode] = 0; + p.offset = 0; + continue; + } + break; + } + p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; + secno = GET_SECNO(sbi, segno); + + if (sec_usage_check(sbi, secno)) + continue; + if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) + continue; + + cost = get_gc_cost(sbi, segno, &p); + + if (p.min_cost > cost) { + p.min_segno = segno; + p.min_cost = cost; + } + + if (cost == max_cost) + continue; + + if (nsearched++ >= MAX_VICTIM_SEARCH) { + sbi->last_victim[p.gc_mode] = segno; + break; + } + } + if (p.min_segno != NULL_SEGNO) { +got_it: + if (p.alloc_mode == LFS) { + secno = GET_SECNO(sbi, p.min_segno); + if (gc_type == FG_GC) + sbi->cur_victim_sec = secno; + else + set_bit(secno, dirty_i->victim_secmap); + } + *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; + + trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, + sbi->cur_victim_sec, + prefree_segments(sbi), free_segments(sbi)); + } + mutex_unlock(&dirty_i->seglist_lock); + + return (p.min_segno == NULL_SEGNO) ? 0 : 1; +} + +static const struct victim_selection default_v_ops = { + .get_victim = get_victim_by_default, +}; + +static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) +{ + struct inode_entry *ie; + + list_for_each_entry(ie, ilist, list) + if (ie->inode->i_ino == ino) + return ie->inode; + return NULL; +} + +static void add_gc_inode(struct inode *inode, struct list_head *ilist) +{ + struct inode_entry *new_ie; + + if (inode == find_gc_inode(inode->i_ino, ilist)) { + iput(inode); + return; + } +repeat: + new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); + if (!new_ie) { + cond_resched(); + goto repeat; + } + new_ie->inode = inode; + list_add_tail(&new_ie->list, ilist); +} + +static void put_gc_inode(struct list_head *ilist) +{ + struct inode_entry *ie, *next_ie; + list_for_each_entry_safe(ie, next_ie, ilist, list) { + iput(ie->inode); + list_del(&ie->list); + kmem_cache_free(winode_slab, ie); + } +} + +static int check_valid_map(struct f2fs_sb_info *sbi, + unsigned int segno, int offset) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct seg_entry *sentry; + int ret; + + mutex_lock(&sit_i->sentry_lock); + sentry = get_seg_entry(sbi, segno); + ret = f2fs_test_bit(offset, sentry->cur_valid_map); + mutex_unlock(&sit_i->sentry_lock); + return ret; +} + +/* + * This function compares node address got in summary with that in NAT. + * On validity, copy that node with cold status, otherwise (invalid node) + * ignore that. 
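+ * The segment is scanned twice: the first pass only issues
+ * ra_node_page() readahead for each valid nid, the second pass
+ * dirties the live node pages so they are relocated by node
+ * writeback (sync_node_pages() is forced below for FG_GC).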
+ */ +static void gc_node_segment(struct f2fs_sb_info *sbi, + struct f2fs_summary *sum, unsigned int segno, int gc_type) +{ + bool initial = true; + struct f2fs_summary *entry; + int off; + +next_step: + entry = sum; + + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { + nid_t nid = le32_to_cpu(entry->nid); + struct page *node_page; + + /* stop BG_GC if there is not enough free sections. */ + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) + return; + + if (check_valid_map(sbi, segno, off) == 0) + continue; + + if (initial) { + ra_node_page(sbi, nid); + continue; + } + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + continue; + + /* set page dirty and write it */ + if (gc_type == FG_GC) { + f2fs_wait_on_page_writeback(node_page, NODE, true); + set_page_dirty(node_page); + } else { + if (!PageWriteback(node_page)) + set_page_dirty(node_page); + } + f2fs_put_page(node_page, 1); + stat_inc_node_blk_count(sbi, 1); + } + + if (initial) { + initial = false; + goto next_step; + } + + if (gc_type == FG_GC) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + sync_node_pages(sbi, 0, &wbc); + + /* + * In the case of FG_GC, it'd be better to reclaim this victim + * completely. + */ + if (get_valid_blocks(sbi, segno, 1) != 0) + goto next_step; + } +} + +/* + * Calculate start block index indicating the given node offset. + * Be careful, caller should give this node offset only indicating direct node + * blocks. If any node offsets, which point the other types of node blocks such + * as indirect or double indirect node blocks, are given, it must be a caller's + * bug. + */ +block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi) +{ + unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; + unsigned int bidx; + + if (node_ofs == 0) + return 0; + + if (node_ofs <= 2) { + bidx = node_ofs - 1; + } else if (node_ofs <= indirect_blks) { + int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); + bidx = node_ofs - 2 - dec; + } else { + int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); + bidx = node_ofs - 5 - dec; + } + return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); +} + +static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + struct node_info *dni, block_t blkaddr, unsigned int *nofs) +{ + struct page *node_page; + nid_t nid; + unsigned int ofs_in_node; + block_t source_blkaddr; + + nid = le32_to_cpu(sum->nid); + ofs_in_node = le16_to_cpu(sum->ofs_in_node); + + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + return 0; + + get_node_info(sbi, nid, dni); + + if (sum->version != dni->version) { + f2fs_put_page(node_page, 1); + return 0; + } + + *nofs = ofs_of_node(node_page); + source_blkaddr = datablock_addr(node_page, ofs_in_node); + f2fs_put_page(node_page, 1); + + if (source_blkaddr != blkaddr) + return 0; + return 1; +} + +static void move_data_page(struct inode *inode, struct page *page, int gc_type) +{ + if (gc_type == BG_GC) { + if (PageWriteback(page)) + goto out; + set_page_dirty(page); + set_cold_data(page); + } else { + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + f2fs_wait_on_page_writeback(page, DATA, true); + + if (clear_page_dirty_for_io(page) && + S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + set_cold_data(page); + do_write_data_page(page); + clear_cold_data(page); + } +out: + f2fs_put_page(page, 1); +} + +/* + * This function tries to get parent node of victim 
data block, and identifies + * data block validity. If the block is valid, copy that with cold status and + * modify parent node. + * If the parent node is not valid or the data block address is different, + * the victim data block is ignored. + */ +static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + struct list_head *ilist, unsigned int segno, int gc_type) +{ + struct super_block *sb = sbi->sb; + struct f2fs_summary *entry; + block_t start_addr; + int off; + int phase = 0; + + start_addr = START_BLOCK(sbi, segno); + +next_step: + entry = sum; + + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { + struct page *data_page; + struct inode *inode; + struct node_info dni; /* dnode info for the data */ + unsigned int ofs_in_node, nofs; + block_t start_bidx; + + /* stop BG_GC if there is not enough free sections. */ + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) + return; + + if (check_valid_map(sbi, segno, off) == 0) + continue; + + if (phase == 0) { + ra_node_page(sbi, le32_to_cpu(entry->nid)); + continue; + } + + /* Get an inode by ino with checking validity */ + if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0) + continue; + + if (phase == 1) { + ra_node_page(sbi, dni.ino); + continue; + } + + ofs_in_node = le16_to_cpu(entry->ofs_in_node); + + if (phase == 2) { + inode = f2fs_iget(sb, dni.ino); + if (IS_ERR(inode)) + continue; + + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + + data_page = find_data_page(inode, + start_bidx + ofs_in_node, false); + if (IS_ERR(data_page)) + goto next_iput; + + f2fs_put_page(data_page, 0); + add_gc_inode(inode, ilist); + } else { + inode = find_gc_inode(dni.ino, ilist); + if (inode) { + start_bidx = start_bidx_of_node(nofs, + F2FS_I(inode)); + data_page = get_lock_data_page(inode, + start_bidx + ofs_in_node); + if (IS_ERR(data_page)) + continue; + move_data_page(inode, data_page, gc_type); + stat_inc_data_blk_count(sbi, 1); + } + } + continue; +next_iput: + iput(inode); + } + + if (++phase < 4) + goto next_step; + + if (gc_type == FG_GC) { + f2fs_submit_bio(sbi, DATA, true); + + /* + * In the case of FG_GC, it'd be better to reclaim this victim + * completely. 
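+ * If any valid blocks survive the writeback that was just
+ * submitted, restart from phase 2 so the remaining data blocks
+ * are moved as well.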
+ */ + if (get_valid_blocks(sbi, segno, 1) != 0) { + phase = 2; + goto next_step; + } + } +} + +static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, + int gc_type, int type) +{ + struct sit_info *sit_i = SIT_I(sbi); + int ret; + mutex_lock(&sit_i->sentry_lock); + ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); + mutex_unlock(&sit_i->sentry_lock); + return ret; +} + +static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, + struct list_head *ilist, int gc_type) +{ + struct page *sum_page; + struct f2fs_summary_block *sum; + struct blk_plug plug; + + /* read segment summary of victim */ + sum_page = get_sum_page(sbi, segno); + if (IS_ERR(sum_page)) + return; + + blk_start_plug(&plug); + + sum = page_address(sum_page); + + switch (GET_SUM_TYPE((&sum->footer))) { + case SUM_TYPE_NODE: + gc_node_segment(sbi, sum->entries, segno, gc_type); + break; + case SUM_TYPE_DATA: + gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + break; + } + blk_finish_plug(&plug); + + stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); + stat_inc_call_count(sbi->stat_info); + + f2fs_put_page(sum_page, 1); +} + +int f2fs_gc(struct f2fs_sb_info *sbi) +{ + struct list_head ilist; + unsigned int segno, i; + int gc_type = BG_GC; + int nfree = 0; + int ret = -1; + + INIT_LIST_HEAD(&ilist); +gc_more: + if (!(sbi->sb->s_flags & MS_ACTIVE)) + goto stop; + + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { + gc_type = FG_GC; + write_checkpoint(sbi, false); + } + + if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) + goto stop; + ret = 0; + + for (i = 0; i < sbi->segs_per_sec; i++) + do_garbage_collect(sbi, segno + i, &ilist, gc_type); + + if (gc_type == FG_GC) { + sbi->cur_victim_sec = NULL_SEGNO; + nfree++; + WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec)); + } + + if (has_not_enough_free_secs(sbi, nfree)) + goto gc_more; + + if (gc_type == FG_GC) + write_checkpoint(sbi, false); +stop: + mutex_unlock(&sbi->gc_mutex); + + put_gc_inode(&ilist); + return ret; +} + +void build_gc_manager(struct f2fs_sb_info *sbi) +{ + DIRTY_I(sbi)->v_ops = &default_v_ops; +} + +int __init create_gc_caches(void) +{ + winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", + sizeof(struct inode_entry), NULL); + if (!winode_slab) + return -ENOMEM; + return 0; +} + +void destroy_gc_caches(void) +{ + kmem_cache_destroy(winode_slab); +} diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h new file mode 100644 index 00000000000..28c7d8e320c --- /dev/null +++ b/fs/f2fs/gc.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/gc.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define GC_THREAD_MIN_WB_PAGES 1 /* + * a threshold to determine + * whether IO subsystem is idle + * or not + */ +#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ +#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 +#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ +#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ +#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ + +/* Search max. 
number of dirty segments to select a victim segment */ +#define MAX_VICTIM_SEARCH 20 + +struct f2fs_gc_kthread { + struct task_struct *f2fs_gc_task; + wait_queue_head_t gc_wait_queue_head; + + /* for gc sleep time */ + unsigned int min_sleep_time; + unsigned int max_sleep_time; + unsigned int no_gc_sleep_time; + + /* for changing gc mode */ + unsigned int gc_idle; +}; + +struct inode_entry { + struct list_head list; + struct inode *inode; +}; + +/* + * inline functions + */ +static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) +{ + if (free_segments(sbi) < overprovision_segments(sbi)) + return 0; + else + return (free_segments(sbi) - overprovision_segments(sbi)) + << sbi->log_blocks_per_seg; +} + +static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi) +{ + return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100; +} + +static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) +{ + block_t reclaimable_user_blocks = sbi->user_block_count - + written_block_count(sbi); + return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; +} + +static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +{ + if (wait == gc_th->no_gc_sleep_time) + return wait; + + wait += gc_th->min_sleep_time; + if (wait > gc_th->max_sleep_time) + wait = gc_th->max_sleep_time; + return wait; +} + +static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +{ + if (wait == gc_th->no_gc_sleep_time) + wait = gc_th->max_sleep_time; + + wait -= gc_th->min_sleep_time; + if (wait <= gc_th->min_sleep_time) + wait = gc_th->min_sleep_time; + return wait; +} + +static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) +{ + block_t invalid_user_blocks = sbi->user_block_count - + written_block_count(sbi); + /* + * Background GC is triggered with the following condition. + * 1. There are a number of invalid blocks. + * 2. There is not enough free space. + */ + if (invalid_user_blocks > limit_invalid_user_blocks(sbi) && + free_user_blocks(sbi) < limit_free_user_blocks(sbi)) + return true; + return false; +} + +static inline int is_idle(struct f2fs_sb_info *sbi) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct request_queue *q = bdev_get_queue(bdev); + struct request_list *rl = &q->rq; + return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); +} diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c new file mode 100644 index 00000000000..9e352f15484 --- /dev/null +++ b/fs/f2fs/hash.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/hash.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext3/hash.c + * + * Copyright (C) 2002 by Theodore Ts'o + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include + +#include "f2fs.h" + +/* + * Hashing code copied from ext3 + */ +#define DELTA 0x9E3779B9 + +static void TEA_transform(unsigned int buf[4], unsigned int const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); + } while (--n); + + buf[0] += b0; + buf[1] += b1; +} + +static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) +{ + unsigned pad, val; + int i; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > num * 4) + len = num * 4; + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = msg[i] + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) +{ + __u32 hash; + f2fs_hash_t f2fs_hash; + const char *p; + __u32 in[8], buf[4]; + + if ((len <= 2) && (name[0] == '.') && + (name[1] == '.' || name[1] == '\0')) + return 0; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + p = name; + while (1) { + str2hashbuf(p, len, in, 4); + TEA_transform(buf, in); + p += 16; + if (len <= 16) + break; + len -= 16; + } + hash = buf[0]; + f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); + return f2fs_hash; +} diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c new file mode 100644 index 00000000000..6b7fbd39180 --- /dev/null +++ b/fs/f2fs/inode.c @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/inode.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" + +#include + +void f2fs_set_inode_flags(struct inode *inode) +{ + unsigned int flags = F2FS_I(inode)->i_flags; + + inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | + S_NOATIME | S_DIRSYNC); + + if (flags & FS_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & FS_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & FS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & FS_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & FS_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +} + +static int do_read_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct page *node_page; + struct f2fs_node *rn; + struct f2fs_inode *ri; + + /* Check if ino is within scope */ + if (check_nid_range(sbi, inode->i_ino)) { + f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", + (unsigned long) inode->i_ino); + return -EINVAL; + } + + node_page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); + + rn = F2FS_NODE(node_page); + ri = &(rn->i); + + inode->i_mode = le16_to_cpu(ri->i_mode); + inode->i_uid = le32_to_cpu(ri->i_uid); + inode->i_gid = le32_to_cpu(ri->i_gid); + set_nlink(inode, le32_to_cpu(ri->i_links)); + inode->i_size = le64_to_cpu(ri->i_size); + inode->i_blocks = le64_to_cpu(ri->i_blocks); + + inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); + inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); + inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime); + inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec); + inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); + inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); + inode->i_generation = le32_to_cpu(ri->i_generation); + if (ri->i_addr[0]) + inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); + else + inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); + + fi->i_current_depth = le32_to_cpu(ri->i_current_depth); + fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); + fi->i_flags = le32_to_cpu(ri->i_flags); + fi->flags = 0; + fi->i_advise = ri->i_advise; + fi->i_pino = le32_to_cpu(ri->i_pino); + get_extent_info(&fi->ext, ri->i_ext); + get_inline_info(fi, ri); + f2fs_put_page(node_page, 1); + return 0; +} + +struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + int ret = 0; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + + if (!(inode->i_state & I_NEW)) { + trace_f2fs_iget(inode); + return inode; + } + if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) + goto make_now; + + ret = do_read_inode(inode); + if (ret) + goto bad_inode; +make_now: + if (ino == F2FS_NODE_INO(sbi)) { + inode->i_mapping->a_ops = &f2fs_node_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + } else if (ino == F2FS_META_INO(sbi)) { + inode->i_mapping->a_ops = &f2fs_meta_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + } else if (S_ISREG(inode->i_mode)) { + inode->i_op = &f2fs_file_inode_operations; + inode->i_fop = &f2fs_file_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &f2fs_dir_inode_operations; + inode->i_fop = &f2fs_dir_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = &f2fs_symlink_inode_operations; + 
inode->i_mapping->a_ops = &f2fs_dblock_aops; + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + inode->i_op = &f2fs_special_inode_operations; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + } else { + ret = -EIO; + goto bad_inode; + } + unlock_new_inode(inode); + trace_f2fs_iget(inode); + return inode; + +bad_inode: + iget_failed(inode); + trace_f2fs_iget_exit(inode, ret); + return ERR_PTR(ret); +} + +void update_inode(struct inode *inode, struct page *node_page) +{ + struct f2fs_node *rn; + struct f2fs_inode *ri; + + f2fs_wait_on_page_writeback(node_page, NODE, false); + + rn = F2FS_NODE(node_page); + ri = &(rn->i); + + ri->i_mode = cpu_to_le16(inode->i_mode); + ri->i_advise = F2FS_I(inode)->i_advise; + ri->i_uid = cpu_to_le32(inode->i_uid); + ri->i_gid = cpu_to_le32(inode->i_gid); + ri->i_links = cpu_to_le32(inode->i_nlink); + ri->i_size = cpu_to_le64(i_size_read(inode)); + ri->i_blocks = cpu_to_le64(inode->i_blocks); + set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); + set_raw_inline(F2FS_I(inode), ri); + + ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); + ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); + ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); + ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth); + ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); + ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); + ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); + ri->i_generation = cpu_to_le32(inode->i_generation); + + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + if (old_valid_dev(inode->i_rdev)) { + ri->i_addr[0] = + cpu_to_le32(old_encode_dev(inode->i_rdev)); + ri->i_addr[1] = 0; + } else { + ri->i_addr[0] = 0; + ri->i_addr[1] = + cpu_to_le32(new_encode_dev(inode->i_rdev)); + ri->i_addr[2] = 0; + } + } + + set_cold_node(inode, node_page); + set_page_dirty(node_page); + clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); +} + +int update_inode_page(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *node_page; + + node_page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); + + update_inode(inode, node_page); + f2fs_put_page(node_page, 1); + return 0; +} + +int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ret, ilock; + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + return 0; + + if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) + return 0; + + /* + * We need to lock here to prevent from producing dirty node pages + * during the urgent cleaning time when runing out of free sections. 
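+	 * update_inode_page() below copies the in-memory inode into its
+	 * node page while the operation lock is held.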
+ */ + ilock = mutex_lock_op(sbi); + ret = update_inode_page(inode); + mutex_unlock_op(sbi, ilock); + + if (wbc) + f2fs_balance_fs(sbi); + + return ret; +} + +/* + * Called at the last iput() if i_nlink is zero + */ +void f2fs_evict_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; + + trace_f2fs_evict_inode(inode); + truncate_inode_pages(&inode->i_data, 0); + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + goto no_delete; + + BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents)); + remove_dirty_dir_inode(inode); + + if (inode->i_nlink || is_bad_inode(inode)) + goto no_delete; + + set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); + i_size_write(inode, 0); + + if (F2FS_HAS_BLOCKS(inode)) + f2fs_truncate(inode); + + ilock = mutex_lock_op(sbi); + remove_inode_page(inode); + mutex_unlock_op(sbi, ilock); + +no_delete: + end_writeback(inode); +} diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c new file mode 100644 index 00000000000..7e08105f3c4 --- /dev/null +++ b/fs/f2fs/namei.c @@ -0,0 +1,559 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/namei.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "xattr.h" +#include "acl.h" +#include + +static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + nid_t ino; + struct inode *inode; + bool nid_free = false; + int err, ilock; + + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + ilock = mutex_lock_op(sbi); + if (!alloc_nid(sbi, &ino)) { + mutex_unlock_op(sbi, ilock); + err = -ENOSPC; + goto fail; + } + mutex_unlock_op(sbi, ilock); + + if (IS_ANDROID_EMU(sbi, F2FS_I(dir), F2FS_I(dir))) + f2fs_android_emu(sbi, inode, &inode->i_uid, + &inode->i_gid, &mode); + else { + inode->i_uid = current_fsuid(); + + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else { + inode->i_gid = current_fsgid(); + } + } + + inode->i_ino = ino; + inode->i_mode = mode; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_generation = sbi->s_next_generation++; + + err = insert_inode_locked(inode); + if (err) { + err = -EINVAL; + nid_free = true; + goto out; + } + trace_f2fs_new_inode(inode, 0); + mark_inode_dirty(inode); + return inode; + +out: + clear_nlink(inode); + unlock_new_inode(inode); +fail: + trace_f2fs_new_inode(inode, err); + make_bad_inode(inode); + iput(inode); + if (nid_free) + alloc_nid_failed(sbi, ino); + return ERR_PTR(err); +} + +static int is_multimedia_file(const unsigned char *s, const char *sub) +{ + size_t slen = strlen(s); + size_t sublen = strlen(sub); + + if (sublen > slen) + return 0; + + return !strncasecmp(s + slen - sublen, sub, sublen); +} + +/* + * Set multimedia files as cold files for hot/cold data separation + */ +static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, + const unsigned char *name) +{ + int i; + __u8 (*extlist)[8] = sbi->raw_super->extension_list; + + int count = le32_to_cpu(sbi->raw_super->extension_count); + for (i = 0; i < count; i++) { + if (is_multimedia_file(name, 
extlist[i])) { + file_set_cold(inode); + break; + } + } +} + +static int f2fs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + nid_t ino = 0; + int err, ilock; + + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) + set_cold_files(sbi, inode, dentry->d_name.name); + + inode->i_op = &f2fs_file_inode_operations; + inode->i_fop = &f2fs_file_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + ino = inode->i_ino; + + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + alloc_nid_done(sbi, ino); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + return 0; +out: + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, ino); + return err; +} + +static int f2fs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + struct super_block *sb; + struct f2fs_sb_info *sbi; + int err, ilock; + + if (inode->i_nlink >= F2FS_LINK_MAX) + return -EMLINK; + + sb = dir->i_sb; + sbi = F2FS_SB(sb); + + f2fs_balance_fs(sbi); + + inode->i_ctime = CURRENT_TIME; + ihold(inode); + + set_inode_flag(F2FS_I(inode), FI_INC_LINK); + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + d_instantiate(dentry, inode); + return 0; +out: + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + iput(inode); + return err; +} + +struct dentry *f2fs_get_parent(struct dentry *child) +{ + struct qstr dotdot = {.name = "..", .len = 2}; + unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot); + if (!ino) + return ERR_PTR(-ENOENT); + return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); +} + +static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode = NULL; + struct f2fs_dir_entry *de; + struct page *page; + + if (dentry->d_name.len > F2FS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + de = f2fs_find_entry(dir, &dentry->d_name, &page); + if (de) { + nid_t ino = le32_to_cpu(de->ino); + kunmap(page); + f2fs_put_page(page, 0); + + inode = f2fs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + } + + return d_splice_alias(inode, dentry); +} + +static int f2fs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode = dentry->d_inode; + struct f2fs_dir_entry *de; + struct page *page; + int err = -ENOENT; + int ilock; + + trace_f2fs_unlink_enter(dir, dentry); + f2fs_balance_fs(sbi); + + de = f2fs_find_entry(dir, &dentry->d_name, &page); + if (!de) + goto fail; + + err = acquire_orphan_inode(sbi); + if (err) { + kunmap(page); + f2fs_put_page(page, 0); + goto fail; + } + + ilock = mutex_lock_op(sbi); + f2fs_delete_entry(de, page, inode); + mutex_unlock_op(sbi, ilock); + + /* In order to evict this inode, we set it dirty */ + mark_inode_dirty(inode); +fail: + trace_f2fs_unlink_exit(inode, err); + return err; +} + +static int f2fs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + size_t symlen = 
strlen(symname) + 1; + int err, ilock; + + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &f2fs_symlink_inode_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + err = page_symlink(inode, symname, symlen); + alloc_nid_done(sbi, inode->i_ino); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + return err; +out: + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct f2fs_sb_info *sbi; + struct inode *inode; + int err, ilock; + + if (dir->i_nlink >= F2FS_LINK_MAX) + return -EMLINK; + + sbi = F2FS_SB(dir->i_sb); + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, S_IFDIR | mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &f2fs_dir_inode_operations; + inode->i_fop = &f2fs_dir_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + + set_inode_flag(F2FS_I(inode), FI_INC_LINK); + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out_fail; + + alloc_nid_done(sbi, inode->i_ino); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + + return 0; + +out_fail: + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + if (f2fs_empty_dir(inode)) + return f2fs_unlink(dir, dentry); + return -ENOTEMPTY; +} + +static int f2fs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + int err = 0; + int ilock; + + if (!new_valid_dev(rdev)) + return -EINVAL; + + f2fs_balance_fs(sbi); + + inode = f2fs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + init_special_inode(inode, inode->i_mode, rdev); + inode->i_op = &f2fs_special_inode_operations; + + ilock = mutex_lock_op(sbi); + err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); + if (err) + goto out; + + alloc_nid_done(sbi, inode->i_ino); + d_instantiate(dentry, inode); + unlock_new_inode(inode); + return 0; +out: + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct super_block *sb = old_dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + struct page *old_dir_page; + struct page *old_page, *new_page; + struct f2fs_dir_entry *old_dir_entry = NULL; + struct f2fs_dir_entry *old_entry; + struct f2fs_dir_entry *new_entry; + int err = -ENOENT, ilock = -1; + + f2fs_balance_fs(sbi); + + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); + if (!old_entry) + goto out; + + if (S_ISDIR(old_inode->i_mode)) { + err = -EIO; + old_dir_entry = 
f2fs_parent_dir(old_inode, &old_dir_page); + if (!old_dir_entry) + goto out_old; + } + + ilock = mutex_lock_op(sbi); + + if (new_inode) { + + err = -ENOTEMPTY; + if (old_dir_entry && !f2fs_empty_dir(new_inode)) + goto out_dir; + + err = -ENOENT; + new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, + &new_page); + if (!new_entry) + goto out_dir; + + err = acquire_orphan_inode(sbi); + if (err) + goto put_out_dir; + + if (update_dent_inode(old_inode, &new_dentry->d_name)) { + release_orphan_inode(sbi); + goto put_out_dir; + } + + f2fs_set_link(new_dir, new_entry, new_page, old_inode); + + new_inode->i_ctime = CURRENT_TIME; + if (old_dir_entry) + drop_nlink(new_inode); + drop_nlink(new_inode); + + if (!new_inode->i_nlink) + add_orphan_inode(sbi, new_inode->i_ino); + else + release_orphan_inode(sbi); + + update_inode_page(old_inode); + update_inode_page(new_inode); + } else { + if (old_dir_entry) { + err = -EMLINK; + if (new_dir->i_nlink >= F2FS_LINK_MAX) + goto out_dir; + } + + err = f2fs_add_link(new_dentry, old_inode); + if (err) + goto out_dir; + + if (old_dir_entry) { + inc_nlink(new_dir); + update_inode_page(new_dir); + } + } + + old_inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(old_inode); + + f2fs_delete_entry(old_entry, old_page, NULL); + + if (old_dir_entry) { + if (old_dir != new_dir) { + f2fs_set_link(old_inode, old_dir_entry, + old_dir_page, new_dir); + } else { + kunmap(old_dir_page); + f2fs_put_page(old_dir_page, 0); + } + drop_nlink(old_dir); + update_inode_page(old_dir); + } + + mutex_unlock_op(sbi, ilock); + return 0; + +put_out_dir: + if (PageLocked(new_page)) + f2fs_put_page(new_page, 1); + else + f2fs_put_page(new_page, 0); +out_dir: + if (old_dir_entry) { + kunmap(old_dir_page); + f2fs_put_page(old_dir_page, 0); + } + mutex_unlock_op(sbi, ilock); +out_old: + kunmap(old_page); + f2fs_put_page(old_page, 0); +out: + return err; +} + +const struct inode_operations f2fs_dir_inode_operations = { + .create = f2fs_create, + .lookup = f2fs_lookup, + .link = f2fs_link, + .unlink = f2fs_unlink, + .symlink = f2fs_symlink, + .mkdir = f2fs_mkdir, + .rmdir = f2fs_rmdir, + .mknod = f2fs_mknod, + .rename = f2fs_rename, + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +const struct inode_operations f2fs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +const struct inode_operations f2fs_special_inode_operations = { + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c new file mode 100644 index 00000000000..5ad3c3b6685 --- /dev/null +++ b/fs/f2fs/node.c @@ -0,0 +1,1861 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/node.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. 
+ * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include + +static struct kmem_cache *nat_entry_slab; +static struct kmem_cache *free_nid_slab; + +static void clear_node_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + unsigned int long flags; + + if (PageDirty(page)) { + spin_lock_irqsave(&mapping->tree_lock, flags); + radix_tree_tag_clear(&mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + + clear_page_dirty_for_io(page); + dec_page_count(sbi, F2FS_DIRTY_NODES); + } + ClearPageUptodate(page); +} + +static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + pgoff_t index = current_nat_addr(sbi, nid); + return get_meta_page(sbi, index); +} + +static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct page *src_page; + struct page *dst_page; + pgoff_t src_off; + pgoff_t dst_off; + void *src_addr; + void *dst_addr; + struct f2fs_nm_info *nm_i = NM_I(sbi); + + src_off = current_nat_addr(sbi, nid); + dst_off = next_nat_addr(sbi, src_off); + + /* get current nat block page with lock */ + src_page = get_meta_page(sbi, src_off); + + /* Dirty src_page means that it is already the new target NAT page. */ + if (PageDirty(src_page)) + return src_page; + + dst_page = grab_meta_page(sbi, dst_off); + + src_addr = page_address(src_page); + dst_addr = page_address(dst_page); + memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); + set_page_dirty(dst_page); + f2fs_put_page(src_page, 1); + + set_to_next_nat(nm_i, nid); + + return dst_page; +} + +/* + * Readahead NAT pages + */ +static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct blk_plug plug; + struct page *page; + pgoff_t index; + int i; + + blk_start_plug(&plug); + + for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { + if (nid >= nm_i->max_nid) + nid = 0; + index = current_nat_addr(sbi, nid); + + page = grab_cache_page(mapping, index); + if (!page) + continue; + if (PageUptodate(page)) { + f2fs_put_page(page, 1); + continue; + } + if (f2fs_readpage(sbi, page, index, READ)) + continue; + + f2fs_put_page(page, 0); + } + blk_finish_plug(&plug); +} + +static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) +{ + return radix_tree_lookup(&nm_i->nat_root, n); +} + +static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, + nid_t start, unsigned int nr, struct nat_entry **ep) +{ + return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr); +} + +static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) +{ + list_del(&e->list); + radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); + nm_i->nat_cnt--; + kmem_cache_free(nat_entry_slab, e); +} + +int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; + int is_cp = 1; + + read_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e && !e->checkpointed) + is_cp = 0; + read_unlock(&nm_i->nat_tree_lock); + return 
is_cp; +} + +static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) +{ + struct nat_entry *new; + + new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); + if (!new) + return NULL; + if (radix_tree_insert(&nm_i->nat_root, nid, new)) { + kmem_cache_free(nat_entry_slab, new); + return NULL; + } + memset(new, 0, sizeof(struct nat_entry)); + nat_set_nid(new, nid); + list_add_tail(&new->list, &nm_i->nat_entries); + nm_i->nat_cnt++; + return new; +} + +static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, + struct f2fs_nat_entry *ne) +{ + struct nat_entry *e; +retry: + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (!e) { + e = grab_nat_entry(nm_i, nid); + if (!e) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); + nat_set_ino(e, le32_to_cpu(ne->ino)); + nat_set_version(e, ne->version); + e->checkpointed = true; + } + write_unlock(&nm_i->nat_tree_lock); +} + +static int set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, + block_t new_blkaddr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; +retry: + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, ni->nid); + if (!e) { + e = grab_nat_entry(nm_i, ni->nid); + if (!e) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + e->ni = *ni; + e->checkpointed = true; + BUG_ON(ni->blk_addr == NEW_ADDR); + } else if (new_blkaddr == NEW_ADDR) { + /* + * when nid is reallocated, + * previous nat entry can be remained in nat cache. + * So, reinitialize it with new information. + */ + e->ni = *ni; + if (ni->blk_addr != NULL_ADDR) { + f2fs_msg(sbi->sb, KERN_ERR, "node block address is " + "already set: %u", ni->blk_addr); + f2fs_handle_error(sbi); + /* just give up on this node */ + write_unlock(&nm_i->nat_tree_lock); + return -EIO; + } + } + + if (new_blkaddr == NEW_ADDR) + e->checkpointed = false; + + /* sanity check */ + BUG_ON(nat_get_blkaddr(e) != ni->blk_addr); + BUG_ON(nat_get_blkaddr(e) == NULL_ADDR && + new_blkaddr == NULL_ADDR); + BUG_ON(nat_get_blkaddr(e) == NEW_ADDR && + new_blkaddr == NEW_ADDR); + BUG_ON(nat_get_blkaddr(e) != NEW_ADDR && + nat_get_blkaddr(e) != NULL_ADDR && + new_blkaddr == NEW_ADDR); + + /* increament version no as node is removed */ + if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { + unsigned char version = nat_get_version(e); + nat_set_version(e, inc_node_version(version)); + } + + /* change address */ + nat_set_blkaddr(e, new_blkaddr); + __set_nat_cache_dirty(nm_i, e); + write_unlock(&nm_i->nat_tree_lock); + return 0; +} + +static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) + return 0; + + write_lock(&nm_i->nat_tree_lock); + while (nr_shrink && !list_empty(&nm_i->nat_entries)) { + struct nat_entry *ne; + ne = list_first_entry(&nm_i->nat_entries, + struct nat_entry, list); + __del_from_nat_cache(nm_i, ne); + nr_shrink--; + } + write_unlock(&nm_i->nat_tree_lock); + return nr_shrink; +} + +/* + * This function returns always success + */ +void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + nid_t start_nid = START_NID(nid); + struct f2fs_nat_block *nat_blk; + struct page *page = NULL; + struct f2fs_nat_entry ne; + struct nat_entry *e; + int i; + + memset(&ne, 
0, sizeof(struct f2fs_nat_entry)); + ni->nid = nid; + + /* Check nat cache */ + read_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e) { + ni->ino = nat_get_ino(e); + ni->blk_addr = nat_get_blkaddr(e); + ni->version = nat_get_version(e); + } + read_unlock(&nm_i->nat_tree_lock); + if (e) + return; + + /* Check current segment summary */ + mutex_lock(&curseg->curseg_mutex); + i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); + if (i >= 0) { + ne = nat_in_journal(sum, i); + node_info_from_raw_nat(ni, &ne); + } + mutex_unlock(&curseg->curseg_mutex); + if (i >= 0) + goto cache; + + /* Fill node_info from nat page */ + page = get_current_nat_page(sbi, start_nid); + nat_blk = (struct f2fs_nat_block *)page_address(page); + ne = nat_blk->entries[nid - start_nid]; + node_info_from_raw_nat(ni, &ne); + f2fs_put_page(page, 1); +cache: + /* cache nat entry */ + cache_nat_entry(NM_I(sbi), nid, &ne); +} + +/* + * The maximum depth is four. + * Offset[0] will have raw inode offset. + */ +static int get_node_path(struct f2fs_inode_info *fi, long block, + int offset[4], unsigned int noffset[4]) +{ + const long direct_index = ADDRS_PER_INODE(fi); + const long direct_blks = ADDRS_PER_BLOCK; + const long dptrs_per_blk = NIDS_PER_BLOCK; + const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; + const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK; + int n = 0; + int level = 0; + + noffset[0] = 0; + + if (block < direct_index) { + offset[n] = block; + goto got; + } + block -= direct_index; + if (block < direct_blks) { + offset[n++] = NODE_DIR1_BLOCK; + noffset[n] = 1; + offset[n] = block; + level = 1; + goto got; + } + block -= direct_blks; + if (block < direct_blks) { + offset[n++] = NODE_DIR2_BLOCK; + noffset[n] = 2; + offset[n] = block; + level = 1; + goto got; + } + block -= direct_blks; + if (block < indirect_blks) { + offset[n++] = NODE_IND1_BLOCK; + noffset[n] = 3; + offset[n++] = block / direct_blks; + noffset[n] = 4 + offset[n - 1]; + offset[n] = block % direct_blks; + level = 2; + goto got; + } + block -= indirect_blks; + if (block < indirect_blks) { + offset[n++] = NODE_IND2_BLOCK; + noffset[n] = 4 + dptrs_per_blk; + offset[n++] = block / direct_blks; + noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; + offset[n] = block % direct_blks; + level = 2; + goto got; + } + block -= indirect_blks; + if (block < dindirect_blks) { + offset[n++] = NODE_DIND_BLOCK; + noffset[n] = 5 + (dptrs_per_blk * 2); + offset[n++] = block / indirect_blks; + noffset[n] = 6 + (dptrs_per_blk * 2) + + offset[n - 1] * (dptrs_per_blk + 1); + offset[n++] = (block / direct_blks) % dptrs_per_blk; + noffset[n] = 7 + (dptrs_per_blk * 2) + + offset[n - 2] * (dptrs_per_blk + 1) + + offset[n - 1]; + offset[n] = block % direct_blks; + level = 3; + goto got; + } else { + BUG(); + } +got: + return level; +} + +/* + * Caller should call f2fs_put_dnode(dn). + * Also, it should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op() only if ro is not set RDONLY_NODE. + * In the case of RDONLY_NODE, we don't need to care about mutex. 
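+ * mode selects the lookup behaviour: ALLOC_NODE allocates any missing
+ * node pages on the path, while LOOKUP_NODE_RA additionally reads ahead
+ * the sibling node pages of the last level via get_node_page_ra().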
+ */ +int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *npage[4]; + struct page *parent; + int offset[4]; + unsigned int noffset[4]; + nid_t nids[4]; + int level, i; + int err = 0; + + level = get_node_path(F2FS_I(dn->inode), index, offset, noffset); + + nids[0] = dn->inode->i_ino; + npage[0] = dn->inode_page; + + if (!npage[0]) { + npage[0] = get_node_page(sbi, nids[0]); + if (IS_ERR(npage[0])) + return PTR_ERR(npage[0]); + } + parent = npage[0]; + if (level != 0) + nids[1] = get_nid(parent, offset[0], true); + dn->inode_page = npage[0]; + dn->inode_page_locked = true; + + /* get indirect or direct nodes */ + for (i = 1; i <= level; i++) { + bool done = false; + + if (!nids[i] && mode == ALLOC_NODE) { + /* alloc new node */ + if (!alloc_nid(sbi, &(nids[i]))) { + err = -ENOSPC; + goto release_pages; + } + + dn->nid = nids[i]; + npage[i] = new_node_page(dn, noffset[i], NULL); + if (IS_ERR(npage[i])) { + alloc_nid_failed(sbi, nids[i]); + err = PTR_ERR(npage[i]); + goto release_pages; + } + + set_nid(parent, offset[i - 1], nids[i], i == 1); + alloc_nid_done(sbi, nids[i]); + done = true; + } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { + npage[i] = get_node_page_ra(parent, offset[i - 1]); + if (IS_ERR(npage[i])) { + err = PTR_ERR(npage[i]); + goto release_pages; + } + done = true; + } + if (i == 1) { + dn->inode_page_locked = false; + unlock_page(parent); + } else { + f2fs_put_page(parent, 1); + } + + if (!done) { + npage[i] = get_node_page(sbi, nids[i]); + if (IS_ERR(npage[i])) { + err = PTR_ERR(npage[i]); + f2fs_put_page(npage[0], 0); + goto release_out; + } + } + if (i < level) { + parent = npage[i]; + nids[i + 1] = get_nid(parent, offset[i], false); + } + } + dn->nid = nids[level]; + dn->ofs_in_node = offset[level]; + dn->node_page = npage[level]; + dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); + return 0; + +release_pages: + f2fs_put_page(parent, 1); + if (i > 1) + f2fs_put_page(npage[0], 0); +release_out: + dn->inode_page = NULL; + dn->node_page = NULL; + return err; +} + +static void truncate_node(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct node_info ni; + + get_node_info(sbi, dn->nid, &ni); + if (dn->inode->i_blocks == 0) { + if (ni.blk_addr != NULL_ADDR) { + f2fs_msg(sbi->sb, KERN_ERR, + "empty node still has block address %u ", + ni.blk_addr); + f2fs_handle_error(sbi); + } + goto invalidate; + } + BUG_ON(ni.blk_addr == NULL_ADDR); + + /* Deallocate node address */ + invalidate_blocks(sbi, ni.blk_addr); + dec_valid_node_count(sbi, dn->inode, 1); + set_node_addr(sbi, &ni, NULL_ADDR); + + if (dn->nid == dn->inode->i_ino) { + remove_orphan_inode(sbi, dn->nid); + dec_valid_inode_count(sbi); + } else { + sync_inode_page(dn); + } +invalidate: + clear_node_page_dirty(dn->node_page); + F2FS_SET_SB_DIRT(sbi); + + f2fs_put_page(dn->node_page, 1); + dn->node_page = NULL; + trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); +} + +static int truncate_dnode(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *page; + + if (dn->nid == 0) + return 1; + + /* get direct node */ + page = get_node_page(sbi, dn->nid); + if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) + return 1; + else if (IS_ERR(page)) + return PTR_ERR(page); + + /* Make dnode_of_data for parameter */ + dn->node_page = page; + dn->ofs_in_node = 0; + truncate_data_blocks(dn); + truncate_node(dn); + return 1; 
+} + +static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, + int ofs, int depth) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct dnode_of_data rdn = *dn; + struct page *page; + struct f2fs_node *rn; + nid_t child_nid; + unsigned int child_nofs; + int freed = 0; + int i, ret; + + if (dn->nid == 0) + return NIDS_PER_BLOCK + 1; + + trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); + + page = get_node_page(sbi, dn->nid); + if (IS_ERR(page)) { + trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); + return PTR_ERR(page); + } + + rn = F2FS_NODE(page); + if (depth < 3) { + for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { + child_nid = le32_to_cpu(rn->in.nid[i]); + if (child_nid == 0) + continue; + rdn.nid = child_nid; + ret = truncate_dnode(&rdn); + if (ret < 0) + goto out_err; + set_nid(page, i, 0, false); + } + } else { + child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; + for (i = ofs; i < NIDS_PER_BLOCK; i++) { + child_nid = le32_to_cpu(rn->in.nid[i]); + if (child_nid == 0) { + child_nofs += NIDS_PER_BLOCK + 1; + continue; + } + rdn.nid = child_nid; + ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); + if (ret == (NIDS_PER_BLOCK + 1)) { + set_nid(page, i, 0, false); + child_nofs += ret; + } else if (ret < 0 && ret != -ENOENT) { + goto out_err; + } + } + freed = child_nofs; + } + + if (!ofs) { + /* remove current indirect node */ + dn->node_page = page; + truncate_node(dn); + freed++; + } else { + f2fs_put_page(page, 1); + } + trace_f2fs_truncate_nodes_exit(dn->inode, freed); + return freed; + +out_err: + f2fs_put_page(page, 1); + trace_f2fs_truncate_nodes_exit(dn->inode, ret); + return ret; +} + +static int truncate_partial_nodes(struct dnode_of_data *dn, + struct f2fs_inode *ri, int *offset, int depth) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *pages[2]; + nid_t nid[3]; + nid_t child_nid; + int err = 0; + int i; + int idx = depth - 2; + + nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); + if (!nid[0]) + return 0; + + /* get indirect nodes in the path */ + for (i = 0; i < depth - 1; i++) { + /* refernece count'll be increased */ + pages[i] = get_node_page(sbi, nid[i]); + if (IS_ERR(pages[i])) { + depth = i + 1; + err = PTR_ERR(pages[i]); + goto fail; + } + nid[i + 1] = get_nid(pages[i], offset[i + 1], false); + } + + /* free direct nodes linked to a partial indirect node */ + for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) { + child_nid = get_nid(pages[idx], i, false); + if (!child_nid) + continue; + dn->nid = child_nid; + err = truncate_dnode(dn); + if (err < 0) + goto fail; + set_nid(pages[idx], i, 0, false); + } + + if (offset[depth - 1] == 0) { + dn->node_page = pages[idx]; + dn->nid = nid[idx]; + truncate_node(dn); + } else { + f2fs_put_page(pages[idx], 1); + } + offset[idx]++; + offset[depth - 1] = 0; +fail: + for (i = depth - 3; i >= 0; i--) + f2fs_put_page(pages[i], 1); + + trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); + + return err; +} + +/* + * All the block addresses of data and nodes should be nullified. 
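+ * Partially covered node blocks are handled by truncate_partial_nodes();
+ * whole direct and indirect subtrees are then freed through
+ * truncate_dnode() and truncate_nodes().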
+ */ +int truncate_inode_blocks(struct inode *inode, pgoff_t from) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *node_mapping = sbi->node_inode->i_mapping; + int err = 0, cont = 1; + int level, offset[4], noffset[4]; + unsigned int nofs = 0; + struct f2fs_node *rn; + struct dnode_of_data dn; + struct page *page; + + trace_f2fs_truncate_inode_blocks_enter(inode, from); + + level = get_node_path(F2FS_I(inode), from, offset, noffset); +restart: + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) { + trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); + return PTR_ERR(page); + } + + set_new_dnode(&dn, inode, page, NULL, 0); + unlock_page(page); + + rn = F2FS_NODE(page); + switch (level) { + case 0: + case 1: + nofs = noffset[1]; + break; + case 2: + nofs = noffset[1]; + if (!offset[level - 1]) + goto skip_partial; + err = truncate_partial_nodes(&dn, &rn->i, offset, level); + if (err < 0 && err != -ENOENT) + goto fail; + nofs += 1 + NIDS_PER_BLOCK; + break; + case 3: + nofs = 5 + 2 * NIDS_PER_BLOCK; + if (!offset[level - 1]) + goto skip_partial; + err = truncate_partial_nodes(&dn, &rn->i, offset, level); + if (err < 0 && err != -ENOENT) + goto fail; + break; + default: + BUG(); + } + +skip_partial: + while (cont) { + dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]); + switch (offset[0]) { + case NODE_DIR1_BLOCK: + case NODE_DIR2_BLOCK: + err = truncate_dnode(&dn); + break; + + case NODE_IND1_BLOCK: + case NODE_IND2_BLOCK: + err = truncate_nodes(&dn, nofs, offset[1], 2); + break; + + case NODE_DIND_BLOCK: + err = truncate_nodes(&dn, nofs, offset[1], 3); + cont = 0; + break; + + default: + BUG(); + } + if (err < 0 && err != -ENOENT) + goto fail; + if (offset[1] == 0 && + rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { + lock_page(page); + if (page->mapping != node_mapping) { + f2fs_put_page(page, 1); + goto restart; + } + wait_on_page_writeback(page); + rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; + set_page_dirty(page); + unlock_page(page); + } + offset[1] = 0; + offset[0]++; + nofs += err; + } +fail: + f2fs_put_page(page, 0); + trace_f2fs_truncate_inode_blocks_exit(inode, err); + return err > 0 ? 0 : err; +} + +int truncate_xattr_node(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + nid_t nid = F2FS_I(inode)->i_xattr_nid; + struct dnode_of_data dn; + struct page *npage; + + if (!nid) + return 0; + + npage = get_node_page(sbi, nid); + if (IS_ERR(npage)) + return PTR_ERR(npage); + + F2FS_I(inode)->i_xattr_nid = 0; + + /* need to do checkpoint during fsync */ + F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); + + set_new_dnode(&dn, inode, page, npage, nid); + + if (page) + dn.inode_page_locked = 1; + truncate_node(&dn); + return 0; +} + +/* + * Caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). 
+ */ +int remove_inode_page(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + nid_t ino = inode->i_ino; + struct dnode_of_data dn; + int err; + + page = get_node_page(sbi, ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + err = truncate_xattr_node(inode, page); + if (err) { + f2fs_put_page(page, 1); + return err; + } + + /* 0 is possible, after f2fs_new_inode() is failed */ + if (inode->i_blocks != 0 && inode->i_blocks != 1) { + f2fs_msg(sbi->sb, KERN_ERR, "inode %u still has %llu blocks", + ino, inode->i_blocks); + f2fs_handle_error(sbi); + } + set_new_dnode(&dn, inode, page, page, ino); + truncate_node(&dn); + return 0; +} + +struct page *new_inode_page(struct inode *inode, const struct qstr *name) +{ + struct dnode_of_data dn; + + /* allocate inode page for new inode */ + set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); + + /* caller should f2fs_put_page(page, 1); */ + return new_node_page(&dn, 0, NULL); +} + +struct page *new_node_page(struct dnode_of_data *dn, + unsigned int ofs, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct address_space *mapping = sbi->node_inode->i_mapping; + struct node_info old_ni, new_ni; + struct page *page; + int err; + + if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) + return ERR_PTR(-EPERM); + + page = grab_cache_page(mapping, dn->nid); + if (!page) + return ERR_PTR(-ENOMEM); + + if (!inc_valid_node_count(sbi, dn->inode, 1)) { + err = -ENOSPC; + goto fail; + } + + get_node_info(sbi, dn->nid, &old_ni); + + /* Reinitialize old_ni with new node page */ + BUG_ON(old_ni.blk_addr != NULL_ADDR); + new_ni = old_ni; + new_ni.ino = dn->inode->i_ino; + set_node_addr(sbi, &new_ni, NEW_ADDR); + + fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); + set_cold_node(dn->inode, page); + SetPageUptodate(page); + set_page_dirty(page); + + if (ofs == XATTR_NODE_OFFSET) + F2FS_I(dn->inode)->i_xattr_nid = dn->nid; + + dn->node_page = page; + if (ipage) + update_inode(dn->inode, ipage); + else + sync_inode_page(dn); + if (ofs == 0) + inc_valid_inode_count(sbi); + + return page; + +fail: + clear_node_page_dirty(page); + f2fs_put_page(page, 1); + return ERR_PTR(err); +} + +/* + * Caller should do after getting the following values. 
+ * 0: f2fs_put_page(page, 0) + * LOCKED_PAGE: f2fs_put_page(page, 1) + * error: nothing + */ +static int read_node_page(struct page *page, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct node_info ni; + + get_node_info(sbi, page->index, &ni); + + if (ni.blk_addr == NULL_ADDR) { + f2fs_put_page(page, 1); + return -ENOENT; + } + + if (PageUptodate(page)) + return LOCKED_PAGE; + + return f2fs_readpage(sbi, page, ni.blk_addr, type); +} + +/* + * Readahead a node page + */ +void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + struct page *apage; + int err; + + apage = find_get_page(mapping, nid); + if (apage && PageUptodate(apage)) { + f2fs_put_page(apage, 0); + return; + } + f2fs_put_page(apage, 0); + + apage = grab_cache_page(mapping, nid); + if (!apage) + return; + + err = read_node_page(apage, READA); + if (err == 0) + f2fs_put_page(apage, 0); + else if (err == LOCKED_PAGE) + f2fs_put_page(apage, 1); +} + +struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + struct page *page; + int err; +repeat: + page = grab_cache_page(mapping, nid); + if (!page) + return ERR_PTR(-ENOMEM); + + err = read_node_page(page, READ_SYNC); + if (err < 0) + return ERR_PTR(err); + else if (err == LOCKED_PAGE) + goto got_it; + + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } +got_it: + if (nid != nid_of_node(page)) { + f2fs_msg(sbi->sb, KERN_ERR, "page node id does not match " + "request: %lu", nid); + f2fs_handle_error(sbi); + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + mark_page_accessed(page); + return page; +} + +/* + * Return a locked page for the desired node page. + * And, readahead MAX_RA_NODE number of node pages. + */ +struct page *get_node_page_ra(struct page *parent, int start) +{ + struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); + struct address_space *mapping = sbi->node_inode->i_mapping; + struct blk_plug plug; + struct page *page; + int err, i, end; + nid_t nid; + + /* First, try getting the desired direct node. 
*/ + nid = get_nid(parent, start, false); + if (!nid) + return ERR_PTR(-ENOENT); +repeat: + page = grab_cache_page(mapping, nid); + if (!page) + return ERR_PTR(-ENOMEM); + + err = read_node_page(page, READ_SYNC); + if (err < 0) + return ERR_PTR(err); + else if (err == LOCKED_PAGE) + goto page_hit; + + blk_start_plug(&plug); + + /* Then, try readahead for siblings of the desired node */ + end = start + MAX_RA_NODE; + end = min(end, NIDS_PER_BLOCK); + for (i = start + 1; i < end; i++) { + nid = get_nid(parent, i, false); + if (!nid) + continue; + ra_node_page(sbi, nid); + } + + blk_finish_plug(&plug); + + lock_page(page); + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } +page_hit: + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + mark_page_accessed(page); + return page; +} + +void sync_inode_page(struct dnode_of_data *dn) +{ + if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { + update_inode(dn->inode, dn->node_page); + } else if (dn->inode_page) { + if (!dn->inode_page_locked) + lock_page(dn->inode_page); + update_inode(dn->inode, dn->inode_page); + if (!dn->inode_page_locked) + unlock_page(dn->inode_page); + } else { + update_inode_page(dn->inode); + } +} + +int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, + struct writeback_control *wbc) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + pgoff_t index, end; + struct pagevec pvec; + int step = ino ? 2 : 0; + int nwritten = 0, wrote = 0; + + pagevec_init(&pvec, 0); + +next_step: + index = 0; + end = LONG_MAX; + + while (index <= end) { + int i, nr_pages; + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * flushing sequence with step: + * 0. indirect nodes + * 1. dentry dnodes + * 2. file dnodes + */ + if (step == 0 && IS_DNODE(page)) + continue; + if (step == 1 && (!IS_DNODE(page) || + is_cold_node(page))) + continue; + if (step == 2 && (!IS_DNODE(page) || + !is_cold_node(page))) + continue; + + /* + * If an fsync mode, + * we should not skip writing node pages. 
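+			 * Pages that belong to the fsync'd inode are locked
+			 * unconditionally; other pages are written only if
+			 * trylock_page() succeeds.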
+ */ + if (ino && ino_of_node(page) == ino) + lock_page(page); + else if (!trylock_page(page)) + continue; + + if (unlikely(page->mapping != mapping)) { +continue_unlock: + unlock_page(page); + continue; + } + if (ino && ino_of_node(page) != ino) + goto continue_unlock; + + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; + } + + if (!clear_page_dirty_for_io(page)) + goto continue_unlock; + + /* called by fsync() */ + if (ino && IS_DNODE(page)) { + int mark = !is_checkpointed_node(sbi, ino); + set_fsync_mark(page, 1); + if (IS_INODE(page)) + set_dentry_mark(page, mark); + nwritten++; + } else { + set_fsync_mark(page, 0); + set_dentry_mark(page, 0); + } + mapping->a_ops->writepage(page, wbc); + wrote++; + + if (--wbc->nr_to_write == 0) + break; + } + pagevec_release(&pvec); + cond_resched(); + + if (wbc->nr_to_write == 0) { + step = 2; + break; + } + } + + if (step < 2) { + step++; + goto next_step; + } + + if (wrote) + f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL); + + return nwritten; +} + +static int f2fs_write_node_page(struct page *page, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + nid_t nid; + block_t new_addr; + struct node_info ni; + + if (sbi->por_doing) + goto redirty_out; + + wait_on_page_writeback(page); + + /* get old block addr of this node page */ + nid = nid_of_node(page); + BUG_ON(page->index != nid); + + get_node_info(sbi, nid, &ni); + + /* This page is already truncated */ + if (ni.blk_addr == NULL_ADDR) { + dec_page_count(sbi, F2FS_DIRTY_NODES); + unlock_page(page); + return 0; + } + + if (wbc->for_reclaim) + goto redirty_out; + + mutex_lock(&sbi->node_write); + set_page_writeback(page); + write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); + set_node_addr(sbi, &ni, new_addr); + dec_page_count(sbi, F2FS_DIRTY_NODES); + mutex_unlock(&sbi->node_write); + unlock_page(page); + return 0; + +redirty_out: + dec_page_count(sbi, F2FS_DIRTY_NODES); + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; +} + +/* + * It is very important to gather dirty pages and write at once, so that we can + * submit a big bio without interfering other data writes. + * Be default, 512 pages (2MB), a segment size, is quite reasonable. 
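+ * (512 pages * 4 KB per page = 2 MB, i.e. one 512-block segment)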
+ */ +#define COLLECT_DIRTY_NODES 512 +static int f2fs_write_node_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + long nr_to_write = wbc->nr_to_write; + + /* First check balancing cached NAT entries */ + if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { + f2fs_sync_fs(sbi->sb, true); + return 0; + } + + /* collect a number of dirty node pages and write together */ + if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) + return 0; + + /* if mounting is failed, skip writing node pages */ + wbc->nr_to_write = max_hw_blocks(sbi); + sync_node_pages(sbi, 0, wbc); + wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write); + return 0; +} + +static int f2fs_set_node_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + inc_page_count(sbi, F2FS_DIRTY_NODES); + SetPagePrivate(page); + return 1; + } + return 0; +} + +static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (PageDirty(page)) + dec_page_count(sbi, F2FS_DIRTY_NODES); + ClearPagePrivate(page); +} + +static int f2fs_release_node_page(struct page *page, gfp_t wait) +{ + ClearPagePrivate(page); + return 1; +} + +/* + * Structure of the f2fs node operations + */ +const struct address_space_operations f2fs_node_aops = { + .writepage = f2fs_write_node_page, + .writepages = f2fs_write_node_pages, + .set_page_dirty = f2fs_set_node_page_dirty, + .invalidatepage = f2fs_invalidate_node_page, + .releasepage = f2fs_release_node_page, +}; + +static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) +{ + struct list_head *this; + struct free_nid *i; + list_for_each(this, head) { + i = list_entry(this, struct free_nid, list); + if (i->nid == n) + return i; + } + return NULL; +} + +static void __del_from_free_nid_list(struct free_nid *i) +{ + list_del(&i->list); + kmem_cache_free(free_nid_slab, i); +} + +static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) +{ + struct free_nid *i; + struct nat_entry *ne; + bool allocated = false; + + if (nm_i->fcnt > 2 * MAX_FREE_NIDS) + return -1; + + /* 0 nid should not be used */ + if (nid == 0) + return 0; + + if (!build) + goto retry; + + /* do not add allocated nids */ + read_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); + if (ne && nat_get_blkaddr(ne) != NULL_ADDR) + allocated = true; + read_unlock(&nm_i->nat_tree_lock); + if (allocated) + return 0; +retry: + i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); + if (!i) { + cond_resched(); + goto retry; + } + i->nid = nid; + i->state = NID_NEW; + + spin_lock(&nm_i->free_nid_list_lock); + if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { + spin_unlock(&nm_i->free_nid_list_lock); + kmem_cache_free(free_nid_slab, i); + return 0; + } + list_add_tail(&i->list, &nm_i->free_nid_list); + nm_i->fcnt++; + spin_unlock(&nm_i->free_nid_list_lock); + return 1; +} + +static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) +{ + struct free_nid *i; + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + if (i && i->state == NID_NEW) { + __del_from_free_nid_list(i); + nm_i->fcnt--; + } + spin_unlock(&nm_i->free_nid_list_lock); +} + +static void 
scan_nat_page(struct f2fs_nm_info *nm_i, + struct page *nat_page, nid_t start_nid) +{ + struct f2fs_nat_block *nat_blk = page_address(nat_page); + block_t blk_addr; + int i; + + i = start_nid % NAT_ENTRY_PER_BLOCK; + + for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { + + if (start_nid >= nm_i->max_nid) + break; + + blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); + BUG_ON(blk_addr == NEW_ADDR); + if (blk_addr == NULL_ADDR) { + if (add_free_nid(nm_i, start_nid, true) < 0) + break; + } + } +} + +static void build_free_nids(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + int i = 0; + nid_t nid = nm_i->next_scan_nid; + + /* Enough entries */ + if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK) + return; + + /* readahead nat pages to be scanned */ + ra_nat_pages(sbi, nid); + + while (1) { + struct page *page = get_current_nat_page(sbi, nid); + + scan_nat_page(nm_i, page, nid); + f2fs_put_page(page, 1); + + nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); + if (nid >= nm_i->max_nid) + nid = 0; + + if (i++ == FREE_NID_PAGES) + break; + } + + /* go to the next free nat pages to find free nids abundantly */ + nm_i->next_scan_nid = nid; + + /* find free nids from current sum_pages */ + mutex_lock(&curseg->curseg_mutex); + for (i = 0; i < nats_in_cursum(sum); i++) { + block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); + nid = le32_to_cpu(nid_in_journal(sum, i)); + if (addr == NULL_ADDR) + add_free_nid(nm_i, nid, true); + else + remove_free_nid(nm_i, nid); + } + mutex_unlock(&curseg->curseg_mutex); +} + +/* + * If this function returns success, caller can obtain a new nid + * from second parameter of this function. + * The returned nid could be used ino as well as nid when inode is created. + */ +bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i = NULL; + struct list_head *this; +retry: + if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) + return false; + + spin_lock(&nm_i->free_nid_list_lock); + + /* We should not use stale free nids created by build_free_nids */ + if (nm_i->fcnt && !sbi->on_build_free_nids) { + BUG_ON(list_empty(&nm_i->free_nid_list)); + list_for_each(this, &nm_i->free_nid_list) { + i = list_entry(this, struct free_nid, list); + if (i->state == NID_NEW) + break; + } + + BUG_ON(i->state != NID_NEW); + *nid = i->nid; + i->state = NID_ALLOC; + nm_i->fcnt--; + spin_unlock(&nm_i->free_nid_list_lock); + return true; + } + spin_unlock(&nm_i->free_nid_list_lock); + + /* Let's scan nat pages and its caches to get free nids */ + mutex_lock(&nm_i->build_lock); + sbi->on_build_free_nids = 1; + build_free_nids(sbi); + sbi->on_build_free_nids = 0; + mutex_unlock(&nm_i->build_lock); + goto retry; +} + +/* + * alloc_nid() should be called prior to this function. + */ +void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i; + + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + BUG_ON(!i || i->state != NID_ALLOC); + __del_from_free_nid_list(i); + spin_unlock(&nm_i->free_nid_list_lock); +} + +/* + * alloc_nid() should be called prior to this function. 
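+ * It marks the nid NID_NEW again so it can be reused, or frees the entry
+ * outright when the list already holds more than 2 * MAX_FREE_NIDS entries.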
+ */ +void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i; + + if (!nid) + return; + + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + BUG_ON(!i || i->state != NID_ALLOC); + if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { + __del_from_free_nid_list(i); + } else { + i->state = NID_NEW; + nm_i->fcnt++; + } + spin_unlock(&nm_i->free_nid_list_lock); +} + +void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, + struct f2fs_summary *sum, struct node_info *ni, + block_t new_blkaddr) +{ + rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); + set_node_addr(sbi, ni, new_blkaddr); + clear_node_page_dirty(page); +} + +int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + struct f2fs_node *src, *dst; + nid_t ino = ino_of_node(page); + struct node_info old_ni, new_ni; + struct page *ipage; + int err; + + ipage = grab_cache_page(mapping, ino); + if (!ipage) + return -ENOMEM; + + /* Should not use this inode from free nid list */ + remove_free_nid(NM_I(sbi), ino); + + get_node_info(sbi, ino, &old_ni); + SetPageUptodate(ipage); + fill_node_footer(ipage, ino, ino, 0, true); + + src = F2FS_NODE(page); + dst = F2FS_NODE(ipage); + + memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); + dst->i.i_size = 0; + dst->i.i_blocks = cpu_to_le64(1); + dst->i.i_links = cpu_to_le32(1); + dst->i.i_xattr_nid = 0; + + new_ni = old_ni; + new_ni.ino = ino; + + err = set_node_addr(sbi, &new_ni, NEW_ADDR); + if (!err) + if (!inc_valid_node_count(sbi, NULL, 1)) + err = -ENOSPC; + if (!err) + inc_valid_inode_count(sbi); + f2fs_put_page(ipage, 1); + return err; +} + +int restore_node_summary(struct f2fs_sb_info *sbi, + unsigned int segno, struct f2fs_summary_block *sum) +{ + struct f2fs_node *rn; + struct f2fs_summary *sum_entry; + struct page *page; + block_t addr; + int i, last_offset; + + /* alloc temporal page for read node */ + page = alloc_page(GFP_NOFS | __GFP_ZERO); + if (!page) + return -ENOMEM; + lock_page(page); + + /* scan the node segment */ + last_offset = sbi->blocks_per_seg; + addr = START_BLOCK(sbi, segno); + sum_entry = &sum->entries[0]; + + for (i = 0; i < last_offset; i++, sum_entry++) { + /* + * In order to read next node page, + * we must clear PageUptodate flag. 
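+ * (One temporary page is reused for every block in the segment, so its
+ * previous contents must not be treated as valid.)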
+ */ + ClearPageUptodate(page); + + if (f2fs_readpage(sbi, page, addr, READ_SYNC)) + goto out; + + lock_page(page); + rn = F2FS_NODE(page); + sum_entry->nid = rn->footer.nid; + sum_entry->version = 0; + sum_entry->ofs_in_node = 0; + addr++; + } + unlock_page(page); +out: + __free_pages(page, 0); + return 0; +} + +static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + int i; + + mutex_lock(&curseg->curseg_mutex); + + if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) { + mutex_unlock(&curseg->curseg_mutex); + return false; + } + + for (i = 0; i < nats_in_cursum(sum); i++) { + struct nat_entry *ne; + struct f2fs_nat_entry raw_ne; + nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); + + raw_ne = nat_in_journal(sum, i); +retry: + write_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); + if (ne) { + __set_nat_cache_dirty(nm_i, ne); + write_unlock(&nm_i->nat_tree_lock); + continue; + } + ne = grab_nat_entry(nm_i, nid); + if (!ne) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); + nat_set_ino(ne, le32_to_cpu(raw_ne.ino)); + nat_set_version(ne, raw_ne.version); + __set_nat_cache_dirty(nm_i, ne); + write_unlock(&nm_i->nat_tree_lock); + } + update_nats_in_cursum(sum, -i); + mutex_unlock(&curseg->curseg_mutex); + return true; +} + +/* + * This function is called during the checkpointing process. + */ +void flush_nat_entries(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + struct list_head *cur, *n; + struct page *page = NULL; + struct f2fs_nat_block *nat_blk = NULL; + nid_t start_nid = 0, end_nid = 0; + bool flushed; + + flushed = flush_nats_in_journal(sbi); + + if (!flushed) + mutex_lock(&curseg->curseg_mutex); + + /* 1) flush dirty nat caches */ + list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { + struct nat_entry *ne; + nid_t nid; + struct f2fs_nat_entry raw_ne; + int offset = -1; + block_t new_blkaddr; + + ne = list_entry(cur, struct nat_entry, list); + nid = nat_get_nid(ne); + + if (nat_get_blkaddr(ne) == NEW_ADDR) + continue; + if (flushed) + goto to_nat_page; + + /* if there is room for nat enries in curseg->sumpage */ + offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); + if (offset >= 0) { + raw_ne = nat_in_journal(sum, offset); + goto flush_now; + } +to_nat_page: + if (!page || (start_nid > nid || nid > end_nid)) { + if (page) { + f2fs_put_page(page, 1); + page = NULL; + } + start_nid = START_NID(nid); + end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1; + + /* + * get nat block with dirty flag, increased reference + * count, mapped and lock + */ + page = get_next_nat_page(sbi, start_nid); + nat_blk = page_address(page); + } + + BUG_ON(!nat_blk); + raw_ne = nat_blk->entries[nid - start_nid]; +flush_now: + new_blkaddr = nat_get_blkaddr(ne); + + raw_ne.ino = cpu_to_le32(nat_get_ino(ne)); + raw_ne.block_addr = cpu_to_le32(new_blkaddr); + raw_ne.version = nat_get_version(ne); + + if (offset < 0) { + nat_blk->entries[nid - start_nid] = raw_ne; + } else { + nat_in_journal(sum, offset) = raw_ne; + nid_in_journal(sum, offset) = cpu_to_le32(nid); + } + + if (nat_get_blkaddr(ne) == NULL_ADDR && + add_free_nid(NM_I(sbi), nid, false) <= 0) { + write_lock(&nm_i->nat_tree_lock); + __del_from_nat_cache(nm_i, ne); + 
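+ /* the nid is free again and already tracked (or the free list is
+ * full), so the stale nat cache entry can be dropped */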
write_unlock(&nm_i->nat_tree_lock); + } else { + write_lock(&nm_i->nat_tree_lock); + __clear_nat_cache_dirty(nm_i, ne); + ne->checkpointed = true; + write_unlock(&nm_i->nat_tree_lock); + } + } + if (!flushed) + mutex_unlock(&curseg->curseg_mutex); + f2fs_put_page(page, 1); + + /* 2) shrink nat caches if necessary */ + try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD); +} + +static int init_node_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned char *version_bitmap; + unsigned int nat_segs, nat_blocks; + + nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); + + /* segment_count_nat includes pair segment so divide to 2. */ + nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; + nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; + nm_i->fcnt = 0; + nm_i->nat_cnt = 0; + + INIT_LIST_HEAD(&nm_i->free_nid_list); + INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); + INIT_LIST_HEAD(&nm_i->nat_entries); + INIT_LIST_HEAD(&nm_i->dirty_nat_entries); + + mutex_init(&nm_i->build_lock); + spin_lock_init(&nm_i->free_nid_list_lock); + rwlock_init(&nm_i->nat_tree_lock); + + nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); + nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); + version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); + if (!version_bitmap) + return -EFAULT; + + nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, + GFP_KERNEL); + if (!nm_i->nat_bitmap) + return -ENOMEM; + return 0; +} + +int build_node_manager(struct f2fs_sb_info *sbi) +{ + int err; + + sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL); + if (!sbi->nm_info) + return -ENOMEM; + + err = init_node_manager(sbi); + if (err) + return err; + + build_free_nids(sbi); + return 0; +} + +void destroy_node_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i, *next_i; + struct nat_entry *natvec[NATVEC_SIZE]; + nid_t nid = 0; + unsigned int found; + + if (!nm_i) + return; + + /* destroy free nid list */ + spin_lock(&nm_i->free_nid_list_lock); + list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { + BUG_ON(i->state == NID_ALLOC); + __del_from_free_nid_list(i); + nm_i->fcnt--; + } + BUG_ON(nm_i->fcnt); + spin_unlock(&nm_i->free_nid_list_lock); + + /* destroy nat cache */ + write_lock(&nm_i->nat_tree_lock); + while ((found = __gang_lookup_nat_cache(nm_i, + nid, NATVEC_SIZE, natvec))) { + unsigned idx; + for (idx = 0; idx < found; idx++) { + struct nat_entry *e = natvec[idx]; + nid = nat_get_nid(e) + 1; + __del_from_nat_cache(nm_i, e); + } + } + BUG_ON(nm_i->nat_cnt); + write_unlock(&nm_i->nat_tree_lock); + + kfree(nm_i->nat_bitmap); + sbi->nm_info = NULL; + kfree(nm_i); +} + +int __init create_node_manager_caches(void) +{ + nat_entry_slab = f2fs_kmem_cache_create("nat_entry", + sizeof(struct nat_entry), NULL); + if (!nat_entry_slab) + return -ENOMEM; + + free_nid_slab = f2fs_kmem_cache_create("free_nid", + sizeof(struct free_nid), NULL); + if (!free_nid_slab) { + kmem_cache_destroy(nat_entry_slab); + return -ENOMEM; + } + return 0; +} + +void destroy_node_manager_caches(void) +{ + kmem_cache_destroy(free_nid_slab); + kmem_cache_destroy(nat_entry_slab); +} diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h new file mode 100644 index 00000000000..7ee2f9e7e88 --- /dev/null +++ b/fs/f2fs/node.h @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/node.h + * + * Copyright 
(c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/* start node id of a node block dedicated to the given node id */ +#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) + +/* node block offset on the NAT area dedicated to the given start node id */ +#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) + +/* # of pages to perform readahead before building free nids */ +#define FREE_NID_PAGES 4 + +/* maximum # of free node ids to produce during build_free_nids */ +#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) + +/* maximum readahead size for node during getting data blocks */ +#define MAX_RA_NODE 128 + +/* maximum cached nat entries to manage memory footprint */ +#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) + +/* vector size for gang look-up from nat cache that consists of radix tree */ +#define NATVEC_SIZE 64 + +/* return value for read_node_page */ +#define LOCKED_PAGE 1 + +/* + * For node information + */ +struct node_info { + nid_t nid; /* node id */ + nid_t ino; /* inode number of the node's owner */ + block_t blk_addr; /* block address of the node */ + unsigned char version; /* version of the node */ +}; + +struct nat_entry { + struct list_head list; /* for clean or dirty nat list */ + bool checkpointed; /* whether it is checkpointed or not */ + struct node_info ni; /* in-memory node information */ +}; + +#define nat_get_nid(nat) (nat->ni.nid) +#define nat_set_nid(nat, n) (nat->ni.nid = n) +#define nat_get_blkaddr(nat) (nat->ni.blk_addr) +#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) +#define nat_get_ino(nat) (nat->ni.ino) +#define nat_set_ino(nat, i) (nat->ni.ino = i) +#define nat_get_version(nat) (nat->ni.version) +#define nat_set_version(nat, v) (nat->ni.version = v) + +#define __set_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->dirty_nat_entries); +#define __clear_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->nat_entries); +#define inc_node_version(version) (++version) + +static inline void node_info_from_raw_nat(struct node_info *ni, + struct f2fs_nat_entry *raw_ne) +{ + ni->ino = le32_to_cpu(raw_ne->ino); + ni->blk_addr = le32_to_cpu(raw_ne->block_addr); + ni->version = raw_ne->version; +} + +/* + * For free nid mangement + */ +enum nid_state { + NID_NEW, /* newly added to free nid list */ + NID_ALLOC /* it is allocated */ +}; + +struct free_nid { + struct list_head list; /* for free node id list */ + nid_t nid; /* node id */ + int state; /* in use or not: NID_NEW or NID_ALLOC */ +}; + +static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *fnid; + + if (nm_i->fcnt <= 0) + return -1; + spin_lock(&nm_i->free_nid_list_lock); + fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); + *nid = fnid->nid; + spin_unlock(&nm_i->free_nid_list_lock); + return 0; +} + +/* + * inline functions + */ +static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); +} + +static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + pgoff_t block_off; + pgoff_t block_addr; + int seg_off; + + block_off = 
NAT_BLOCK_OFFSET(start); + seg_off = block_off >> sbi->log_blocks_per_seg; + + block_addr = (pgoff_t)(nm_i->nat_blkaddr + + (seg_off << sbi->log_blocks_per_seg << 1) + + (block_off & ((1 << sbi->log_blocks_per_seg) - 1))); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + block_addr += sbi->blocks_per_seg; + + return block_addr; +} + +static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + block_addr -= nm_i->nat_blkaddr; + if ((block_addr >> sbi->log_blocks_per_seg) % 2) + block_addr -= sbi->blocks_per_seg; + else + block_addr += sbi->blocks_per_seg; + + return block_addr + nm_i->nat_blkaddr; +} + +static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) +{ + unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + f2fs_clear_bit(block_off, nm_i->nat_bitmap); + else + f2fs_set_bit(block_off, nm_i->nat_bitmap); +} + +static inline void fill_node_footer(struct page *page, nid_t nid, + nid_t ino, unsigned int ofs, bool reset) +{ + struct f2fs_node *rn = F2FS_NODE(page); + if (reset) + memset(rn, 0, sizeof(*rn)); + rn->footer.nid = cpu_to_le32(nid); + rn->footer.ino = cpu_to_le32(ino); + rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); +} + +static inline void copy_node_footer(struct page *dst, struct page *src) +{ + struct f2fs_node *src_rn = F2FS_NODE(src); + struct f2fs_node *dst_rn = F2FS_NODE(dst); + memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); +} + +static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_node *rn = F2FS_NODE(page); + + rn->footer.cp_ver = ckpt->checkpoint_ver; + rn->footer.next_blkaddr = cpu_to_le32(blkaddr); +} + +static inline nid_t ino_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.ino); +} + +static inline nid_t nid_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.nid); +} + +static inline unsigned int ofs_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + unsigned flag = le32_to_cpu(rn->footer.flag); + return flag >> OFFSET_BIT_SHIFT; +} + +static inline unsigned long long cpver_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le64_to_cpu(rn->footer.cp_ver); +} + +static inline block_t next_blkaddr_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.next_blkaddr); +} + +/* + * f2fs assigns the following node offsets described as (num). 
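+ * Each offset names a fixed position in a single inode's node tree: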
+ * N = NIDS_PER_BLOCK + * + * Inode block (0) + * |- direct node (1) + * |- direct node (2) + * |- indirect node (3) + * | `- direct node (4 => 4 + N - 1) + * |- indirect node (4 + N) + * | `- direct node (5 + N => 5 + 2N - 1) + * `- double indirect node (5 + 2N) + * `- indirect node (6 + 2N) + * `- direct node (x(N + 1)) + */ +static inline bool IS_DNODE(struct page *node_page) +{ + unsigned int ofs = ofs_of_node(node_page); + + if (ofs == XATTR_NODE_OFFSET) + return false; + + if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || + ofs == 5 + 2 * NIDS_PER_BLOCK) + return false; + if (ofs >= 6 + 2 * NIDS_PER_BLOCK) { + ofs -= 6 + 2 * NIDS_PER_BLOCK; + if (!((long int)ofs % (NIDS_PER_BLOCK + 1))) + return false; + } + return true; +} + +static inline void set_nid(struct page *p, int off, nid_t nid, bool i) +{ + struct f2fs_node *rn = F2FS_NODE(p); + + wait_on_page_writeback(p); + + if (i) + rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); + else + rn->in.nid[off] = cpu_to_le32(nid); + set_page_dirty(p); +} + +static inline nid_t get_nid(struct page *p, int off, bool i) +{ + struct f2fs_node *rn = F2FS_NODE(p); + + if (i) + return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); + return le32_to_cpu(rn->in.nid[off]); +} + +/* + * Coldness identification: + * - Mark cold files in f2fs_inode_info + * - Mark cold node blocks in their node footer + * - Mark cold data pages in page cache + */ +static inline int is_file(struct inode *inode, int type) +{ + return F2FS_I(inode)->i_advise & type; +} + +static inline void set_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise |= type; +} + +static inline void clear_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise &= ~type; +} + +#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) +#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) +#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) +#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) +#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) +#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) + +static inline int is_cold_data(struct page *page) +{ + return PageChecked(page); +} + +static inline void set_cold_data(struct page *page) +{ + SetPageChecked(page); +} + +static inline void clear_cold_data(struct page *page) +{ + ClearPageChecked(page); +} + +static inline int is_node(struct page *page, int type) +{ + struct f2fs_node *rn = F2FS_NODE(page); + return le32_to_cpu(rn->footer.flag) & (1 << type); +} + +#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) +#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) +#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) + +static inline void set_cold_node(struct inode *inode, struct page *page) +{ + struct f2fs_node *rn = F2FS_NODE(page); + unsigned int flag = le32_to_cpu(rn->footer.flag); + + if (S_ISDIR(inode->i_mode)) + flag &= ~(0x1 << COLD_BIT_SHIFT); + else + flag |= (0x1 << COLD_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} + +static inline void set_mark(struct page *page, int mark, int type) +{ + struct f2fs_node *rn = F2FS_NODE(page); + unsigned int flag = le32_to_cpu(rn->footer.flag); + if (mark) + flag |= (0x1 << type); + else + flag &= ~(0x1 << type); + rn->footer.flag = cpu_to_le32(flag); +} +#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) +#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c new file mode 100644 
index 00000000000..773752780af --- /dev/null +++ b/fs/f2fs/recovery.c @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2014 XPerience(R) Project + * + * fs/f2fs/recovery.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include "f2fs.h" +#include "node.h" +#include "segment.h" + +static struct kmem_cache *fsync_entry_slab; + +bool space_for_roll_forward(struct f2fs_sb_info *sbi) +{ + if (sbi->last_valid_block_count + sbi->alloc_valid_block_count + > sbi->user_block_count) + return false; + return true; +} + +static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, + nid_t ino) +{ + struct list_head *this; + struct fsync_inode_entry *entry; + + list_for_each(this, head) { + entry = list_entry(this, struct fsync_inode_entry, list); + if (entry->inode->i_ino == ino) + return entry; + } + return NULL; +} + +static int recover_dentry(struct page *ipage, struct inode *inode) +{ + struct f2fs_node *raw_node = F2FS_NODE(ipage); + struct f2fs_inode *raw_inode = &(raw_node->i); + nid_t pino = le32_to_cpu(raw_inode->i_pino); + struct f2fs_dir_entry *de; + struct qstr name; + struct page *page; + struct inode *dir, *einode; + int err = 0; + + dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); + if (!dir) { + dir = f2fs_iget(inode->i_sb, pino); + if (IS_ERR(dir)) { + f2fs_msg(inode->i_sb, KERN_INFO, + "%s: f2fs_iget failed: %ld", + __func__, PTR_ERR(dir)); + err = PTR_ERR(dir); + goto out; + } + set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); + add_dirty_dir_inode(dir); + } + + name.len = le32_to_cpu(raw_inode->i_namelen); + name.name = raw_inode->i_name; +retry: + de = f2fs_find_entry(dir, &name, &page); + if (de && inode->i_ino == le32_to_cpu(de->ino)) + goto out_unmap_put; + if (de) { + einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); + if (IS_ERR(einode)) { + WARN_ON(1); + if (PTR_ERR(einode) == -ENOENT) + err = -EEXIST; + goto out_unmap_put; + } + err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); + if (err) { + iput(einode); + goto out_unmap_put; + } + f2fs_delete_entry(de, page, einode); + iput(einode); + goto retry; + } + err = __f2fs_add_link(dir, &name, inode); + goto out; + +out_unmap_put: + kunmap(page); + f2fs_put_page(page, 0); +out: + f2fs_msg(inode->i_sb, KERN_DEBUG, "recover_inode and its dentry: " + "ino = %x, name = %s, dir = %lx, err = %d", + ino_of_node(ipage), raw_inode->i_name, + IS_ERR(dir) ? 
0 : dir->i_ino, err); + return err; +} + +static int recover_inode(struct inode *inode, struct page *node_page) +{ + struct f2fs_node *raw_node = F2FS_NODE(node_page); + struct f2fs_inode *raw_inode = &(raw_node->i); + + if (!IS_INODE(node_page)) + return 0; + + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + i_size_write(inode, le64_to_cpu(raw_inode->i_size)); + inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); + inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); + inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); + inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + + if (is_dent_dnode(node_page)) + return recover_dentry(node_page, inode); + + f2fs_msg(inode->i_sb, KERN_DEBUG, "recover_inode: ino = %x, name = %s", + ino_of_node(node_page), raw_inode->i_name); + return 0; +} + +static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) +{ + unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); + struct curseg_info *curseg; + struct page *page; + block_t blkaddr; + int err = 0; + + /* get node pages in the current segment */ + curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); + blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; + + /* read node page */ + page = alloc_page(GFP_F2FS_ZERO); + if (!page) + return -ENOMEM; + lock_page(page); + + while (1) { + struct fsync_inode_entry *entry; + + err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + if (err) + goto out; + + lock_page(page); + + if (cp_ver != cpver_of_node(page)) + break; + + if (!is_fsync_dnode(page)) + goto next; + + entry = get_fsync_inode(head, ino_of_node(page)); + if (entry) { + if (IS_INODE(page) && is_dent_dnode(page)) + set_inode_flag(F2FS_I(entry->inode), + FI_INC_LINK); + } else { + if (IS_INODE(page) && is_dent_dnode(page)) { + err = recover_inode_page(sbi, page); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: recover_inode_page failed: %d", + __func__, err); + break; + } + } + + /* add this fsync inode to the list */ + entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); + if (!entry) { + err = -ENOMEM; + break; + } + + entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); + if (IS_ERR(entry->inode)) { + err = PTR_ERR(entry->inode); + f2fs_msg(sbi->sb, KERN_INFO, + "%s: f2fs_iget failed: %d", + __func__, err); + kmem_cache_free(fsync_entry_slab, entry); + break; + } + list_add_tail(&entry->list, head); + } + entry->blkaddr = blkaddr; + + err = recover_inode(entry->inode, page); + if (err && err != -ENOENT) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: recover_inode failed: %d", + __func__, err); + break; + } +next: + /* check next segment */ + blkaddr = next_blkaddr_of_node(page); + } + unlock_page(page); +out: + __free_pages(page, 0); + return err; +} + +static void destroy_fsync_dnodes(struct list_head *head) +{ + struct fsync_inode_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, head, list) { + iput(entry->inode); + list_del(&entry->list); + kmem_cache_free(fsync_entry_slab, entry); + } +} + +static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, + block_t blkaddr, struct dnode_of_data *dn) +{ + struct seg_entry *sentry; + unsigned int segno = GET_SEGNO(sbi, blkaddr); + unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & + (sbi->blocks_per_seg - 1); + struct f2fs_summary sum; + nid_t ino, nid; + void *kaddr; + struct inode *inode; + struct page *node_page; + unsigned int offset; + block_t 
bidx; + int i; + + sentry = get_seg_entry(sbi, segno); + if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) + return 0; + + /* Get the previous summary */ + for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + if (curseg->segno == segno) { + sum = curseg->sum_blk->entries[blkoff]; + break; + } + } + if (i > CURSEG_COLD_DATA) { + struct page *sum_page = get_sum_page(sbi, segno); + struct f2fs_summary_block *sum_node; + kaddr = page_address(sum_page); + sum_node = (struct f2fs_summary_block *)kaddr; + sum = sum_node->entries[blkoff]; + f2fs_put_page(sum_page, 1); + } + + /* Use the locked dnode page and inode */ + nid = le32_to_cpu(sum.nid); + if (dn->inode->i_ino == nid) { + struct dnode_of_data tdn = *dn; + tdn.nid = nid; + tdn.node_page = dn->inode_page; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + truncate_data_blocks_range(&tdn, 1); + return 0; + } else if (dn->nid == nid) { + struct dnode_of_data tdn = *dn; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + truncate_data_blocks_range(&tdn, 1); + return 0; + } + + /* Get the node page */ + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); + + offset = ofs_of_node(node_page); + ino = ino_of_node(node_page); + f2fs_put_page(node_page, 1); + + /* Skip nodes with circular references */ + if (ino == dn->inode->i_ino) { + f2fs_msg(sbi->sb, KERN_ERR, "%s: node %x has circular inode %x", + __func__, ino, nid); + f2fs_handle_error(sbi); + return -EDEADLK; + } + + /* Deallocate previous index in the node page */ + inode = f2fs_iget(sbi->sb, ino); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + bidx = start_bidx_of_node(offset, F2FS_I(inode)) + + le16_to_cpu(sum.ofs_in_node); + + truncate_hole(inode, bidx, bidx + 1); + iput(inode); + return 0; +} + +static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, + struct page *page, block_t blkaddr) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int start, end; + struct dnode_of_data dn; + struct f2fs_summary sum; + struct node_info ni; + int err = 0, recovered = 0; + int ilock; + + start = start_bidx_of_node(ofs_of_node(page), fi); + if (IS_INODE(page)) + end = start + ADDRS_PER_INODE(fi); + else + end = start + ADDRS_PER_BLOCK; + + ilock = mutex_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); + + err = get_dnode_of_data(&dn, start, ALLOC_NODE); + if (err) { + mutex_unlock_op(sbi, ilock); + f2fs_msg(sbi->sb, KERN_INFO, + "%s: get_dnode_of_data failed: %d", __func__, err); + return err; + } + + wait_on_page_writeback(dn.node_page); + + get_node_info(sbi, dn.nid, &ni); + BUG_ON(ni.ino != ino_of_node(page)); + BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page)); + + for (; start < end; start++) { + block_t src, dest; + + src = datablock_addr(dn.node_page, dn.ofs_in_node); + dest = datablock_addr(page, dn.ofs_in_node); + + if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { + if (src == NULL_ADDR) { + int err = reserve_new_block(&dn); + /* We should not get -ENOSPC */ + if (err) + f2fs_msg(sbi->sb, KERN_INFO, + "%s: reserve_new_block failed: %d", + __func__, err); + BUG_ON(err); + } + + /* Check the previous node page having this index */ + err = check_index_in_prev_nodes(sbi, dest, &dn); + if (err) + goto err; + + set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); + + /* write dummy data page */ + recover_data_page(sbi, NULL, &sum, src, dest); + update_extent_cache(dest, &dn); + recovered++; + } + dn.ofs_in_node++; + } + + /* write node page in 
place */ + set_summary(&sum, dn.nid, 0, 0); + if (IS_INODE(dn.node_page)) + sync_inode_page(&dn); + + copy_node_footer(dn.node_page, page); + fill_node_footer(dn.node_page, dn.nid, ni.ino, + ofs_of_node(page), false); + set_page_dirty(dn.node_page); + + recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); +err: + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); + + f2fs_msg(sbi->sb, KERN_DEBUG, "recover_data: ino = %lx, " + "recovered_data = %d blocks, err = %d", + inode->i_ino, recovered, err); + return err; +} + +static int recover_data(struct f2fs_sb_info *sbi, + struct list_head *head, int type) +{ + unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); + struct curseg_info *curseg; + struct page *page; + int err = 0; + block_t blkaddr; + + /* get node pages in the current segment */ + curseg = CURSEG_I(sbi, type); + blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + + /* read node page */ + page = alloc_page(GFP_NOFS | __GFP_ZERO); + if (!page) + return -ENOMEM; + + lock_page(page); + + while (1) { + struct fsync_inode_entry *entry; + + err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: f2fs_readpage failed: %d", + __func__, err); + goto out; + } + + lock_page(page); + + if (cp_ver != cpver_of_node(page)) + break; + + entry = get_fsync_inode(head, ino_of_node(page)); + if (!entry) + goto next; + + err = do_recover_data(sbi, entry->inode, page, blkaddr); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: do_recover_data failed: %d", + __func__, err); + break; + } + + if (entry->blkaddr == blkaddr) { + iput(entry->inode); + list_del(&entry->list); + kmem_cache_free(fsync_entry_slab, entry); + } +next: + /* check next segment */ + blkaddr = next_blkaddr_of_node(page); + } + unlock_page(page); +out: + __free_pages(page, 0); + + if (!err) + allocate_new_segments(sbi); + return err; +} + +int recover_fsync_data(struct f2fs_sb_info *sbi) +{ + struct list_head inode_list; + int err; + + fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", + sizeof(struct fsync_inode_entry), NULL); + if (unlikely(!fsync_entry_slab)) + return -ENOMEM; + + INIT_LIST_HEAD(&inode_list); + + /* step #1: find fsynced inode numbers */ + sbi->por_doing = 1; + err = find_fsync_dnodes(sbi, &inode_list); + if (err) { + f2fs_msg(sbi->sb, KERN_INFO, + "%s: find_fsync_dnodes failed: %d", __func__, err); + goto out; + } + + if (list_empty(&inode_list)) + goto out; + + /* step #2: recover data */ + err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); + if (!list_empty(&inode_list)) { + f2fs_handle_error(sbi); + err = -EIO; + } +out: + destroy_fsync_dnodes(&inode_list); + kmem_cache_destroy(fsync_entry_slab); + sbi->por_doing = 0; + if (!err) { + f2fs_msg(sbi->sb, KERN_INFO, "recovery complete"); + write_checkpoint(sbi, false); + } else + f2fs_msg(sbi->sb, KERN_ERR, "recovery did not fully complete"); + + return err; +} diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c new file mode 100644 index 00000000000..5a88640ece0 --- /dev/null +++ b/fs/f2fs/segment.c @@ -0,0 +1,1789 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/segment.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "segment.h" +#include "node.h" +#include + +/* + * This function balances dirty node and dentry pages. + * In addition, it controls garbage collection. + */ +void f2fs_balance_fs(struct f2fs_sb_info *sbi) +{ + /* + * We should do GC or end up with checkpoint, if there are so many dirty + * dir/node pages without enough free segments. + */ + if (has_not_enough_free_secs(sbi, 0)) { + mutex_lock(&sbi->gc_mutex); + f2fs_gc(sbi); + } +} + +static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + /* need not be added */ + if (IS_CURSEG(sbi, segno)) + return; + + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]++; + + if (dirty_type == DIRTY) { + struct seg_entry *sentry = get_seg_entry(sbi, segno); + enum dirty_type t = DIRTY_HOT_DATA; + + dirty_type = sentry->type; + + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]++; + + /* Only one bitmap should be set */ + for (; t <= DIRTY_COLD_NODE; t++) { + if (t == dirty_type) + continue; + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; + } + } +} + +static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]--; + + if (dirty_type == DIRTY) { + enum dirty_type t = DIRTY_HOT_DATA; + + /* clear all the bitmaps */ + for (; t <= DIRTY_COLD_NODE; t++) + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; + + if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) + clear_bit(GET_SECNO(sbi, segno), + dirty_i->victim_secmap); + } +} + +/* + * Should not occur error such as -ENOMEM. + * Adding dirty entry into seglist is not critical operation. + * If a given segment is one of current working segments, it won't be added. + */ +static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned short valid_blocks; + + if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) + return; + + mutex_lock(&dirty_i->seglist_lock); + + valid_blocks = get_valid_blocks(sbi, segno, 0); + + if (valid_blocks == 0) { + __locate_dirty_segment(sbi, segno, PRE); + __remove_dirty_segment(sbi, segno, DIRTY); + } else if (valid_blocks < sbi->blocks_per_seg) { + __locate_dirty_segment(sbi, segno, DIRTY); + } else { + /* Recovery routine with SSR needs this */ + __remove_dirty_segment(sbi, segno, DIRTY); + } + + mutex_unlock(&dirty_i->seglist_lock); +} + +/* + * Should call clear_prefree_segments after checkpoint is done. 
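+ * set_prefree_as_free_segments() only updates the in-memory free segmap;
+ * clear_prefree_segments() then clears the PRE bits and, with the discard
+ * mount option, trims the freed segments.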
+ */ +static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno = -1; + unsigned int total_segs = TOTAL_SEGS(sbi); + + mutex_lock(&dirty_i->seglist_lock); + while (1) { + segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, + segno + 1); + if (segno >= total_segs) + break; + __set_test_and_free(sbi, segno); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +void clear_prefree_segments(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno = -1; + unsigned int total_segs = TOTAL_SEGS(sbi); + + mutex_lock(&dirty_i->seglist_lock); + while (1) { + segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, + segno + 1); + if (segno >= total_segs) + break; + + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) + dirty_i->nr_dirty[PRE]--; + + /* Let's use trim */ + if (test_opt(sbi, DISCARD)) + blkdev_issue_discard(sbi->sb->s_bdev, + START_BLOCK(sbi, segno) << + sbi->log_sectors_per_block, + 1 << (sbi->log_sectors_per_block + + sbi->log_blocks_per_seg), + GFP_NOFS, 0); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) + sit_i->dirty_sentries++; +} + +static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, + unsigned int segno, int modified) +{ + struct seg_entry *se = get_seg_entry(sbi, segno); + se->type = type; + if (modified) + __mark_sit_entry_dirty(sbi, segno); +} + +static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +{ + struct seg_entry *se; + unsigned int segno, offset; + long int new_vblocks; + bool check_map = false; + + segno = GET_SEGNO(sbi, blkaddr); + + se = get_seg_entry(sbi, segno); + new_vblocks = se->valid_blocks + del; + offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); + + if (new_vblocks < 0 || new_vblocks > sbi->blocks_per_seg || + (new_vblocks >> (sizeof(unsigned short) << 3))) + if (f2fs_handle_error(sbi)) + check_map = true; + + se->mtime = get_mtime(sbi); + SIT_I(sbi)->max_mtime = se->mtime; + + /* Update valid block bitmap */ + if (del > 0) { + if (f2fs_set_bit(offset, se->cur_valid_map)) + if (f2fs_handle_error(sbi)) + check_map = true; + } else { + if (!f2fs_clear_bit(offset, se->cur_valid_map)) + if (f2fs_handle_error(sbi)) + check_map = true; + } + + if (unlikely(check_map)) { + int i; + long int vblocks = 0; + + f2fs_msg(sbi->sb, KERN_ERR, + "cannot %svalidate block %u in segment %u with %hu valid blocks", + (del < 0) ? 
"in" : "", + offset, segno, se->valid_blocks); + + /* assume the count was stale to start */ + del = 0; + for (i = 0; i < sbi->blocks_per_seg; i++) + if (f2fs_test_bit(i, se->cur_valid_map)) + vblocks++; + if (vblocks != se->valid_blocks) { + f2fs_msg(sbi->sb, KERN_INFO, "correcting valid block " + "counts %d -> %ld", se->valid_blocks, vblocks); + /* make accounting corrections */ + del = vblocks - se->valid_blocks; + } + } + se->valid_blocks += del; + + if (!f2fs_test_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks += del; + + __mark_sit_entry_dirty(sbi, segno); + + /* update total number of valid blocks to be written in ckpt area */ + SIT_I(sbi)->written_valid_blocks += del; + + if (sbi->segs_per_sec > 1) + get_sec_entry(sbi, segno)->valid_blocks += del; +} + +static void refresh_sit_entry(struct f2fs_sb_info *sbi, + block_t old_blkaddr, block_t new_blkaddr) +{ + update_sit_entry(sbi, new_blkaddr, 1); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + update_sit_entry(sbi, old_blkaddr, -1); +} + +void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +{ + unsigned int segno = GET_SEGNO(sbi, addr); + struct sit_info *sit_i = SIT_I(sbi); + + BUG_ON(addr == NULL_ADDR); + if (addr == NEW_ADDR) + return; + + if (segno >= TOTAL_SEGS(sbi)) { + f2fs_msg(sbi->sb, KERN_ERR, "invalid segment number %u", segno); + if (f2fs_handle_error(sbi)) + return; + } + + /* add it into sit main buffer */ + mutex_lock(&sit_i->sentry_lock); + + update_sit_entry(sbi, addr, -1); + + /* add it into dirty seglist */ + locate_dirty_segment(sbi, segno); + + mutex_unlock(&sit_i->sentry_lock); +} + +/* + * This function should be resided under the curseg_mutex lock + */ +static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, + struct f2fs_summary *sum) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + void *addr = curseg->sum_blk; + addr += curseg->next_blkoff * sizeof(struct f2fs_summary); + memcpy(addr, sum, sizeof(struct f2fs_summary)); +} + +/* + * Calculate the number of current summary pages for writing + */ +int npages_for_summary_flush(struct f2fs_sb_info *sbi) +{ + int total_size_bytes = 0; + int valid_sum_count = 0; + int i, sum_space; + + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + if (sbi->ckpt->alloc_type[i] == SSR) + valid_sum_count += sbi->blocks_per_seg; + else + valid_sum_count += curseg_blkoff(sbi, i); + } + + total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1) + + sizeof(struct nat_journal) + 2 + + sizeof(struct sit_journal) + 2; + sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE; + if (total_size_bytes < sum_space) + return 1; + else if (total_size_bytes < 2 * sum_space) + return 2; + return 3; +} + +/* + * Caller should put this summary page + */ +struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) +{ + return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); +} + +static void write_sum_page(struct f2fs_sb_info *sbi, + struct f2fs_summary_block *sum_blk, block_t blk_addr) +{ + struct page *page = grab_meta_page(sbi, blk_addr); + void *kaddr = page_address(page); + memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE); + set_page_dirty(page); + f2fs_put_page(page, 1); +} + +static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno + 1; + struct free_segmap_info *free_i = FREE_I(sbi); + + if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) + return !test_bit(segno, free_i->free_segmap); + return 0; +} + +/* + * Find a new segment from the free 
segments bitmap to right order + * This function should be returned with success, otherwise BUG + */ +static void get_new_segment(struct f2fs_sb_info *sbi, + unsigned int *newseg, bool new_sec, int dir) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int segno, secno, zoneno; + unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; + unsigned int hint = *newseg / sbi->segs_per_sec; + unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); + unsigned int left_start = hint; + bool init = true; + int go_left = 0; + int i; + + write_lock(&free_i->segmap_lock); + + if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { + segno = find_next_zero_bit(free_i->free_segmap, + TOTAL_SEGS(sbi), *newseg + 1); + if (segno - *newseg < sbi->segs_per_sec - + (*newseg % sbi->segs_per_sec)) + goto got_it; + } +find_other_zone: + secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); + if (secno >= TOTAL_SECS(sbi)) { + if (dir == ALLOC_RIGHT) { + secno = find_next_zero_bit(free_i->free_secmap, + TOTAL_SECS(sbi), 0); + BUG_ON(secno >= TOTAL_SECS(sbi)); + } else { + go_left = 1; + left_start = hint - 1; + } + } + if (go_left == 0) + goto skip_left; + + while (test_bit(left_start, free_i->free_secmap)) { + if (left_start > 0) { + left_start--; + continue; + } + left_start = find_next_zero_bit(free_i->free_secmap, + TOTAL_SECS(sbi), 0); + BUG_ON(left_start >= TOTAL_SECS(sbi)); + break; + } + secno = left_start; +skip_left: + hint = secno; + segno = secno * sbi->segs_per_sec; + zoneno = secno / sbi->secs_per_zone; + + /* give up on finding another zone */ + if (!init) + goto got_it; + if (sbi->secs_per_zone == 1) + goto got_it; + if (zoneno == old_zoneno) + goto got_it; + if (dir == ALLOC_LEFT) { + if (!go_left && zoneno + 1 >= total_zones) + goto got_it; + if (go_left && zoneno == 0) + goto got_it; + } + for (i = 0; i < NR_CURSEG_TYPE; i++) + if (CURSEG_I(sbi, i)->zone == zoneno) + break; + + if (i < NR_CURSEG_TYPE) { + /* zone is in user, try another */ + if (go_left) + hint = zoneno * sbi->secs_per_zone - 1; + else if (zoneno + 1 >= total_zones) + hint = 0; + else + hint = (zoneno + 1) * sbi->secs_per_zone; + init = false; + goto find_other_zone; + } +got_it: + /* set it as dirty segment in free segmap */ + BUG_ON(test_bit(segno, free_i->free_segmap)); + __set_inuse(sbi, segno); + *newseg = segno; + write_unlock(&free_i->segmap_lock); +} + +static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + struct summary_footer *sum_footer; + + curseg->segno = curseg->next_segno; + curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno); + curseg->next_blkoff = 0; + curseg->next_segno = NULL_SEGNO; + + sum_footer = &(curseg->sum_blk->footer); + memset(sum_footer, 0, sizeof(struct summary_footer)); + if (IS_DATASEG(type)) + SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA); + if (IS_NODESEG(type)) + SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE); + __set_sit_entry_type(sbi, type, curseg->segno, modified); +} + +/* + * Allocate a current working segment. + * This function always allocates a free segment in LFS manner. 
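+ * (LFS claims a clean segment and fills it sequentially, as opposed to SSR,
+ * which reuses a partially valid segment via change_curseg().)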
+ */ +static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno; + int dir = ALLOC_LEFT; + + write_sum_page(sbi, curseg->sum_blk, + GET_SUM_BLOCK(sbi, segno)); + if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) + dir = ALLOC_RIGHT; + + if (test_opt(sbi, NOHEAP)) + dir = ALLOC_RIGHT; + + get_new_segment(sbi, &segno, new_sec, dir); + curseg->next_segno = segno; + reset_curseg(sbi, type, 1); + curseg->alloc_type = LFS; +} + +static void __next_free_blkoff(struct f2fs_sb_info *sbi, + struct curseg_info *seg, block_t start) +{ + struct seg_entry *se = get_seg_entry(sbi, seg->segno); + block_t ofs; + for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) { + if (!f2fs_test_bit(ofs, se->ckpt_valid_map) + && !f2fs_test_bit(ofs, se->cur_valid_map)) + break; + } + seg->next_blkoff = ofs; +} + +/* + * If a segment is written by LFS manner, next block offset is just obtained + * by increasing the current block offset. However, if a segment is written by + * SSR manner, next block offset obtained by calling __next_free_blkoff + */ +static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, + struct curseg_info *seg) +{ + if (seg->alloc_type == SSR) + __next_free_blkoff(sbi, seg, seg->next_blkoff + 1); + else + seg->next_blkoff++; +} + +/* + * This function always allocates a used segment (from dirty seglist) by SSR + * manner, so it should recover the existing segment information of valid blocks + */ +static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int new_segno = curseg->next_segno; + struct f2fs_summary_block *sum_node; + struct page *sum_page; + + write_sum_page(sbi, curseg->sum_blk, + GET_SUM_BLOCK(sbi, curseg->segno)); + __set_test_and_inuse(sbi, new_segno); + + mutex_lock(&dirty_i->seglist_lock); + __remove_dirty_segment(sbi, new_segno, PRE); + __remove_dirty_segment(sbi, new_segno, DIRTY); + mutex_unlock(&dirty_i->seglist_lock); + + reset_curseg(sbi, type, 1); + curseg->alloc_type = SSR; + __next_free_blkoff(sbi, curseg, 0); + + if (reuse) { + sum_page = get_sum_page(sbi, new_segno); + sum_node = (struct f2fs_summary_block *)page_address(sum_page); + memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); + f2fs_put_page(sum_page, 1); + } +} + +static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; + + if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0)) + return v_ops->get_victim(sbi, + &(curseg)->next_segno, BG_GC, type, SSR); + + /* For data segments, let's do SSR more intensively */ + for (; type >= CURSEG_HOT_DATA; type--) + if (v_ops->get_victim(sbi, &(curseg)->next_segno, + BG_GC, type, SSR)) + return 1; + return 0; +} + +/* + * flush out current segment and replace it with new segment + * This function should be returned with success, otherwise BUG + */ +static void allocate_segment_by_default(struct f2fs_sb_info *sbi, + int type, bool force) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + + if (force) + new_curseg(sbi, type, true); + else if (type == CURSEG_WARM_NODE) + new_curseg(sbi, type, false); + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + new_curseg(sbi, type, false); + else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) + change_curseg(sbi, type, true); + else + 
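+ /* nothing better available: fall back to a fresh LFS segment */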
new_curseg(sbi, type, false); +#ifdef CONFIG_F2FS_STAT_FS + sbi->segment_count[curseg->alloc_type]++; +#endif +} + +void allocate_new_segments(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg; + unsigned int old_curseg; + int i; + + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + curseg = CURSEG_I(sbi, i); + old_curseg = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); + locate_dirty_segment(sbi, old_curseg); + } +} + +static const struct segment_allocation default_salloc_ops = { + .allocate_segment = allocate_segment_by_default, +}; + +static void f2fs_end_io_write(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct bio_private *p = bio->bi_private; + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + if (!uptodate) { + SetPageError(page); + if (page->mapping) + set_bit(AS_EIO, &page->mapping->flags); + set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); + p->sbi->sb->s_flags |= MS_RDONLY; + } + end_page_writeback(page); + dec_page_count(p->sbi, F2FS_WRITEBACK); + } while (bvec >= bio->bi_io_vec); + + if (p->is_sync) + complete(p->wait); + kfree(p); + bio_put(bio); +} + +struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) +{ + struct bio *bio; + + /* No failure on bio allocation */ + bio = bio_alloc(GFP_NOIO, npages); + bio->bi_bdev = bdev; + bio->bi_private = NULL; + + return bio; +} + +static void do_submit_bio(struct f2fs_sb_info *sbi, + enum page_type type, bool sync) +{ + int rw = sync ? WRITE_SYNC : WRITE; + enum page_type btype = type > META ? META : type; + + if (type >= META_FLUSH) + rw = WRITE_FLUSH_FUA; + + if (btype == META) + rw |= REQ_META; + + if (sbi->bio[btype]) { + struct bio_private *p = sbi->bio[btype]->bi_private; + p->sbi = sbi; + sbi->bio[btype]->bi_end_io = f2fs_end_io_write; + + trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]); + + if (type == META_FLUSH) { + DECLARE_COMPLETION_ONSTACK(wait); + p->is_sync = true; + p->wait = &wait; + submit_bio(rw, sbi->bio[btype]); + wait_for_completion(&wait); + } else { + p->is_sync = false; + submit_bio(rw, sbi->bio[btype]); + } + sbi->bio[btype] = NULL; + } +} + +void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) +{ + down_write(&sbi->bio_sem); + do_submit_bio(sbi, type, sync); + up_write(&sbi->bio_sem); +} + +static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, + block_t blk_addr, enum page_type type) +{ + struct block_device *bdev = sbi->sb->s_bdev; + + verify_block_addr(sbi, blk_addr); + + down_write(&sbi->bio_sem); + + inc_page_count(sbi, F2FS_WRITEBACK); + + if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1) + do_submit_bio(sbi, type, false); +alloc_new: + if (sbi->bio[type] == NULL) { + struct bio_private *priv; +retry: + priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); + if (!priv) { + cond_resched(); + goto retry; + } + + sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); + sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + sbi->bio[type]->bi_private = priv; + /* + * The end_io will be assigned at the sumbission phase. + * Until then, let bio_add_page() merge consecutive IOs as much + * as possible. 
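+ * (submit_write_page() keeps appending only while last_block_in_bio[type]
+ * stays contiguous with the incoming block address.)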
+ */ + } + + if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) < + PAGE_CACHE_SIZE) { + do_submit_bio(sbi, type, false); + goto alloc_new; + } + + sbi->last_block_in_bio[type] = blk_addr; + + up_write(&sbi->bio_sem); + trace_f2fs_submit_write_page(page, blk_addr, type); +} + +void f2fs_wait_on_page_writeback(struct page *page, + enum page_type type, bool sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + if (PageWriteback(page)) { + f2fs_submit_bio(sbi, type, sync); + wait_on_page_writeback(page); + } +} + +static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + if (curseg->next_blkoff < sbi->blocks_per_seg) + return true; + return false; +} + +static int __get_segment_type_2(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) + return CURSEG_HOT_DATA; + else + return CURSEG_HOT_NODE; +} + +static int __get_segment_type_4(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) { + struct inode *inode = page->mapping->host; + + if (S_ISDIR(inode->i_mode)) + return CURSEG_HOT_DATA; + else + return CURSEG_COLD_DATA; + } else { + if (IS_DNODE(page) && !is_cold_node(page)) + return CURSEG_HOT_NODE; + else + return CURSEG_COLD_NODE; + } +} + +static int __get_segment_type_6(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) { + struct inode *inode = page->mapping->host; + + if (S_ISDIR(inode->i_mode)) + return CURSEG_HOT_DATA; + else if (is_cold_data(page) || file_is_cold(inode)) + return CURSEG_COLD_DATA; + else + return CURSEG_WARM_DATA; + } else { + if (IS_DNODE(page)) + return is_cold_node(page) ? CURSEG_WARM_NODE : + CURSEG_HOT_NODE; + else + return CURSEG_COLD_NODE; + } +} + +static int __get_segment_type(struct page *page, enum page_type p_type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + switch (sbi->active_logs) { + case 2: + return __get_segment_type_2(page, p_type); + case 4: + return __get_segment_type_4(page, p_type); + } + /* NR_CURSEG_TYPE(6) logs by default */ + BUG_ON(sbi->active_logs != NR_CURSEG_TYPE); + return __get_segment_type_6(page, p_type); +} + +static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blkaddr, block_t *new_blkaddr, + struct f2fs_summary *sum, enum page_type p_type) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg; + unsigned int old_cursegno; + int type; + + type = __get_segment_type(page, p_type); + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + + *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + old_cursegno = curseg->segno; + + /* + * __add_sum_entry should be resided under the curseg_mutex + * because, this function updates a summary entry in the + * current summary block. + */ + __add_sum_entry(sbi, type, sum); + + mutex_lock(&sit_i->sentry_lock); + __refresh_next_blkoff(sbi, curseg); +#ifdef CONFIG_F2FS_STAT_FS + sbi->block_count[curseg->alloc_type]++; +#endif + + /* + * SIT information should be updated before segment allocation, + * since SSR needs latest valid block information. 
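+ * (refresh_sit_entry() below validates the new block and invalidates the old
+ * one before allocate_segment() may have to pick an SSR victim.)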
+ */ + refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); + + if (!__has_curseg_space(sbi, type)) + sit_i->s_ops->allocate_segment(sbi, type, false); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + mutex_unlock(&sit_i->sentry_lock); + + if (p_type == NODE) + fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + + /* writeout dirty page into bdev */ + submit_write_page(sbi, page, *new_blkaddr, p_type); + + mutex_unlock(&curseg->curseg_mutex); +} + +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) +{ + set_page_writeback(page); + submit_write_page(sbi, page, page->index, META); +} + +void write_node_page(struct f2fs_sb_info *sbi, struct page *page, + unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) +{ + struct f2fs_summary sum; + set_summary(&sum, nid, 0, 0); + do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE); +} + +void write_data_page(struct inode *inode, struct page *page, + struct dnode_of_data *dn, block_t old_blkaddr, + block_t *new_blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_summary sum; + struct node_info ni; + + BUG_ON(old_blkaddr == NULL_ADDR); + get_node_info(sbi, dn->nid, &ni); + set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); + + do_write_page(sbi, page, old_blkaddr, + new_blkaddr, &sum, DATA); +} + +void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blk_addr) +{ + submit_write_page(sbi, page, old_blk_addr, DATA); +} + +void recover_data_page(struct f2fs_sb_info *sbi, + struct page *page, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg; + unsigned int segno, old_cursegno; + struct seg_entry *se; + int type; + + segno = GET_SEGNO(sbi, new_blkaddr); + se = get_seg_entry(sbi, segno); + type = se->type; + + if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { + if (old_blkaddr == NULL_ADDR) + type = CURSEG_COLD_DATA; + else + type = CURSEG_WARM_DATA; + } + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + old_cursegno = curseg->segno; + + /* change the current segment */ + if (segno != curseg->segno) { + curseg->next_segno = segno; + change_curseg(sbi, type, true); + } + + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & + (sbi->blocks_per_seg - 1); + __add_sum_entry(sbi, type, sum); + + refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); +} + +void rewrite_node_page(struct f2fs_sb_info *sbi, + struct page *page, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr) +{ + struct sit_info *sit_i = SIT_I(sbi); + int type = CURSEG_WARM_NODE; + struct curseg_info *curseg; + unsigned int segno, old_cursegno; + block_t next_blkaddr = next_blkaddr_of_node(page); + unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); + + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + segno = GET_SEGNO(sbi, new_blkaddr); + old_cursegno = curseg->segno; + + /* change the current segment */ + if (segno != curseg->segno) { + curseg->next_segno = segno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & + (sbi->blocks_per_seg - 1); + __add_sum_entry(sbi, type, 
sum); + + /* change the current log to the next block addr in advance */ + if (next_segno != segno) { + curseg->next_segno = next_segno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & + (sbi->blocks_per_seg - 1); + + /* rewrite node page */ + set_page_writeback(page); + submit_write_page(sbi, page, new_blkaddr, NODE); + f2fs_submit_bio(sbi, NODE, true); + refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); +} + +static int read_compacted_summaries(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct curseg_info *seg_i; + unsigned char *kaddr; + struct page *page; + block_t start; + int i, j, offset; + + start = start_sum_block(sbi); + + page = get_meta_page(sbi, start++); + kaddr = (unsigned char *)page_address(page); + + /* Step 1: restore nat cache */ + seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); + memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE); + + /* Step 2: restore sit cache */ + seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); + memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE, + SUM_JOURNAL_SIZE); + offset = 2 * SUM_JOURNAL_SIZE; + + /* Step 3: restore summary entries */ + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + unsigned short blk_off; + unsigned int segno; + + seg_i = CURSEG_I(sbi, i); + segno = le32_to_cpu(ckpt->cur_data_segno[i]); + blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); + seg_i->next_segno = segno; + reset_curseg(sbi, i, 0); + seg_i->alloc_type = ckpt->alloc_type[i]; + seg_i->next_blkoff = blk_off; + + if (seg_i->alloc_type == SSR) + blk_off = sbi->blocks_per_seg; + + for (j = 0; j < blk_off; j++) { + struct f2fs_summary *s; + s = (struct f2fs_summary *)(kaddr + offset); + seg_i->sum_blk->entries[j] = *s; + offset += SUMMARY_SIZE; + if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + SUM_FOOTER_SIZE) + continue; + + f2fs_put_page(page, 1); + page = NULL; + + page = get_meta_page(sbi, start++); + kaddr = (unsigned char *)page_address(page); + offset = 0; + } + } + f2fs_put_page(page, 1); + return 0; +} + +static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_summary_block *sum; + struct curseg_info *curseg; + struct page *new; + unsigned short blk_off; + unsigned int segno = 0; + block_t blk_addr = 0; + + /* get segment number and block addr */ + if (IS_DATASEG(type)) { + segno = le32_to_cpu(ckpt->cur_data_segno[type]); + blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - + CURSEG_HOT_DATA]); + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); + else + blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); + } else { + segno = le32_to_cpu(ckpt->cur_node_segno[type - + CURSEG_HOT_NODE]); + blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - + CURSEG_HOT_NODE]); + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, + type - CURSEG_HOT_NODE); + else + blk_addr = GET_SUM_BLOCK(sbi, segno); + } + + new = get_meta_page(sbi, blk_addr); + sum = (struct f2fs_summary_block *)page_address(new); + + if (IS_NODESEG(type)) { + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { + struct f2fs_summary *ns = &sum->entries[0]; + int i; + for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { + ns->version = 0; + ns->ofs_in_node = 0; + } + 
} else { + if (restore_node_summary(sbi, segno, sum)) { + f2fs_put_page(new, 1); + return -EINVAL; + } + } + } + + /* set uncompleted segment to curseg */ + curseg = CURSEG_I(sbi, type); + mutex_lock(&curseg->curseg_mutex); + memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE); + curseg->next_segno = segno; + reset_curseg(sbi, type, 0); + curseg->alloc_type = ckpt->alloc_type[type]; + curseg->next_blkoff = blk_off; + mutex_unlock(&curseg->curseg_mutex); + f2fs_put_page(new, 1); + return 0; +} + +static int restore_curseg_summaries(struct f2fs_sb_info *sbi) +{ + int type = CURSEG_HOT_DATA; + + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { + /* restore for compacted data summary */ + if (read_compacted_summaries(sbi)) + return -EINVAL; + type = CURSEG_HOT_NODE; + } + + for (; type <= CURSEG_COLD_NODE; type++) + if (read_normal_summaries(sbi, type)) + return -EINVAL; + return 0; +} + +static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + struct page *page; + unsigned char *kaddr; + struct f2fs_summary *summary; + struct curseg_info *seg_i; + int written_size = 0; + int i, j; + + page = grab_meta_page(sbi, blkaddr++); + kaddr = (unsigned char *)page_address(page); + + /* Step 1: write nat cache */ + seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); + memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE); + written_size += SUM_JOURNAL_SIZE; + + /* Step 2: write sit cache */ + seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); + memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits, + SUM_JOURNAL_SIZE); + written_size += SUM_JOURNAL_SIZE; + + set_page_dirty(page); + + /* Step 3: write summary entries */ + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + unsigned short blkoff; + seg_i = CURSEG_I(sbi, i); + if (sbi->ckpt->alloc_type[i] == SSR) + blkoff = sbi->blocks_per_seg; + else + blkoff = curseg_blkoff(sbi, i); + + for (j = 0; j < blkoff; j++) { + if (!page) { + page = grab_meta_page(sbi, blkaddr++); + kaddr = (unsigned char *)page_address(page); + written_size = 0; + } + summary = (struct f2fs_summary *)(kaddr + written_size); + *summary = seg_i->sum_blk->entries[j]; + written_size += SUMMARY_SIZE; + set_page_dirty(page); + + if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + SUM_FOOTER_SIZE) + continue; + + f2fs_put_page(page, 1); + page = NULL; + } + } + if (page) + f2fs_put_page(page, 1); +} + +static void write_normal_summaries(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + int i, end; + if (IS_DATASEG(type)) + end = type + NR_CURSEG_DATA_TYPE; + else + end = type + NR_CURSEG_NODE_TYPE; + + for (i = type; i < end; i++) { + struct curseg_info *sum = CURSEG_I(sbi, i); + mutex_lock(&sum->curseg_mutex); + write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type)); + mutex_unlock(&sum->curseg_mutex); + } +} + +void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +{ + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) + write_compacted_summaries(sbi, start_blk); + else + write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); +} + +void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +{ + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) + write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); +} + +int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, + unsigned int val, int alloc) +{ + int i; + + if (type == NAT_JOURNAL) { + for (i = 0; i < nats_in_cursum(sum); i++) { + if (le32_to_cpu(nid_in_journal(sum, i)) == val) + return i; + } + if (alloc && nats_in_cursum(sum) < 
NAT_JOURNAL_ENTRIES) + return update_nats_in_cursum(sum, 1); + } else if (type == SIT_JOURNAL) { + for (i = 0; i < sits_in_cursum(sum); i++) + if (le32_to_cpu(segno_in_journal(sum, i)) == val) + return i; + if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES) + return update_sits_in_cursum(sum, 1); + } + return -1; +} + +static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); + block_t blk_addr = sit_i->sit_base_addr + offset; + + check_seg_range(sbi, segno); + + /* calculate sit block address */ + if (f2fs_test_bit(offset, sit_i->sit_bitmap)) + blk_addr += sit_i->sit_blocks; + + return get_meta_page(sbi, blk_addr); +} + +static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, + unsigned int start) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct page *src_page, *dst_page; + pgoff_t src_off, dst_off; + void *src_addr, *dst_addr; + + src_off = current_sit_addr(sbi, start); + dst_off = next_sit_addr(sbi, src_off); + + /* get current sit block page without lock */ + src_page = get_meta_page(sbi, src_off); + dst_page = grab_meta_page(sbi, dst_off); + BUG_ON(PageDirty(src_page)); + + src_addr = page_address(src_page); + dst_addr = page_address(dst_page); + memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); + + set_page_dirty(dst_page); + f2fs_put_page(src_page, 1); + + set_to_next_sit(sit_i, start); + + return dst_page; +} + +static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + int i; + + /* + * If the journal area in the current summary is full of sit entries, + * all the sit entries will be flushed. Otherwise the sit entries + * are not able to replace with newly hot sit entries. + */ + if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { + for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { + unsigned int segno; + segno = le32_to_cpu(segno_in_journal(sum, i)); + __mark_sit_entry_dirty(sbi, segno); + } + update_sits_in_cursum(sum, -sits_in_cursum(sum)); + return 1; + } + return 0; +} + +/* + * CP calls this function, which flushes SIT entries including sit_journal, + * and moves prefree segs to free segs. + */ +void flush_sit_entries(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned long *bitmap = sit_i->dirty_sentries_bitmap; + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + unsigned long nsegs = TOTAL_SEGS(sbi); + struct page *page = NULL; + struct f2fs_sit_block *raw_sit = NULL; + unsigned int start = 0, end = 0; + unsigned int segno = -1; + bool flushed; + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + /* + * "flushed" indicates whether sit entries in journal are flushed + * to the SIT area or not. 
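+ * When it is true, every dirty entry is written directly to its SIT
+ * block and the journal lookup below is skipped.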
+ */ + flushed = flush_sits_in_journal(sbi); + + while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { + struct seg_entry *se = get_seg_entry(sbi, segno); + int sit_offset, offset; + + sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); + + if (flushed) + goto to_sit_page; + + offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); + if (offset >= 0) { + segno_in_journal(sum, offset) = cpu_to_le32(segno); + seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); + goto flush_done; + } +to_sit_page: + if (!page || (start > segno) || (segno > end)) { + if (page) { + f2fs_put_page(page, 1); + page = NULL; + } + + start = START_SEGNO(sit_i, segno); + end = start + SIT_ENTRY_PER_BLOCK - 1; + + /* read sit block that will be updated */ + page = get_next_sit_page(sbi, start); + raw_sit = page_address(page); + } + + /* udpate entry in SIT block */ + seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); +flush_done: + __clear_bit(segno, bitmap); + sit_i->dirty_sentries--; + } + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); + + /* writeout last modified SIT block */ + f2fs_put_page(page, 1); + + set_prefree_as_free_segments(sbi); +} + +static int build_sit_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct sit_info *sit_i; + unsigned int sit_segs, start; + char *src_bitmap, *dst_bitmap; + unsigned int bitmap_size; + + /* allocate memory for SIT information */ + sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL); + if (!sit_i) + return -ENOMEM; + + SM_I(sbi)->sit_info = sit_i; + + sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); + if (!sit_i->sentries) + return -ENOMEM; + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!sit_i->dirty_sentries_bitmap) + return -ENOMEM; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + sit_i->sentries[start].cur_valid_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + sit_i->sentries[start].ckpt_valid_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].cur_valid_map + || !sit_i->sentries[start].ckpt_valid_map) + return -ENOMEM; + } + + if (sbi->segs_per_sec > 1) { + sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * + sizeof(struct sec_entry)); + if (!sit_i->sec_entries) + return -ENOMEM; + } + + /* get information related with SIT */ + sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1; + + /* setup SIT bitmap from ckeckpoint pack */ + bitmap_size = __bitmap_size(sbi, SIT_BITMAP); + src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); + + dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); + if (!dst_bitmap) + return -ENOMEM; + + /* init SIT information */ + sit_i->s_ops = &default_salloc_ops; + + sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); + sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; + sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count); + sit_i->sit_bitmap = dst_bitmap; + sit_i->bitmap_size = bitmap_size; + sit_i->dirty_sentries = 0; + sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; + sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); + sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; + mutex_init(&sit_i->sentry_lock); + return 0; +} + +static int build_free_segmap(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + struct free_segmap_info *free_i; + unsigned int bitmap_size, sec_bitmap_size; + + /* allocate memory for 
free segmap information */ + free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL); + if (!free_i) + return -ENOMEM; + + SM_I(sbi)->free_info = free_i; + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); + if (!free_i->free_segmap) + return -ENOMEM; + + sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); + free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); + if (!free_i->free_secmap) + return -ENOMEM; + + /* set all segments as dirty temporarily */ + memset(free_i->free_segmap, 0xff, bitmap_size); + memset(free_i->free_secmap, 0xff, sec_bitmap_size); + + /* init free segmap information */ + free_i->start_segno = + (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr); + free_i->free_segments = 0; + free_i->free_sections = 0; + rwlock_init(&free_i->segmap_lock); + return 0; +} + +static int build_curseg(struct f2fs_sb_info *sbi) +{ + struct curseg_info *array; + int i; + + array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); + if (!array) + return -ENOMEM; + + SM_I(sbi)->curseg_array = array; + + for (i = 0; i < NR_CURSEG_TYPE; i++) { + mutex_init(&array[i].curseg_mutex); + array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + if (!array[i].sum_blk) + return -ENOMEM; + array[i].segno = NULL_SEGNO; + array[i].next_blkoff = 0; + } + return restore_curseg_summaries(sbi); +} + +static void build_sit_entries(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + unsigned int start; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + struct seg_entry *se = &sit_i->sentries[start]; + struct f2fs_sit_block *sit_blk; + struct f2fs_sit_entry sit; + struct page *page; + int i; + + mutex_lock(&curseg->curseg_mutex); + for (i = 0; i < sits_in_cursum(sum); i++) { + if (le32_to_cpu(segno_in_journal(sum, i)) == start) { + sit = sit_in_journal(sum, i); + mutex_unlock(&curseg->curseg_mutex); + goto got_it; + } + } + mutex_unlock(&curseg->curseg_mutex); + page = get_current_sit_page(sbi, start); + sit_blk = (struct f2fs_sit_block *)page_address(page); + sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; + f2fs_put_page(page, 1); +got_it: + check_block_count(sbi, start, &sit); + seg_info_from_raw_sit(se, &sit); + if (sbi->segs_per_sec > 1) { + struct sec_entry *e = get_sec_entry(sbi, start); + e->valid_blocks += se->valid_blocks; + } + } +} + +static void init_free_segmap(struct f2fs_sb_info *sbi) +{ + unsigned int start; + int type; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + struct seg_entry *sentry = get_seg_entry(sbi, start); + if (!sentry->valid_blocks) + __set_free(sbi, start); + } + + /* set use the current segments */ + for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { + struct curseg_info *curseg_t = CURSEG_I(sbi, type); + __set_test_and_inuse(sbi, curseg_t->segno); + } +} + +static void init_dirty_segmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); + unsigned short valid_blocks; + + while (1) { + /* find dirty segment based on free segmap */ + segno = find_next_inuse(free_i, total_segs, offset); + if (segno >= total_segs) + break; + offset = segno + 1; + valid_blocks = get_valid_blocks(sbi, segno, 0); + if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) + continue; + 
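/* in use but only partially valid: add this segment to the dirty list */ +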
mutex_lock(&dirty_i->seglist_lock); + __locate_dirty_segment(sbi, segno, DIRTY); + mutex_unlock(&dirty_i->seglist_lock); + } +} + +static int init_victim_secmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); + + dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dirty_i->victim_secmap) + return -ENOMEM; + return 0; +} + +static int build_dirty_segmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i; + unsigned int bitmap_size, i; + + /* allocate memory for dirty segments list information */ + dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL); + if (!dirty_i) + return -ENOMEM; + + SM_I(sbi)->dirty_info = dirty_i; + mutex_init(&dirty_i->seglist_lock); + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + + for (i = 0; i < NR_DIRTY_TYPE; i++) { + dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); + if (!dirty_i->dirty_segmap[i]) + return -ENOMEM; + } + + init_dirty_segmap(sbi); + return init_victim_secmap(sbi); +} + +/* + * Update min, max modified time for cost-benefit GC algorithm + */ +static void init_min_max_mtime(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int segno; + + mutex_lock(&sit_i->sentry_lock); + + sit_i->min_mtime = LLONG_MAX; + + for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { + unsigned int i; + unsigned long long mtime = 0; + + for (i = 0; i < sbi->segs_per_sec; i++) + mtime += get_seg_entry(sbi, segno + i)->mtime; + + mtime = div_u64(mtime, sbi->segs_per_sec); + + if (sit_i->min_mtime > mtime) + sit_i->min_mtime = mtime; + } + sit_i->max_mtime = get_mtime(sbi); + mutex_unlock(&sit_i->sentry_lock); +} + +int build_segment_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_sm_info *sm_info; + int err; + + sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL); + if (!sm_info) + return -ENOMEM; + + /* init sm info */ + sbi->sm_info = sm_info; + INIT_LIST_HEAD(&sm_info->wblist_head); + spin_lock_init(&sm_info->wblist_lock); + sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); + sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); + sm_info->segment_count = le32_to_cpu(raw_super->segment_count); + sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); + sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); + sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); + sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); + + err = build_sit_info(sbi); + if (err) + return err; + err = build_free_segmap(sbi); + if (err) + return err; + err = build_curseg(sbi); + if (err) + return err; + + /* reinit free segmap based on SIT */ + build_sit_entries(sbi); + + init_free_segmap(sbi); + err = build_dirty_segmap(sbi); + if (err) + return err; + + init_min_max_mtime(sbi); + return 0; +} + +static void discard_dirty_segmap(struct f2fs_sb_info *sbi, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + mutex_lock(&dirty_i->seglist_lock); + kfree(dirty_i->dirty_segmap[dirty_type]); + dirty_i->nr_dirty[dirty_type] = 0; + mutex_unlock(&dirty_i->seglist_lock); +} + +static void destroy_victim_secmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + kfree(dirty_i->victim_secmap); +} + +static void destroy_dirty_segmap(struct f2fs_sb_info 
*sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + int i; + + if (!dirty_i) + return; + + /* discard pre-free/dirty segments list */ + for (i = 0; i < NR_DIRTY_TYPE; i++) + discard_dirty_segmap(sbi, i); + + destroy_victim_secmap(sbi); + SM_I(sbi)->dirty_info = NULL; + kfree(dirty_i); +} + +static void destroy_curseg(struct f2fs_sb_info *sbi) +{ + struct curseg_info *array = SM_I(sbi)->curseg_array; + int i; + + if (!array) + return; + SM_I(sbi)->curseg_array = NULL; + for (i = 0; i < NR_CURSEG_TYPE; i++) + kfree(array[i].sum_blk); + kfree(array); +} + +static void destroy_free_segmap(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = SM_I(sbi)->free_info; + if (!free_i) + return; + SM_I(sbi)->free_info = NULL; + kfree(free_i->free_segmap); + kfree(free_i->free_secmap); + kfree(free_i); +} + +static void destroy_sit_info(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int start; + + if (!sit_i) + return; + + if (sit_i->sentries) { + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + kfree(sit_i->sentries[start].cur_valid_map); + kfree(sit_i->sentries[start].ckpt_valid_map); + } + } + vfree(sit_i->sentries); + vfree(sit_i->sec_entries); + kfree(sit_i->dirty_sentries_bitmap); + + SM_I(sbi)->sit_info = NULL; + kfree(sit_i->sit_bitmap); + kfree(sit_i); +} + +void destroy_segment_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + destroy_dirty_segmap(sbi); + destroy_curseg(sbi); + destroy_free_segmap(sbi); + destroy_sit_info(sbi); + sbi->sm_info = NULL; + kfree(sm_info); +} diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h new file mode 100644 index 00000000000..69a5c79f67b --- /dev/null +++ b/fs/f2fs/segment.h @@ -0,0 +1,639 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/segment.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include + +/* constant macro */ +#define NULL_SEGNO ((unsigned int)(~0)) +#define NULL_SECNO ((unsigned int)(~0)) + +/* L: Logical segment # in volume, R: Relative segment # in main area */ +#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) +#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) + +#define IS_DATASEG(t) \ + ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ + (t == CURSEG_WARM_DATA)) + +#define IS_NODESEG(t) \ + ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ + (t == CURSEG_WARM_NODE)) + +#define IS_CURSEG(sbi, seg) \ + ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) + +#define IS_CURSEC(sbi, secno) \ + ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ + sbi->segs_per_sec)) \ + +#define START_BLOCK(sbi, segno) \ + (SM_I(sbi)->seg0_blkaddr + \ + (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) +#define NEXT_FREE_BLKADDR(sbi, curseg) \ + (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) + +#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) + +#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ + ((blk_addr) - SM_I(sbi)->seg0_blkaddr) +#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) +#define GET_SEGNO(sbi, blk_addr) \ + (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ + NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ + GET_SEGNO_FROM_SEG0(sbi, blk_addr))) +#define GET_SECNO(sbi, segno) \ + ((segno) / sbi->segs_per_sec) +#define GET_ZONENO_FROM_SEGNO(sbi, segno) \ + ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) + +#define GET_SUM_BLOCK(sbi, segno) \ + ((sbi->sm_info->ssa_blkaddr) + segno) + +#define GET_SUM_TYPE(footer) ((footer)->entry_type) +#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) + +#define SIT_ENTRY_OFFSET(sit_i, segno) \ + (segno % sit_i->sents_per_block) +#define SIT_BLOCK_OFFSET(sit_i, segno) \ + (segno / SIT_ENTRY_PER_BLOCK) +#define START_SEGNO(sit_i, segno) \ + (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) +#define f2fs_bitmap_size(nr) \ + (BITS_TO_LONGS(nr) * sizeof(unsigned long)) +#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) +#define TOTAL_SECS(sbi) (sbi->total_sections) + +#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ + (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) +#define SECTOR_TO_BLOCK(sbi, sectors) \ + (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) + +/* during checkpoint, bio_private is used to synchronize the last bio */ +struct bio_private { + struct f2fs_sb_info *sbi; + bool is_sync; + void *wait; +}; + +/* + * indicate a block allocation direction: RIGHT and LEFT. + * RIGHT means allocating new sections towards the end of volume. + * LEFT means the opposite direction. 
+ */ +enum { + ALLOC_RIGHT = 0, + ALLOC_LEFT +}; + +/* + * In the victim_sel_policy->alloc_mode, there are two block allocation modes. + * LFS writes data sequentially with cleaning operations. + * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations. + */ +enum { + LFS = 0, + SSR +}; + +/* + * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes. + * GC_CB is based on cost-benefit algorithm. + * GC_GREEDY is based on greedy algorithm. + */ +enum { + GC_CB = 0, + GC_GREEDY +}; + +/* + * BG_GC means the background cleaning job. + * FG_GC means the on-demand cleaning job. + */ +enum { + BG_GC = 0, + FG_GC +}; + +/* for a function parameter to select a victim segment */ +struct victim_sel_policy { + int alloc_mode; /* LFS or SSR */ + int gc_mode; /* GC_CB or GC_GREEDY */ + unsigned long *dirty_segmap; /* dirty segment bitmap */ + unsigned int offset; /* last scanned bitmap offset */ + unsigned int ofs_unit; /* bitmap search unit */ + unsigned int min_cost; /* minimum cost */ + unsigned int min_segno; /* segment # having min. cost */ +}; + +struct seg_entry { + unsigned short valid_blocks; /* # of valid blocks */ + unsigned char *cur_valid_map; /* validity bitmap of blocks */ + /* + * # of valid blocks and the validity bitmap stored in the the last + * checkpoint pack. This information is used by the SSR mode. + */ + unsigned short ckpt_valid_blocks; + unsigned char *ckpt_valid_map; + unsigned char type; /* segment type like CURSEG_XXX_TYPE */ + unsigned long long mtime; /* modification time of the segment */ +}; + +struct sec_entry { + unsigned int valid_blocks; /* # of valid blocks in a section */ +}; + +struct segment_allocation { + void (*allocate_segment)(struct f2fs_sb_info *, int, bool); +}; + +struct sit_info { + const struct segment_allocation *s_ops; + + block_t sit_base_addr; /* start block address of SIT area */ + block_t sit_blocks; /* # of blocks used by SIT area */ + block_t written_valid_blocks; /* # of valid blocks in main area */ + char *sit_bitmap; /* SIT bitmap pointer */ + unsigned int bitmap_size; /* SIT bitmap size */ + + unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ + unsigned int dirty_sentries; /* # of dirty sentries */ + unsigned int sents_per_block; /* # of SIT entries per block */ + struct mutex sentry_lock; /* to protect SIT cache */ + struct seg_entry *sentries; /* SIT segment-level cache */ + struct sec_entry *sec_entries; /* SIT section-level cache */ + + /* for cost-benefit algorithm in cleaning procedure */ + unsigned long long elapsed_time; /* elapsed time after mount */ + unsigned long long mounted_time; /* mount time */ + unsigned long long min_mtime; /* min. modification time */ + unsigned long long max_mtime; /* max. 
modification time */ +}; + +struct free_segmap_info { + unsigned int start_segno; /* start segment number logically */ + unsigned int free_segments; /* # of free segments */ + unsigned int free_sections; /* # of free sections */ + rwlock_t segmap_lock; /* free segmap lock */ + unsigned long *free_segmap; /* free segment bitmap */ + unsigned long *free_secmap; /* free section bitmap */ +}; + +/* Notice: The order of dirty type is same with CURSEG_XXX in f2fs.h */ +enum dirty_type { + DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */ + DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */ + DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */ + DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */ + DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */ + DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */ + DIRTY, /* to count # of dirty segments */ + PRE, /* to count # of entirely obsolete segments */ + NR_DIRTY_TYPE +}; + +struct dirty_seglist_info { + const struct victim_selection *v_ops; /* victim selction operation */ + unsigned long *dirty_segmap[NR_DIRTY_TYPE]; + struct mutex seglist_lock; /* lock for segment bitmaps */ + int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ + unsigned long *victim_secmap; /* background GC victims */ +}; + +/* victim selection function for cleaning and SSR */ +struct victim_selection { + int (*get_victim)(struct f2fs_sb_info *, unsigned int *, + int, int, char); +}; + +/* for active log information */ +struct curseg_info { + struct mutex curseg_mutex; /* lock for consistency */ + struct f2fs_summary_block *sum_blk; /* cached summary block */ + unsigned char alloc_type; /* current allocation type */ + unsigned int segno; /* current segment number */ + unsigned short next_blkoff; /* next block offset to write */ + unsigned int zone; /* current zone number */ + unsigned int next_segno; /* preallocated segment */ +}; + +/* + * inline functions + */ +static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) +{ + return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); +} + +static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sentries[segno]; +} + +static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; +} + +static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno, int section) +{ + /* + * In order to get # of valid blocks in a section instantly from many + * segments, f2fs manages two counting structures separately. 
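+ * struct seg_entry keeps the per-segment count, while struct sec_entry
+ * caches the aggregated per-section count.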
+ */ + if (section > 1) + return get_sec_entry(sbi, segno)->valid_blocks; + else + return get_seg_entry(sbi, segno)->valid_blocks; +} + +static inline void seg_info_from_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + se->valid_blocks = GET_SIT_VBLOCKS(rs); + se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); + memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->type = GET_SIT_TYPE(rs); + se->mtime = le64_to_cpu(rs->mtime); +} + +static inline void seg_info_to_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) | + se->valid_blocks; + rs->vblocks = cpu_to_le16(raw_vblocks); + memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->ckpt_valid_blocks = se->valid_blocks; + rs->mtime = cpu_to_le64(se->mtime); +} + +static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, + unsigned int max, unsigned int segno) +{ + unsigned int ret; + read_lock(&free_i->segmap_lock); + ret = find_next_bit(free_i->free_segmap, max, segno); + read_unlock(&free_i->segmap_lock); + return ret; +} + +static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + clear_bit(segno, free_i->free_segmap); + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + clear_bit(secno, free_i->free_secmap); + free_i->free_sections++; + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + set_bit(segno, free_i->free_segmap); + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; +} + +static inline void __set_test_and_free(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + if (test_and_clear_bit(segno, free_i->free_segmap)) { + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), + start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + if (test_and_clear_bit(secno, free_i->free_secmap)) + free_i->free_sections++; + } + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + write_lock(&free_i->segmap_lock); + if (!test_and_set_bit(segno, free_i->free_segmap)) { + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; + } + write_unlock(&free_i->segmap_lock); +} + +static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, + void *dst_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); +} + +static inline block_t written_block_count(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i 
= SIT_I(sbi); + block_t vblocks; + + mutex_lock(&sit_i->sentry_lock); + vblocks = sit_i->written_valid_blocks; + mutex_unlock(&sit_i->sentry_lock); + + return vblocks; +} + +static inline unsigned int free_segments(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_segs; + + read_lock(&free_i->segmap_lock); + free_segs = free_i->free_segments; + read_unlock(&free_i->segmap_lock); + + return free_segs; +} + +static inline int reserved_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->reserved_segments; +} + +static inline unsigned int free_sections(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_secs; + + read_lock(&free_i->segmap_lock); + free_secs = free_i->free_sections; + read_unlock(&free_i->segmap_lock); + + return free_secs; +} + +static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[PRE]; +} + +static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE]; +} + +static inline int overprovision_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->ovp_segments; +} + +static inline int overprovision_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; +} + +static inline int reserved_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec; +} + +static inline bool need_SSR(struct f2fs_sb_info *sbi) +{ + return (free_sections(sbi) < overprovision_sections(sbi)); +} + +static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) +{ + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + + if (sbi->por_doing) + return false; + + return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + + reserved_sections(sbi))); +} + +static inline int utilization(struct f2fs_sb_info *sbi) +{ + return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count); +} + +/* + * Sometimes f2fs may be better to drop out-of-place update policy. + * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write + * data in the original place likewise other traditional file systems. + * But, currently set 100 in percentage, which means it is disabled. + * See below need_inplace_update(). 
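+ * In-place update is only chosen when SSR is needed and utilization
+ * exceeds MIN_IPU_UTIL, and it is never used for directories.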
+ */ +#define MIN_IPU_UTIL 100 +static inline bool need_inplace_update(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (S_ISDIR(inode->i_mode)) + return false; + if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) + return true; + return false; +} + +static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->segno; +} + +static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->alloc_type; +} + +static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->next_blkoff; +} + +static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) +{ + unsigned int end_segno = SM_I(sbi)->segment_count - 1; + BUG_ON(segno > end_segno); +} + +/* + * This function is used for only debugging. + * NOTE: In future, we have to remove this function. + */ +static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; + block_t start_addr = sm_info->seg0_blkaddr; + block_t end_addr = start_addr + total_blks - 1; + BUG_ON(blk_addr < start_addr); + BUG_ON(blk_addr > end_addr); +} + +/* + * Summary block is always treated as invalid block + */ +static inline void check_block_count(struct f2fs_sb_info *sbi, + int segno, struct f2fs_sit_entry *raw_sit) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + unsigned int end_segno = sm_info->segment_count - 1; + int valid_blocks = 0; + int i; + + /* check segment usage */ + BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); + + /* check boundary of a given segment number */ + BUG_ON(segno > end_segno); + + /* check bitmap with valid block count */ + for (i = 0; i < sbi->blocks_per_seg; i++) + if (f2fs_test_bit(i, raw_sit->valid_map)) + valid_blocks++; + BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); +} + +static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, + unsigned int start) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); + block_t blk_addr = sit_i->sit_base_addr + offset; + + check_seg_range(sbi, start); + + /* calculate sit block address */ + if (f2fs_test_bit(offset, sit_i->sit_bitmap)) + blk_addr += sit_i->sit_blocks; + + return blk_addr; +} + +static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + block_addr -= sit_i->sit_base_addr; + if (block_addr < sit_i->sit_blocks) + block_addr += sit_i->sit_blocks; + else + block_addr -= sit_i->sit_blocks; + + return block_addr + sit_i->sit_base_addr; +} + +static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) +{ + unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); + + if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) + f2fs_clear_bit(block_off, sit_i->sit_bitmap); + else + f2fs_set_bit(block_off, sit_i->sit_bitmap); +} + +static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - + sit_i->mounted_time; +} + +static inline void set_summary(struct f2fs_summary *sum, nid_t nid, + unsigned int ofs_in_node, unsigned char version) +{ + sum->nid = cpu_to_le32(nid); + sum->ofs_in_node = 
cpu_to_le16(ofs_in_node); + sum->version = version; +} + +static inline block_t start_sum_block(struct f2fs_sb_info *sbi) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) + - (base + 1) + type; +} + +static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) +{ + if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) + return true; + return false; +} + +static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct request_queue *q = bdev_get_queue(bdev); + return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); +} diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c new file mode 100644 index 00000000000..7153f8445cf --- /dev/null +++ b/fs/f2fs/super.c @@ -0,0 +1,1156 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/super.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "xattr.h" +#include "gc.h" + +#define CREATE_TRACE_POINTS +#include + +static struct proc_dir_entry *f2fs_proc_root; +static struct kmem_cache *f2fs_inode_cachep; +static struct kset *f2fs_kset; + +enum { + Opt_gc_background, + Opt_disable_roll_forward, + Opt_discard, + Opt_noheap, + Opt_nouser_xattr, + Opt_noacl, + Opt_active_logs, + Opt_disable_ext_identify, + Opt_inline_xattr, + Opt_android_emu, + Opt_err_continue, + Opt_err_panic, + Opt_err_recover, + Opt_err, +}; + +static match_table_t f2fs_tokens = { + {Opt_gc_background, "background_gc=%s"}, + {Opt_disable_roll_forward, "disable_roll_forward"}, + {Opt_discard, "discard"}, + {Opt_noheap, "no_heap"}, + {Opt_nouser_xattr, "nouser_xattr"}, + {Opt_noacl, "noacl"}, + {Opt_active_logs, "active_logs=%u"}, + {Opt_disable_ext_identify, "disable_ext_identify"}, + {Opt_inline_xattr, "inline_xattr"}, + {Opt_android_emu, "android_emu=%s"}, + {Opt_err_continue, "errors=continue"}, + {Opt_err_panic, "errors=panic"}, + {Opt_err_recover, "errors=recover"}, + {Opt_err, NULL}, +}; + +/* Sysfs support for f2fs */ +struct f2fs_attr { + struct attribute attr; + ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); + ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, + const char *, size_t); + int offset; +}; + +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned int *ui; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t f2fs_sbi_store(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, + const char *buf, size_t count) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned long t; + unsigned int *ui; + ssize_t ret; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret < 0) + return ret; + 
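/* store the parsed value into the gc thread tunable selected by a->offset */ +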
*ui = t; + return count; +} + +static ssize_t f2fs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->show ? a->show(a, sbi, buf) : 0; +} + +static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->store ? a->store(a, sbi, buf, len) : 0; +} + +static void f2fs_sb_release(struct kobject *kobj) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .offset = offsetof(struct f2fs_gc_kthread, _elname), \ +} + +#define F2FS_RW_ATTR(name, elname) \ + F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) + +F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(gc_idle, gc_idle); + +#define ATTR_LIST(name) (&f2fs_attr_##name.attr) +static struct attribute *f2fs_attrs[] = { + ATTR_LIST(gc_min_sleep_time), + ATTR_LIST(gc_max_sleep_time), + ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_idle), + NULL, +}; + +static const struct sysfs_ops f2fs_attr_ops = { + .show = f2fs_attr_show, + .store = f2fs_attr_store, +}; + +static struct kobj_type f2fs_ktype = { + .default_attrs = f2fs_attrs, + .sysfs_ops = &f2fs_attr_ops, + .release = f2fs_sb_release, +}; + +void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + va_end(args); +} + +static void init_once(void *foo) +{ + struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; + + inode_init_once(&fi->vfs_inode); +} + +static int parse_android_emu(struct f2fs_sb_info *sbi, char *args) +{ + char *sep = args; + char *sepres; + int ret; + + if (!sep) + return -EINVAL; + + sepres = strsep(&sep, ":"); + if (!sep) + return -EINVAL; + ret = kstrtou32(sepres, 0, &sbi->android_emu_uid); + if (ret) + return ret; + + sepres = strsep(&sep, ":"); + if (!sep) + return -EINVAL; + ret = kstrtou32(sepres, 0, &sbi->android_emu_gid); + if (ret) + return ret; + + sepres = strsep(&sep, ":"); + ret = kstrtou16(sepres, 8, &sbi->android_emu_mode); + if (ret) + return ret; + + if (sep && strstr(sep, "nocase")) + sbi->android_emu_flags = F2FS_ANDROID_EMU_NOCASE; + + return 0; +} + +static int parse_options(struct super_block *sb, char *options) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + substring_t args[MAX_OPT_ARGS]; + char *p, *name; + int arg = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. 
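+ * match_token() only fills args[] when the matched pattern contains a
+ * format specifier such as %u or %s.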
+ */ + args[0].to = args[0].from = NULL; + token = match_token(p, f2fs_tokens, args); + + switch (token) { + case Opt_gc_background: + name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (!strncmp(name, "on", 2)) + set_opt(sbi, BG_GC); + else if (!strncmp(name, "off", 3)) + clear_opt(sbi, BG_GC); + else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_disable_roll_forward: + set_opt(sbi, DISABLE_ROLL_FORWARD); + break; + case Opt_discard: + set_opt(sbi, DISCARD); + break; + case Opt_noheap: + set_opt(sbi, NOHEAP); + break; +#ifdef CONFIG_F2FS_FS_XATTR + case Opt_nouser_xattr: + clear_opt(sbi, XATTR_USER); + break; + case Opt_inline_xattr: + set_opt(sbi, INLINE_XATTR); + break; +#else + case Opt_nouser_xattr: + f2fs_msg(sb, KERN_INFO, + "nouser_xattr options not supported"); + break; + case Opt_inline_xattr: + f2fs_msg(sb, KERN_INFO, + "inline_xattr options not supported"); + break; +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + case Opt_noacl: + clear_opt(sbi, POSIX_ACL); + break; +#else + case Opt_noacl: + f2fs_msg(sb, KERN_INFO, "noacl options not supported"); + break; +#endif + case Opt_active_logs: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) + return -EINVAL; + sbi->active_logs = arg; + break; + case Opt_disable_ext_identify: + set_opt(sbi, DISABLE_EXT_IDENTIFY); + break; + case Opt_err_continue: + clear_opt(sbi, ERRORS_RECOVER); + clear_opt(sbi, ERRORS_PANIC); + break; + case Opt_err_panic: + set_opt(sbi, ERRORS_PANIC); + clear_opt(sbi, ERRORS_RECOVER); + break; + case Opt_err_recover: + set_opt(sbi, ERRORS_RECOVER); + clear_opt(sbi, ERRORS_PANIC); + break; + case Opt_android_emu: + if (args->from) { + int ret; + char *perms = match_strdup(args); + + ret = parse_android_emu(sbi, perms); + kfree(perms); + + if (ret) + return -EINVAL; + + set_opt(sbi, ANDROID_EMU); + } else + return -EINVAL; + break; + + default: + f2fs_msg(sb, KERN_ERR, + "Unrecognized mount option \"%s\" or missing value", + p); + return -EINVAL; + } + } + return 0; +} + +static struct inode *f2fs_alloc_inode(struct super_block *sb) +{ + struct f2fs_inode_info *fi; + + fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO); + if (!fi) + return NULL; + + init_once((void *) fi); + + /* Initialize f2fs-specific inode info */ + fi->vfs_inode.i_version = 1; + atomic_set(&fi->dirty_dents, 0); + fi->i_current_depth = 1; + fi->i_advise = 0; + rwlock_init(&fi->ext.ext_lock); + + set_inode_flag(fi, FI_NEW_INODE); + + if (test_opt(F2FS_SB(sb), INLINE_XATTR)) + set_inode_flag(fi, FI_INLINE_XATTR); + + return &fi->vfs_inode; +} + +static int f2fs_drop_inode(struct inode *inode) +{ + /* + * This is to avoid a deadlock condition like below. + * writeback_single_inode(inode) + * - f2fs_write_data_page + * - f2fs_gc -> iput -> evict + * - inode_wait_for_writeback(inode) + */ + if (!inode_unhashed(inode) && inode->i_state & I_SYNC) + return 0; + return generic_drop_inode(inode); +} + +/* + * f2fs_dirty_inode() is called from __mark_inode_dirty() + * + * We should call set_dirty_inode to write the dirty inode through write_inode. 
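+ * Here only the FI_DIRTY_INODE flag is set; the inode itself is
+ * written back later through f2fs_write_inode().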
+ */ +static void f2fs_dirty_inode(struct inode *inode, int flags) +{ + set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); +} + +static void f2fs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode)); +} + +static void f2fs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, f2fs_i_callback); +} + +static void f2fs_put_super(struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (sbi->s_proc) { + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry(sb->s_id, f2fs_proc_root); + } + kobject_del(&sbi->s_kobj); + + f2fs_destroy_stats(sbi); + stop_gc_thread(sbi); + + write_checkpoint(sbi, true); + + iput(sbi->node_inode); + iput(sbi->meta_inode); + + /* destroy f2fs internal modules */ + destroy_node_manager(sbi); + destroy_segment_manager(sbi); + + kfree(sbi->ckpt); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + + sb->s_fs_info = NULL; + brelse(sbi->raw_super_buf); + kfree(sbi); +} + +int f2fs_sync_fs(struct super_block *sb, int sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + trace_f2fs_sync_fs(sb, sync); + + if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) + return 0; + + if (sync) { + mutex_lock(&sbi->gc_mutex); + write_checkpoint(sbi, false); + mutex_unlock(&sbi->gc_mutex); + } else { + f2fs_balance_fs(sbi); + } + + return 0; +} + +static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + block_t total_count, user_block_count, start_count, ovp_count; + + total_count = le64_to_cpu(sbi->raw_super->block_count); + user_block_count = sbi->user_block_count; + start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr); + ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; + buf->f_type = F2FS_SUPER_MAGIC; + buf->f_bsize = sbi->blocksize; + + buf->f_blocks = total_count - start_count; + buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; + buf->f_bavail = user_block_count - valid_user_blocks(sbi); + + buf->f_files = sbi->total_node_count; + buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); + + buf->f_namelen = F2FS_NAME_LEN; + buf->f_fsid.val[0] = (u32)id; + buf->f_fsid.val[1] = (u32)(id >> 32); + + return 0; +} + +static int f2fs_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct f2fs_sb_info *sbi = F2FS_SB(vfs->mnt_sb); + + if (!(vfs->mnt_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) + seq_printf(seq, ",background_gc=%s", "on"); + else + seq_printf(seq, ",background_gc=%s", "off"); + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) + seq_puts(seq, ",disable_roll_forward"); + if (test_opt(sbi, DISCARD)) + seq_puts(seq, ",discard"); + if (test_opt(sbi, NOHEAP)) + seq_puts(seq, ",no_heap_alloc"); +#ifdef CONFIG_F2FS_FS_XATTR + if (test_opt(sbi, XATTR_USER)) + seq_puts(seq, ",user_xattr"); + else + seq_puts(seq, ",nouser_xattr"); + if (test_opt(sbi, INLINE_XATTR)) + seq_puts(seq, ",inline_xattr"); +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + if (test_opt(sbi, POSIX_ACL)) + seq_puts(seq, ",acl"); + else + seq_puts(seq, ",noacl"); +#endif + if (test_opt(sbi, ERRORS_PANIC)) + seq_puts(seq, ",errors=panic"); + else if (test_opt(sbi, ERRORS_RECOVER)) + seq_puts(seq, ",errors=recover"); + else + seq_puts(seq, ",errors=continue"); + if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) + seq_puts(seq, ",disable_ext_identify"); + + if 
(test_opt(sbi, ANDROID_EMU)) + seq_printf(seq, ",android_emu=%u:%u:%ho%s", + sbi->android_emu_uid, + sbi->android_emu_gid, + sbi->android_emu_mode, + (sbi->android_emu_flags & + F2FS_ANDROID_EMU_NOCASE) ? + ":nocase" : ""); + + seq_printf(seq, ",active_logs=%u", sbi->active_logs); + + return 0; +} + +static int segment_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); + int i; + + for (i = 0; i < total_segs; i++) { + seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); + if (i != 0 && (i % 10) == 0) + seq_puts(seq, "\n"); + else + seq_puts(seq, " "); + } + return 0; +} + +static int segment_info_open_fs(struct inode *inode, struct file *file) +{ + return single_open(file, segment_info_seq_show, + PROC_I(inode)->pde->data); +} + +static const struct file_operations f2fs_seq_segment_info_fops = { + .owner = THIS_MODULE, + .open = segment_info_open_fs, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int f2fs_remount(struct super_block *sb, int *flags, char *data) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_mount_info org_mount_opt; + int err, active_logs; + + /* + * Save the old mount options in case we + * need to restore them. + */ + org_mount_opt = sbi->mount_opt; + active_logs = sbi->active_logs; + + /* parse mount options */ + err = parse_options(sb, data); + if (err) + goto restore_opts; + + /* + * Previous and new state of filesystem is RO, + * so no point in checking GC conditions. + */ + if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) + goto skip; + + /* + * We stop the GC thread if FS is mounted as RO + * or if background_gc = off is passed in mount + * option. Also sync the filesystem. + */ + if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { + if (sbi->gc_thread) { + stop_gc_thread(sbi); + f2fs_sync_fs(sb, 1); + } + } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + err = start_gc_thread(sbi); + if (err) + goto restore_opts; + } +skip: + /* Update the POSIXACL Flag */ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); + return 0; + +restore_opts: + sbi->mount_opt = org_mount_opt; + sbi->active_logs = active_logs; + return err; +} + +static struct super_operations f2fs_sops = { + .alloc_inode = f2fs_alloc_inode, + .drop_inode = f2fs_drop_inode, + .destroy_inode = f2fs_destroy_inode, + .write_inode = f2fs_write_inode, + .dirty_inode = f2fs_dirty_inode, + .show_options = f2fs_show_options, + .evict_inode = f2fs_evict_inode, + .put_super = f2fs_put_super, + .sync_fs = f2fs_sync_fs, + .statfs = f2fs_statfs, + .remount_fs = f2fs_remount, +}; + +static struct inode *f2fs_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + + if (ino < F2FS_ROOT_INO(sbi)) + return ERR_PTR(-ESTALE); + + /* + * f2fs_iget isn't quite right if the inode is currently unallocated! + * However f2fs_iget currently does appropriate checks to handle stale + * inodes so everything is OK. + */ + inode = f2fs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { + /* we didn't find the right inode.. 
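+		 * (the inode number was most likely recycled after a delete,
+		 * so the file handle held by the NFS client is stale and the
+		 * client has to look the path up again)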
*/ + iput(inode); + return ERR_PTR(-ESTALE); + } + return inode; +} + +static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + f2fs_nfs_get_inode); +} + +static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + f2fs_nfs_get_inode); +} + +static const struct export_operations f2fs_export_ops = { + .fh_to_dentry = f2fs_fh_to_dentry, + .fh_to_parent = f2fs_fh_to_parent, + .get_parent = f2fs_get_parent, +}; + +static loff_t max_file_size(unsigned bits) +{ + loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); + loff_t leaf_count = ADDRS_PER_BLOCK; + + /* two direct node blocks */ + result += (leaf_count * 2); + + /* two indirect node blocks */ + leaf_count *= NIDS_PER_BLOCK; + result += (leaf_count * 2); + + /* one double indirect node block */ + leaf_count *= NIDS_PER_BLOCK; + result += leaf_count; + + result <<= bits; + return result; +} + +static int sanity_check_raw_super(struct super_block *sb, + struct f2fs_super_block *raw_super) +{ + unsigned int blocksize; + + if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { + f2fs_msg(sb, KERN_INFO, + "Magic Mismatch, valid(0x%x) - read(0x%x)", + F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); + return 1; + } + + /* Currently, support only 4KB page cache size */ + if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) { + f2fs_msg(sb, KERN_INFO, + "Invalid page_cache_size (%lu), supports only 4KB\n", + PAGE_CACHE_SIZE); + return 1; + } + + /* Currently, support only 4KB block size */ + blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); + if (blocksize != F2FS_BLKSIZE) { + f2fs_msg(sb, KERN_INFO, + "Invalid blocksize (%u), supports only 4KB\n", + blocksize); + return 1; + } + + if (le32_to_cpu(raw_super->log_sectorsize) != + F2FS_LOG_SECTOR_SIZE) { + f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize"); + return 1; + } + if (le32_to_cpu(raw_super->log_sectors_per_block) != + F2FS_LOG_SECTORS_PER_BLOCK) { + f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block"); + return 1; + } + return 0; +} + +static int sanity_check_ckpt(struct f2fs_sb_info *sbi) +{ + unsigned int total, fsmeta; + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + + total = le32_to_cpu(raw_super->segment_count); + fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); + fsmeta += le32_to_cpu(raw_super->segment_count_sit); + fsmeta += le32_to_cpu(raw_super->segment_count_nat); + fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); + fsmeta += le32_to_cpu(raw_super->segment_count_ssa); + + if (fsmeta >= total) + return 1; + + if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { + f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); + return 1; + } + return 0; +} + +static void init_sb_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = sbi->raw_super; + int i; + + sbi->log_sectors_per_block = + le32_to_cpu(raw_super->log_sectors_per_block); + sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize); + sbi->blocksize = 1 << sbi->log_blocksize; + sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg; + sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); + sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); + sbi->total_sections = le32_to_cpu(raw_super->section_count); + sbi->total_node_count = + 
(le32_to_cpu(raw_super->segment_count_nat) / 2) + * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK; + sbi->root_ino_num = le32_to_cpu(raw_super->root_ino); + sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); + sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); + sbi->cur_victim_sec = NULL_SECNO; + + for (i = 0; i < NR_COUNT_TYPE; i++) + atomic_set(&sbi->nr_pages[i], 0); +} + +static int validate_superblock(struct super_block *sb, + struct f2fs_super_block **raw_super, + struct buffer_head **raw_super_buf, sector_t block) +{ + const char *super = (block == 0 ? "first" : "second"); + + /* read f2fs raw super block */ + *raw_super_buf = sb_bread(sb, block); + if (!*raw_super_buf) { + f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", + super); + return -EIO; + } + + *raw_super = (struct f2fs_super_block *) + ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET); + + /* sanity checking of raw super */ + if (!sanity_check_raw_super(sb, *raw_super)) + return 0; + + f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " + "in %s superblock", super); + return -EINVAL; +} + +static int f2fs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct f2fs_sb_info *sbi; + struct f2fs_super_block *raw_super; + struct buffer_head *raw_super_buf; + struct inode *root; + long err = -EINVAL; + int i; + const char *descr = ""; + + f2fs_msg(sb, KERN_INFO, "mounting.."); + /* allocate memory for f2fs-specific super block info */ + sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + /* set a block size */ + if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) { + f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); + goto free_sbi; + } + + err = validate_superblock(sb, &raw_super, &raw_super_buf, 0); + if (err) { + brelse(raw_super_buf); + /* check secondary superblock when primary failed */ + err = validate_superblock(sb, &raw_super, &raw_super_buf, 1); + if (err) + goto free_sb_buf; + } + sb->s_fs_info = sbi; + /* init some FS parameters */ + sbi->active_logs = NR_CURSEG_TYPE; + + set_opt(sbi, BG_GC); + +#ifdef CONFIG_F2FS_FS_XATTR + set_opt(sbi, XATTR_USER); +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + set_opt(sbi, POSIX_ACL); +#endif + /* parse mount options */ + err = parse_options(sb, (char *)data); + if (err) + goto free_sb_buf; + + sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); + get_random_bytes(&sbi->s_next_generation, sizeof(u32)); + + sb->s_op = &f2fs_sops; + sb->s_xattr = f2fs_xattr_handlers; + sb->s_export_op = &f2fs_export_ops; + sb->s_magic = F2FS_SUPER_MAGIC; + sb->s_time_gran = 1; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? 
MS_POSIXACL : 0); + memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); + + /* init f2fs-specific super block info */ + sbi->sb = sb; + sbi->raw_super = raw_super; + sbi->raw_super_buf = raw_super_buf; + mutex_init(&sbi->gc_mutex); + mutex_init(&sbi->writepages); + mutex_init(&sbi->cp_mutex); + for (i = 0; i < NR_GLOBAL_LOCKS; i++) + mutex_init(&sbi->fs_lock[i]); + mutex_init(&sbi->node_write); + sbi->por_doing = 0; + spin_lock_init(&sbi->stat_lock); + init_rwsem(&sbi->bio_sem); + init_sb_info(sbi); + + /* get an inode for meta space */ + sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); + if (IS_ERR(sbi->meta_inode)) { + f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); + err = PTR_ERR(sbi->meta_inode); + goto free_sb_buf; + } + +get_cp: + err = get_valid_checkpoint(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint"); + goto free_meta_inode; + } + + /* sanity checking of checkpoint */ + err = -EINVAL; + if (sanity_check_ckpt(sbi)) { + f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint"); + goto free_cp; + } + + sbi->total_valid_node_count = + le32_to_cpu(sbi->ckpt->valid_node_count); + sbi->total_valid_inode_count = + le32_to_cpu(sbi->ckpt->valid_inode_count); + sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count); + sbi->total_valid_block_count = + le64_to_cpu(sbi->ckpt->valid_block_count); + sbi->last_valid_block_count = sbi->total_valid_block_count; + sbi->alloc_valid_block_count = 0; + INIT_LIST_HEAD(&sbi->dir_inode_list); + spin_lock_init(&sbi->dir_inode_lock); + + init_orphan_info(sbi); + + /* setup f2fs internal modules */ + err = build_segment_manager(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Failed to initialize F2FS segment manager"); + goto free_sm; + } + err = build_node_manager(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Failed to initialize F2FS node manager"); + goto free_nm; + } + + build_gc_manager(sbi); + + /* get an inode for node space */ + sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); + if (IS_ERR(sbi->node_inode)) { + f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); + err = PTR_ERR(sbi->node_inode); + goto free_nm; + } + + /* if there are nt orphan nodes free them */ + err = -EINVAL; + if (recover_orphan_inodes(sbi)) + goto free_node_inode; + + /* read root inode and dentry */ + root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); + if (IS_ERR(root)) { + f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); + err = PTR_ERR(root); + goto free_node_inode; + } + if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) + goto free_root_inode; + + sb->s_root = d_alloc_root(root); /* allocate root dentry */ + if (!sb->s_root) { + err = -ENOMEM; + goto free_root_inode; + } + + /* recover fsynced data */ + if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + err = recover_fsync_data(sbi); + if (err) { + if (f2fs_handle_error(sbi)) { + set_opt(sbi, DISABLE_ROLL_FORWARD); + kfree(sbi->ckpt); + f2fs_msg(sb, KERN_ERR, + "reloading last checkpoint"); + goto get_cp; + } + f2fs_msg(sb, KERN_ERR, + "cannot recover all fsync data errno=%ld", err); + /* checkpoint what we have */ + write_checkpoint(sbi, false); + } + } + + /* + * If filesystem is not mounted as read-only then + * do start the gc_thread. 
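+	 *
+	 * The counterpart lives in f2fs_remount() above: remounting
+	 * read-only (or with background_gc=off) stops the thread and syncs
+	 * the filesystem, and a read-write remount with background_gc=on
+	 * starts it again.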
+ */ + if (!(sb->s_flags & MS_RDONLY)) { + /* After POR, we can run background GC thread.*/ + err = start_gc_thread(sbi); + if (err) + goto fail; + } + + err = f2fs_build_stats(sbi); + if (err) + goto fail; + + if (f2fs_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); + + if (sbi->s_proc) + proc_create_data("segment_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_info_fops, sb); + + if (test_opt(sbi, DISCARD)) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + if (!blk_queue_discard(q)) + f2fs_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but " + "the device does not support discard"); + } + + if (test_opt(sbi, ANDROID_EMU)) + descr = " with android sdcard emulation"; + f2fs_msg(sb, KERN_INFO, "mounted filesystem%s", descr); + + sbi->s_kobj.kset = f2fs_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, + "%s", sb->s_id); + if (err) + goto fail; + + return 0; +fail: + stop_gc_thread(sbi); +free_root_inode: + iput(root); +free_node_inode: + iput(sbi->node_inode); +free_nm: + destroy_node_manager(sbi); +free_sm: + destroy_segment_manager(sbi); +free_cp: + kfree(sbi->ckpt); +free_meta_inode: + make_bad_inode(sbi->meta_inode); + iput(sbi->meta_inode); +free_sb_buf: + brelse(raw_super_buf); +free_sbi: + kfree(sbi); + f2fs_msg(sb, KERN_ERR, "mount failed"); + return err; +} + +static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); +} + +static struct file_system_type f2fs_fs_type = { + .owner = THIS_MODULE, + .name = "f2fs", + .mount = f2fs_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_inodecache(void) +{ + f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", + sizeof(struct f2fs_inode_info), NULL); + if (f2fs_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
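+	 *
+	 * f2fs_destroy_inode() frees inodes through call_rcu(f2fs_i_callback),
+	 * so at unload time some of those frees may still be queued; the
+	 * rcu_barrier() below waits for every pending callback, otherwise
+	 * kmem_cache_destroy() could tear the cache down while objects are
+	 * still about to be returned to it.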
+ */ + rcu_barrier(); + kmem_cache_destroy(f2fs_inode_cachep); +} + +static int __init init_f2fs_fs(void) +{ + int err; + + err = init_inodecache(); + if (err) + goto fail; + err = create_node_manager_caches(); + if (err) + goto free_inodecache; + err = create_gc_caches(); + if (err) + goto free_node_manager_caches; + err = create_checkpoint_caches(); + if (err) + goto free_gc_caches; + f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); + if (!f2fs_kset) { + err = -ENOMEM; + goto free_checkpoint_caches; + } + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_kset; + f2fs_create_root_stats(); + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + return 0; + +free_kset: + kset_unregister(f2fs_kset); +free_checkpoint_caches: + destroy_checkpoint_caches(); +free_gc_caches: + destroy_gc_caches(); +free_node_manager_caches: + destroy_node_manager_caches(); +free_inodecache: + destroy_inodecache(); +fail: + return err; +} + +static void __exit exit_f2fs_fs(void) +{ + remove_proc_entry("fs/f2fs", NULL); + f2fs_destroy_root_stats(); + unregister_filesystem(&f2fs_fs_type); + destroy_checkpoint_caches(); + destroy_gc_caches(); + destroy_node_manager_caches(); + destroy_inodecache(); + kset_unregister(f2fs_kset); +} + +module_init(init_f2fs_fs) +module_exit(exit_f2fs_fs) + +MODULE_AUTHOR("Samsung Electronics's Praesto Team"); +MODULE_DESCRIPTION("Flash Friendly File System"); +MODULE_LICENSE("GPL"); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c new file mode 100644 index 00000000000..973b3c57e42 --- /dev/null +++ b/fs/f2fs/xattr.c @@ -0,0 +1,602 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/xattr.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/xattr.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher + * + * Fix by Harrison Xing . + * Extended attributes for symlinks and special files added per + * suggestion of Luka Renko . + * xattr consolidation Copyright (c) 2004 James Morris , + * Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include "f2fs.h" +#include "xattr.h" + +static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + int total_len, prefix_len = 0; + const char *prefix = NULL; + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + prefix = XATTR_USER_PREFIX; + prefix_len = XATTR_USER_PREFIX_LEN; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + prefix = XATTR_TRUSTED_PREFIX; + prefix_len = XATTR_TRUSTED_PREFIX_LEN; + break; + case F2FS_XATTR_INDEX_SECURITY: + prefix = XATTR_SECURITY_PREFIX; + prefix_len = XATTR_SECURITY_PREFIX_LEN; + break; + default: + return -EINVAL; + } + + total_len = prefix_len + name_len + 1; + if (list && total_len <= list_size) { + memcpy(list, prefix, prefix_len); + memcpy(list + prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + break; + case F2FS_XATTR_INDEX_SECURITY: + break; + default: + return -EINVAL; + } + if (strcmp(name, "") == 0) + return -EINVAL; + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); +} + +static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + break; + case F2FS_XATTR_INDEX_SECURITY: + break; + default: + return -EINVAL; + } + if (strcmp(name, "") == 0) + return -EINVAL; + + return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); +} + +static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; + size_t size; + + if (type != F2FS_XATTR_INDEX_ADVISE) + return 0; + + size = strlen(xname) + 1; + if (list && size <= list_size) + memcpy(list, xname, size); + return size; +} + +static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct inode *inode = dentry->d_inode; + + if (!name || strcmp(name, "") != 0) + return -EINVAL; + + if (buffer) + *((char *)buffer) = F2FS_I(inode)->i_advise; + return sizeof(char); +} + +static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct inode *inode = dentry->d_inode; + + if (!name || strcmp(name, "") != 0) + return -EINVAL; + if (!inode_owner_or_capable(inode)) + return -EPERM; + if (value == NULL) + return -EINVAL; + + F2FS_I(inode)->i_advise = *(char *)value; + return 0; +} + +#ifdef CONFIG_F2FS_FS_SECURITY +static int __f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len, + struct page *ipage); +static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + 
void *page) +{ + const struct xattr *xattr; + int err = 0; + + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, + xattr->name, xattr->value, + xattr->value_len, (struct page *)page); + if (err < 0) + break; + } + return err; +} + +int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return security_new_inode_init_security(inode, dir, qstr, + &f2fs_initxattrs, ipage); +} +#endif + +const struct xattr_handler f2fs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .flags = F2FS_XATTR_INDEX_USER, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + +const struct xattr_handler f2fs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .flags = F2FS_XATTR_INDEX_TRUSTED, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + +const struct xattr_handler f2fs_xattr_advise_handler = { + .prefix = F2FS_SYSTEM_ADVISE_PREFIX, + .flags = F2FS_XATTR_INDEX_ADVISE, + .list = f2fs_xattr_advise_list, + .get = f2fs_xattr_advise_get, + .set = f2fs_xattr_advise_set, +}; + +const struct xattr_handler f2fs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .flags = F2FS_XATTR_INDEX_SECURITY, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + +static const struct xattr_handler *f2fs_xattr_handler_map[] = { + [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, +#ifdef CONFIG_F2FS_FS_POSIX_ACL + [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler, + [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, +#endif + [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler, +#endif + [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, +}; + +const struct xattr_handler *f2fs_xattr_handlers[] = { + &f2fs_xattr_user_handler, +#ifdef CONFIG_F2FS_FS_POSIX_ACL + &f2fs_xattr_acl_access_handler, + &f2fs_xattr_acl_default_handler, +#endif + &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + &f2fs_xattr_security_handler, +#endif + &f2fs_xattr_advise_handler, + NULL, +}; + +static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) +{ + const struct xattr_handler *handler = NULL; + + if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map)) + handler = f2fs_xattr_handler_map[name_index]; + return handler; +} + +static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, + size_t name_len, const char *name) +{ + struct f2fs_xattr_entry *entry; + + list_for_each_xattr(entry, base_addr) { + if (entry->e_name_index != name_index) + continue; + if (entry->e_name_len != name_len) + continue; + if (!memcmp(entry->e_name, name, name_len)) + break; + } + return entry; +} + +static void *read_all_xattrs(struct inode *inode, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_xattr_header *header; + size_t size = PAGE_SIZE, inline_size = 0; + void *txattr_addr; + + inline_size = inline_xattr_size(inode); + + txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); + if (!txattr_addr) + return NULL; + + /* read from inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, 
inode->i_ino); + if (IS_ERR(page)) + goto fail; + inline_addr = inline_xattr_addr(page); + } + memcpy(txattr_addr, inline_addr, inline_size); + f2fs_put_page(page, 1); + } + + /* read from xattr node block */ + if (F2FS_I(inode)->i_xattr_nid) { + struct page *xpage; + void *xattr_addr; + + /* The inode already has an extended attribute block. */ + xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xpage)) + goto fail; + + xattr_addr = page_address(xpage); + memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); + f2fs_put_page(xpage, 1); + } + + header = XATTR_HDR(txattr_addr); + + /* never been allocated xattrs */ + if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) { + header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); + header->h_refcount = cpu_to_le32(1); + } + return txattr_addr; +fail: + kzfree(txattr_addr); + return NULL; +} + +static inline int write_all_xattrs(struct inode *inode, __u32 hsize, + void *txattr_addr, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + size_t inline_size = 0; + void *xattr_addr; + struct page *xpage; + nid_t new_nid = 0; + int err; + + inline_size = inline_xattr_size(inode); + + if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) + if (!alloc_nid(sbi, &new_nid)) + return -ENOSPC; + + /* write to inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(page); + } + inline_addr = inline_xattr_addr(page); + } + memcpy(inline_addr, txattr_addr, inline_size); + f2fs_put_page(page, 1); + + /* no need to use xattr node block */ + if (hsize <= inline_size) { + err = truncate_xattr_node(inode, ipage); + alloc_nid_failed(sbi, new_nid); + return err; + } + } + + /* write to xattr node block */ + if (F2FS_I(inode)->i_xattr_nid) { + xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(xpage); + } + BUG_ON(new_nid); + } else { + struct dnode_of_data dn; + set_new_dnode(&dn, inode, NULL, NULL, new_nid); + xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(xpage); + } + alloc_nid_done(sbi, new_nid); + } + + xattr_addr = page_address(xpage); + memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - + sizeof(struct node_footer)); + set_page_dirty(xpage); + f2fs_put_page(xpage, 1); + + /* need to checkpoint during fsync */ + F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); + return 0; +} + +int f2fs_getxattr(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + struct f2fs_xattr_entry *entry; + void *base_addr; + int error = 0; + size_t value_len, name_len; + + if (name == NULL) + return -EINVAL; + name_len = strlen(name); + + base_addr = read_all_xattrs(inode, NULL); + if (!base_addr) + return -ENOMEM; + + entry = __find_xattr(base_addr, name_index, name_len, name); + if (IS_XATTR_LAST_ENTRY(entry)) { + error = -ENODATA; + goto cleanup; + } + + value_len = le16_to_cpu(entry->e_value_size); + + if (buffer && value_len > buffer_size) { + error = -ERANGE; + goto cleanup; + } + + if (buffer) { + char *pval = entry->e_name + entry->e_name_len; + memcpy(buffer, pval, value_len); + } + error = value_len; + +cleanup: + kzfree(base_addr); + return error; +} + +ssize_t f2fs_listxattr(struct dentry *dentry, char 
*buffer, size_t buffer_size) +{ + struct inode *inode = dentry->d_inode; + struct f2fs_xattr_entry *entry; + void *base_addr; + int error = 0; + size_t rest = buffer_size; + + base_addr = read_all_xattrs(inode, NULL); + if (!base_addr) + return -ENOMEM; + + list_for_each_xattr(entry, base_addr) { + const struct xattr_handler *handler = + f2fs_xattr_handler(entry->e_name_index); + size_t size; + + if (!handler) + continue; + + size = handler->list(dentry, buffer, rest, entry->e_name, + entry->e_name_len, handler->flags); + if (buffer && size > rest) { + error = -ERANGE; + goto cleanup; + } + + if (buffer) + buffer += size; + rest -= size; + } + error = buffer_size - rest; +cleanup: + kzfree(base_addr); + return error; +} + +static int __f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len, + struct page *ipage) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_xattr_entry *here, *last; + void *base_addr; + int found, newsize; + size_t name_len; + __u32 new_hsize; + int error = -ENOMEM; + + if (name == NULL) + return -EINVAL; + + if (value == NULL) + value_len = 0; + + name_len = strlen(name); + + if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) + return -ERANGE; + + base_addr = read_all_xattrs(inode, ipage); + if (!base_addr) + goto exit; + + /* find entry with wanted name. */ + here = __find_xattr(base_addr, name_index, name_len, name); + + found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; + last = here; + + while (!IS_XATTR_LAST_ENTRY(last)) + last = XATTR_NEXT_ENTRY(last); + + newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + + name_len + value_len); + + /* 1. Check space */ + if (value) { + int free; + /* + * If value is NULL, it is remove operation. + * In case of update operation, we caculate free. + */ + free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); + if (found) + free = free - ENTRY_SIZE(here); + + if (free < newsize) { + error = -ENOSPC; + goto exit; + } + } + + /* 2. Remove old entry */ + if (found) { + /* + * If entry is found, remove old entry. + * If not found, remove operation is not needed. + */ + struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here); + int oldsize = ENTRY_SIZE(here); + + memmove(here, next, (char *)last - (char *)next); + last = (struct f2fs_xattr_entry *)((char *)last - oldsize); + memset(last, 0, oldsize); + } + + new_hsize = (char *)last - (char *)base_addr; + + /* 3. Write new entry */ + if (value) { + char *pval; + /* + * Before we come here, old entry is removed. + * We just write new entry. 
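+		 *
+		 * The record written at 'last' follows the on-disk layout shown
+		 * in xattr.h:
+		 *
+		 *   e_name_index | e_name_len | e_value_size | e_name[] | value
+		 *
+		 * and newsize already includes the XATTR_ALIGN() padding, so it
+		 * matches the space reserved in the free-space check above.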
+ */ + memset(last, 0, newsize); + last->e_name_index = name_index; + last->e_name_len = name_len; + memcpy(last->e_name, name, name_len); + pval = last->e_name + name_len; + memcpy(pval, value, value_len); + last->e_value_size = cpu_to_le16(value_len); + new_hsize += newsize; + } + + error = write_all_xattrs(inode, new_hsize, base_addr, ipage); + if (error) + goto exit; + + if (is_inode_flag_set(fi, FI_ACL_MODE)) { + inode->i_mode = fi->i_acl_mode; + inode->i_ctime = CURRENT_TIME; + clear_inode_flag(fi, FI_ACL_MODE); + } + + if (ipage) + update_inode(inode, ipage); + else + update_inode_page(inode); +exit: + kzfree(base_addr); + return error; +} + +int f2fs_setxattr(struct inode *inode, int name_index, const char *name, + const void *value, size_t value_len, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; + int err; + + f2fs_balance_fs(sbi); + + ilock = mutex_lock_op(sbi); + + err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); + + mutex_unlock_op(sbi, ilock); + + return err; +} diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h new file mode 100644 index 00000000000..ae894d17cf7 --- /dev/null +++ b/fs/f2fs/xattr.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2014 XPerience(R) Project +/* + * fs/f2fs/xattr.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/xattr.h + * + * On-disk format of extended attributes for the ext2 filesystem. + * + * (C) 2001 Andreas Gruenbacher, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __F2FS_XATTR_H__ +#define __F2FS_XATTR_H__ + +#include +#include + +/* Magic value in attribute blocks */ +#define F2FS_XATTR_MAGIC 0xF2F52011 + +/* Maximum number of references to one attribute block */ +#define F2FS_XATTR_REFCOUNT_MAX 1024 + +/* Name indexes */ +#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise" +#define F2FS_XATTR_INDEX_USER 1 +#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3 +#define F2FS_XATTR_INDEX_TRUSTED 4 +#define F2FS_XATTR_INDEX_LUSTRE 5 +#define F2FS_XATTR_INDEX_SECURITY 6 +#define F2FS_XATTR_INDEX_ADVISE 7 + +struct f2fs_xattr_header { + __le32 h_magic; /* magic number for identification */ + __le32 h_refcount; /* reference count */ + __u32 h_reserved[4]; /* zero right now */ +}; + +struct f2fs_xattr_entry { + __u8 e_name_index; + __u8 e_name_len; + __le16 e_value_size; /* size of attribute value */ + char e_name[0]; /* attribute name */ +}; + +#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr)) +#define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr)) +#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1)) +#define XATTR_ROUND (3) + +#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) + +#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \ + entry->e_name_len + le16_to_cpu(entry->e_value_size))) + +#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\ + ENTRY_SIZE(entry))) + +#define IS_XATTR_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) + +#define list_for_each_xattr(entry, addr) \ + for (entry = XATTR_FIRST_ENTRY(addr);\ + !IS_XATTR_LAST_ENTRY(entry);\ + entry = XATTR_NEXT_ENTRY(entry)) + +#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \ + sizeof(struct node_footer) - sizeof(__u32)) + +#define MAX_VALUE_LEN(i) 
(MIN_OFFSET(i) - \ + sizeof(struct f2fs_xattr_header) - \ + sizeof(struct f2fs_xattr_entry)) + +/* + * On-disk structure of f2fs_xattr + * We use inline xattrs space + 1 block for xattr. + * + * +--------------------+ + * | f2fs_xattr_header | + * | | + * +--------------------+ + * | f2fs_xattr_entry | + * | .e_name_index = 1 | + * | .e_name_len = 3 | + * | .e_value_size = 14 | + * | .e_name = "foo" | + * | "value_of_xattr" |<- value_offs = e_name + e_name_len + * +--------------------+ + * | f2fs_xattr_entry | + * | .e_name_index = 4 | + * | .e_name = "bar" | + * +--------------------+ + * | | + * | Free | + * | | + * +--------------------+<- MIN_OFFSET + * | node_footer | + * | (nid, ino, offset) | + * +--------------------+ + * + **/ + +#ifdef CONFIG_F2FS_FS_XATTR +extern const struct xattr_handler f2fs_xattr_user_handler; +extern const struct xattr_handler f2fs_xattr_trusted_handler; +extern const struct xattr_handler f2fs_xattr_acl_access_handler; +extern const struct xattr_handler f2fs_xattr_acl_default_handler; +extern const struct xattr_handler f2fs_xattr_advise_handler; +extern const struct xattr_handler f2fs_xattr_security_handler; + +extern const struct xattr_handler *f2fs_xattr_handlers[]; + +extern int f2fs_setxattr(struct inode *, int, const char *, + const void *, size_t, struct page *); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); +#else + +#define f2fs_xattr_handlers NULL +static inline int f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len) +{ + return -EOPNOTSUPP; +} +static inline int f2fs_getxattr(struct inode *inode, int name_index, + const char *name, void *buffer, size_t buffer_size) +{ + return -EOPNOTSUPP; +} +static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, + size_t buffer_size) +{ + return -EOPNOTSUPP; +} +#endif + +#ifdef CONFIG_F2FS_FS_SECURITY +extern int f2fs_init_security(struct inode *, struct inode *, + const struct qstr *, struct page *); +#else +static inline int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return 0; +} +#endif +#endif /* __F2FS_XATTR_H__ */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 713c7c62443..9327888b4b2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -239,6 +239,7 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); +extern struct dentry * d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h new file mode 100644 index 00000000000..8003f8e092d --- /dev/null +++ b/include/linux/f2fs_fs.h @@ -0,0 +1,426 @@ +/** + * include/linux/f2fs_fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Copyright (c) 2014 XPerience(R) Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef _LINUX_F2FS_FS_H +#define _LINUX_F2FS_FS_H + +#include +#include + +#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ +#define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */ +#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ +#define F2FS_BLKSIZE 4096 /* support only 4KB block */ +#define F2FS_MAX_EXTENSION 64 /* # of extension entries */ + +#define NULL_ADDR ((block_t)0) /* used as block_t addresses */ +#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ + +#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) +#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) +#define F2FS_META_INO(sbi) (sbi->meta_ino_num) + +/* This flag is used by node and meta inodes, and by recovery */ +#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) + +/* + * For further optimization on multi-head logs, on-disk layout supports maximum + * 16 logs by default. The number, 16, is expected to cover all the cases + * enoughly. The implementaion currently uses no more than 6 logs. + * Half the logs are used for nodes, and the other half are used for data. + */ +#define MAX_ACTIVE_LOGS 16 +#define MAX_ACTIVE_NODE_LOGS 8 +#define MAX_ACTIVE_DATA_LOGS 8 + +/* + * For superblock + */ +struct f2fs_super_block { + __le32 magic; /* Magic Number */ + __le16 major_ver; /* Major Version */ + __le16 minor_ver; /* Minor Version */ + __le32 log_sectorsize; /* log2 sector size in bytes */ + __le32 log_sectors_per_block; /* log2 # of sectors per block */ + __le32 log_blocksize; /* log2 block size in bytes */ + __le32 log_blocks_per_seg; /* log2 # of blocks per segment */ + __le32 segs_per_sec; /* # of segments per section */ + __le32 secs_per_zone; /* # of sections per zone */ + __le32 checksum_offset; /* checksum offset inside super block */ + __le64 block_count; /* total # of user blocks */ + __le32 section_count; /* total # of sections */ + __le32 segment_count; /* total # of segments */ + __le32 segment_count_ckpt; /* # of segments for checkpoint */ + __le32 segment_count_sit; /* # of segments for SIT */ + __le32 segment_count_nat; /* # of segments for NAT */ + __le32 segment_count_ssa; /* # of segments for SSA */ + __le32 segment_count_main; /* # of segments for main area */ + __le32 segment0_blkaddr; /* start block address of segment 0 */ + __le32 cp_blkaddr; /* start block address of checkpoint */ + __le32 sit_blkaddr; /* start block address of SIT */ + __le32 nat_blkaddr; /* start block address of NAT */ + __le32 ssa_blkaddr; /* start block address of SSA */ + __le32 main_blkaddr; /* start block address of main area */ + __le32 root_ino; /* root inode number */ + __le32 node_ino; /* node inode number */ + __le32 meta_ino; /* meta inode number */ + __u8 uuid[16]; /* 128-bit uuid for volume */ + __le16 volume_name[512]; /* volume name */ + __le32 extension_count; /* # of extensions below */ + __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ +} __packed; + +/* + * For checkpoint + */ +#define CP_ERROR_FLAG 0x00000008 +#define CP_COMPACT_SUM_FLAG 0x00000004 +#define CP_ORPHAN_PRESENT_FLAG 0x00000002 +#define CP_UMOUNT_FLAG 0x00000001 + +struct f2fs_checkpoint { + __le64 checkpoint_ver; /* checkpoint block version number */ + __le64 user_block_count; /* # of user blocks */ + __le64 valid_block_count; /* # of valid blocks in main area */ + __le32 rsvd_segment_count; /* # of reserved segments for gc */ + __le32 overprov_segment_count; /* # of overprovision segments */ + __le32 free_segment_count; /* # of free segments in main area */ + + /* information of current node segments */ + __le32 
cur_node_segno[MAX_ACTIVE_NODE_LOGS]; + __le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS]; + /* information of current data segments */ + __le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS]; + __le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS]; + __le32 ckpt_flags; /* Flags : umount and journal_present */ + __le32 cp_pack_total_block_count; /* total # of one cp pack */ + __le32 cp_pack_start_sum; /* start block number of data summary */ + __le32 valid_node_count; /* Total number of valid nodes */ + __le32 valid_inode_count; /* Total number of valid inodes */ + __le32 next_free_nid; /* Next free node number */ + __le32 sit_ver_bitmap_bytesize; /* Default value 64 */ + __le32 nat_ver_bitmap_bytesize; /* Default value 256 */ + __le32 checksum_offset; /* checksum offset inside cp block */ + __le64 elapsed_time; /* mounted time */ + /* allocation type of current segment */ + unsigned char alloc_type[MAX_ACTIVE_LOGS]; + + /* SIT and NAT version bitmap */ + unsigned char sit_nat_version_bitmap[1]; +} __packed; + +/* + * For orphan inode management + */ +#define F2FS_ORPHANS_PER_BLOCK 1020 + +struct f2fs_orphan_block { + __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */ + __le32 reserved; /* reserved */ + __le16 blk_addr; /* block index in current CP */ + __le16 blk_count; /* Number of orphan inode blocks in CP */ + __le32 entry_count; /* Total number of orphan nodes in current CP */ + __le32 check_sum; /* CRC32 for orphan inode block */ +} __packed; + +/* + * For NODE structure + */ +struct f2fs_extent { + __le32 fofs; /* start file offset of the extent */ + __le32 blk_addr; /* start block address of the extent */ + __le32 len; /* lengh of the extent */ +} __packed; + +#define F2FS_NAME_LEN 255 +#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ +#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define ADDRS_PER_INODE(fi) addrs_per_inode(fi) +#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ + +#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) +#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) +#define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3) +#define NODE_IND2_BLOCK (DEF_ADDRS_PER_INODE + 4) +#define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5) + +#define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ + +struct f2fs_inode { + __le16 i_mode; /* file mode */ + __u8 i_advise; /* file hints */ + __u8 i_inline; /* file inline flags */ + __le32 i_uid; /* user ID */ + __le32 i_gid; /* group ID */ + __le32 i_links; /* links count */ + __le64 i_size; /* file size in bytes */ + __le64 i_blocks; /* file size in blocks */ + __le64 i_atime; /* access time */ + __le64 i_ctime; /* change time */ + __le64 i_mtime; /* modification time */ + __le32 i_atime_nsec; /* access time in nano scale */ + __le32 i_ctime_nsec; /* change time in nano scale */ + __le32 i_mtime_nsec; /* modification time in nano scale */ + __le32 i_generation; /* file version (for NFS) */ + __le32 i_current_depth; /* only for directory depth */ + __le32 i_xattr_nid; /* nid to save xattr */ + __le32 i_flags; /* file attributes */ + __le32 i_pino; /* parent inode number */ + __le32 i_namelen; /* file name length */ + __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */ + __u8 i_reserved2; /* for backward compatibility */ + + struct f2fs_extent i_ext; /* caching a largest extent */ + + __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ + + __le32 i_nid[5]; /* direct(2), indirect(2), + double_indirect(1) node id */ +} 
__packed; + +struct direct_node { + __le32 addr[ADDRS_PER_BLOCK]; /* array of data block address */ +} __packed; + +struct indirect_node { + __le32 nid[NIDS_PER_BLOCK]; /* array of data block address */ +} __packed; + +enum { + COLD_BIT_SHIFT = 0, + FSYNC_BIT_SHIFT, + DENT_BIT_SHIFT, + OFFSET_BIT_SHIFT +}; + +struct node_footer { + __le32 nid; /* node id */ + __le32 ino; /* inode nunmber */ + __le32 flag; /* include cold/fsync/dentry marks and offset */ + __le64 cp_ver; /* checkpoint version */ + __le32 next_blkaddr; /* next node page block address */ +} __packed; + +struct f2fs_node { + /* can be one of three types: inode, direct, and indirect types */ + union { + struct f2fs_inode i; + struct direct_node dn; + struct indirect_node in; + }; + struct node_footer footer; +} __packed; + +/* + * For NAT entries + */ +#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry)) + +struct f2fs_nat_entry { + __u8 version; /* latest version of cached nat entry */ + __le32 ino; /* inode number */ + __le32 block_addr; /* block address */ +} __packed; + +struct f2fs_nat_block { + struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For SIT entries + * + * Each segment is 2MB in size by default so that a bitmap for validity of + * there-in blocks should occupy 64 bytes, 512 bits. + * Not allow to change this. + */ +#define SIT_VBLOCK_MAP_SIZE 64 +#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) + +/* + * Note that f2fs_sit_entry->vblocks has the following bit-field information. + * [15:10] : allocation type such as CURSEG_XXXX_TYPE + * [9:0] : valid block count + */ +#define SIT_VBLOCKS_SHIFT 10 +#define SIT_VBLOCKS_MASK ((1 << SIT_VBLOCKS_SHIFT) - 1) +#define GET_SIT_VBLOCKS(raw_sit) \ + (le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK) +#define GET_SIT_TYPE(raw_sit) \ + ((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK) \ + >> SIT_VBLOCKS_SHIFT) + +struct f2fs_sit_entry { + __le16 vblocks; /* reference above */ + __u8 valid_map[SIT_VBLOCK_MAP_SIZE]; /* bitmap for valid blocks */ + __le64 mtime; /* segment age for cleaning */ +} __packed; + +struct f2fs_sit_block { + struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For segment summary + * + * One summary block contains exactly 512 summary entries, which represents + * exactly 2MB segment by default. Not allow to change the basic units. + * + * NOTE: For initializing fields, you must use set_summary + * + * - If data page, nid represents dnode's nid + * - If node page, nid represents the node page's nid. + * + * The ofs_in_node is used by only data page. It represents offset + * from node's page's beginning to get a data block address. 
+ * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) + */ +#define ENTRIES_IN_SUM 512 +#define SUMMARY_SIZE (7) /* sizeof(struct summary) */ +#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ +#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) + +/* a summary entry for a 4KB-sized block in a segment */ +struct f2fs_summary { + __le32 nid; /* parent node id */ + union { + __u8 reserved[3]; + struct { + __u8 version; /* node version number */ + __le16 ofs_in_node; /* block index in parent node */ + } __packed; + }; +} __packed; + +/* summary block type, node or data, is stored to the summary_footer */ +#define SUM_TYPE_NODE (1) +#define SUM_TYPE_DATA (0) + +struct summary_footer { + unsigned char entry_type; /* SUM_TYPE_XXX */ + __u32 check_sum; /* summary checksum */ +} __packed; + +#define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\ + SUM_ENTRY_SIZE) +#define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct nat_journal_entry)) +#define NAT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct nat_journal_entry)) +#define SIT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct sit_journal_entry)) +#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct sit_journal_entry)) +/* + * frequently updated NAT/SIT entries can be stored in the spare area in + * summary blocks + */ +enum { + NAT_JOURNAL = 0, + SIT_JOURNAL +}; + +struct nat_journal_entry { + __le32 nid; + struct f2fs_nat_entry ne; +} __packed; + +struct nat_journal { + struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES]; + __u8 reserved[NAT_JOURNAL_RESERVED]; +} __packed; + +struct sit_journal_entry { + __le32 segno; + struct f2fs_sit_entry se; +} __packed; + +struct sit_journal { + struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES]; + __u8 reserved[SIT_JOURNAL_RESERVED]; +} __packed; + +/* 4KB-sized summary block structure */ +struct f2fs_summary_block { + struct f2fs_summary entries[ENTRIES_IN_SUM]; + union { + __le16 n_nats; + __le16 n_sits; + }; + /* spare area is used by NAT or SIT journals */ + union { + struct nat_journal nat_j; + struct sit_journal sit_j; + }; + struct summary_footer footer; +} __packed; + +/* + * For directory operations + */ +#define F2FS_DOT_HASH 0 +#define F2FS_DDOT_HASH F2FS_DOT_HASH +#define F2FS_MAX_HASH (~((0x3ULL) << 62)) +#define F2FS_HASH_COL_BIT ((0x1ULL) << 63) + +typedef __le32 f2fs_hash_t; + +/* One directory entry slot covers 8bytes-long file name */ +#define F2FS_SLOT_LEN 8 +#define F2FS_SLOT_LEN_BITS 3 + +#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) + +/* the number of dentry in a block */ +#define NR_DENTRY_IN_BLOCK 214 + +/* MAX level for dir lookup */ +#define MAX_DIR_HASH_DEPTH 63 + +#define SIZE_OF_DIR_ENTRY 11 /* by byte */ +#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ + BITS_PER_BYTE) +#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ + F2FS_SLOT_LEN) * \ + NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) + +/* One directory entry slot representing F2FS_SLOT_LEN-sized file name */ +struct f2fs_dir_entry { + __le32 hash_code; /* hash code of file name */ + __le32 ino; /* inode number */ + __le16 name_len; /* lengh of file name */ + __u8 file_type; /* file type */ +} __packed; + +/* 4KB-sized directory entry block */ +struct f2fs_dentry_block { + /* validity bitmap for directory entries in each block */ + __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; + __u8 reserved[SIZE_OF_RESERVED]; + struct f2fs_dir_entry 
dentry[NR_DENTRY_IN_BLOCK]; + __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; +} __packed; + +/* file types used in inode_info->flags */ +enum { + F2FS_FT_UNKNOWN, + F2FS_FT_REG_FILE, + F2FS_FT_DIR, + F2FS_FT_CHRDEV, + F2FS_FT_BLKDEV, + F2FS_FT_FIFO, + F2FS_FT_SOCK, + F2FS_FT_SYMLINK, + F2FS_FT_MAX +}; + +#endif /* _LINUX_F2FS_FS_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5c3b043a645..2e9d230ff95 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1743,6 +1743,19 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } +/** + * set_nlink - directly set an inode's link count + * @inode: inode + * @nlink: new nlink (should be non-zero) + * + * This is a low-level filesystem helper to replace any + * direct filesystem manipulation of i_nlink. + */ +static inline void set_nlink(struct inode *inode, unsigned int nlink) +{ + inode->i_nlink = nlink; +} + /** * inc_nlink - directly increment an inode's link count * @inode: inode diff --git a/include/linux/magic.h b/include/linux/magic.h index 1e5df2af8d8..2616b546e83 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -24,6 +24,7 @@ #define EXT4_SUPER_MAGIC 0xEF53 #define BTRFS_SUPER_MAGIC 0x9123683E #define NILFS_SUPER_MAGIC 0x3434 +#define F2FS_SUPER_MAGIC 0xF2F52010 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 diff --git a/include/linux/security.h b/include/linux/security.h index 95a6d8e24df..60209254b64 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -6,6 +6,7 @@ * Copyright (C) 2001 Networks Associates Technology, Inc * Copyright (C) 2001 James Morris * Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group) + * Copyright (c) 2014 XPerience(R) Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,6 +37,7 @@ #include #include #include +#include #include /* Maximum number of letters for an LSM name string */ @@ -147,6 +149,10 @@ extern int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif +/* security_inode_init_security callback function to write xattrs */ +typedef int (*initxattrs) (struct inode *inode, + const struct xattr *xattr_array, void *fs_data); + #ifdef CONFIG_SECURITY struct security_mnt_opts { @@ -1715,6 +1721,9 @@ void security_inode_free(struct inode *inode); int security_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, char **name, void **value, size_t *len); +int security_new_inode_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, + initxattrs initxattrs, void *fs_data); int security_inode_create(struct inode *dir, struct dentry *dentry, int mode); int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); @@ -2067,7 +2076,16 @@ static inline int security_inode_init_security(struct inode *inode, void **value, size_t *len) { - return -EOPNOTSUPP; + return 0; +} + +static inline int security_new_inode_init_security(struct inode *inode, + struct inode *dir, + const struct qstr *qstr, + initxattrs initxattrs, + void *fs_data) +{ + return 0; } static inline int security_inode_create(struct inode *dir, diff --git a/include/linux/xattr.h b/include/linux/xattr.h index aed54c50aa6..7a378662ddf 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -67,6 +67,12 @@ struct 
xattr_handler { size_t size, int flags, int handler_flags); }; +struct xattr { + char *name; + void *value; + size_t value_len; +}; + ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h new file mode 100644 index 00000000000..52ae54828ed --- /dev/null +++ b/include/trace/events/f2fs.h @@ -0,0 +1,682 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM f2fs + +#if !defined(_TRACE_F2FS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_F2FS_H + +#include + +#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) +#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino + +#define show_block_type(type) \ + __print_symbolic(type, \ + { NODE, "NODE" }, \ + { DATA, "DATA" }, \ + { META, "META" }, \ + { META_FLUSH, "META_FLUSH" }) + +#define show_bio_type(type) \ + __print_symbolic(type, \ + { READ, "READ" }, \ + { READA, "READAHEAD" }, \ + { READ_SYNC, "READ_SYNC" }, \ + { WRITE, "WRITE" }, \ + { WRITE_SYNC, "WRITE_SYNC" }, \ + { WRITE_FLUSH, "WRITE_FLUSH" }, \ + { WRITE_FUA, "WRITE_FUA" }) + +#define show_data_type(type) \ + __print_symbolic(type, \ + { CURSEG_HOT_DATA, "Hot DATA" }, \ + { CURSEG_WARM_DATA, "Warm DATA" }, \ + { CURSEG_COLD_DATA, "Cold DATA" }, \ + { CURSEG_HOT_NODE, "Hot NODE" }, \ + { CURSEG_WARM_NODE, "Warm NODE" }, \ + { CURSEG_COLD_NODE, "Cold NODE" }, \ + { NO_CHECK_TYPE, "No TYPE" }) + +#define show_gc_type(type) \ + __print_symbolic(type, \ + { FG_GC, "Foreground GC" }, \ + { BG_GC, "Background GC" }) + +#define show_alloc_mode(type) \ + __print_symbolic(type, \ + { LFS, "LFS-mode" }, \ + { SSR, "SSR-mode" }) + +#define show_victim_policy(type) \ + __print_symbolic(type, \ + { GC_GREEDY, "Greedy" }, \ + { GC_CB, "Cost-Benefit" }) + +struct victim_sel_policy; + +DECLARE_EVENT_CLASS(f2fs__inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(ino_t, pino) + __field(umode_t, mode) + __field(loff_t, size) + __field(unsigned int, nlink) + __field(blkcnt_t, blocks) + __field(__u8, advise) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pino = F2FS_I(inode)->i_pino; + __entry->mode = inode->i_mode; + __entry->nlink = inode->i_nlink; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->advise = F2FS_I(inode)->i_advise; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pino = %lu, i_mode = 0x%hx, " + "i_size = %lld, i_nlink = %u, i_blocks = %llu, i_advise = 0x%x", + show_dev_ino(__entry), + (unsigned long)__entry->pino, + __entry->mode, + __entry->size, + (unsigned int)__entry->nlink, + (unsigned long long)__entry->blocks, + (unsigned char)__entry->advise) +); + +DECLARE_EVENT_CLASS(f2fs__inode_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, ret = %d", + show_dev_ino(__entry), + __entry->ret) +); + +DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +TRACE_EVENT(f2fs_sync_file_exit, + + TP_PROTO(struct inode *inode, bool need_cp, int datasync, int ret), + + 
TP_ARGS(inode, need_cp, datasync, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(bool, need_cp) + __field(int, datasync) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->need_cp = need_cp; + __entry->datasync = datasync; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, " + "datasync = %d, ret = %d", + show_dev_ino(__entry), + __entry->need_cp ? "needed" : "not needed", + __entry->datasync, + __entry->ret) +); + +TRACE_EVENT(f2fs_sync_fs, + + TP_PROTO(struct super_block *sb, int wait), + + TP_ARGS(sb, wait), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, dirty) + __field(int, wait) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->dirty = F2FS_SB(sb)->s_dirty; + __entry->wait = wait; + ), + + TP_printk("dev = (%d,%d), superblock is %s, wait = %d", + show_dev(__entry), + __entry->dirty ? "dirty" : "not dirty", + __entry->wait) +); + +DEFINE_EVENT(f2fs__inode, f2fs_iget, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_iget_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__inode, f2fs_evict_inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_new_inode, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +TRACE_EVENT(f2fs_unlink_enter, + + TP_PROTO(struct inode *dir, struct dentry *dentry), + + TP_ARGS(dir, dentry), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(const char *, name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->size = dir->i_size; + __entry->blocks = dir->i_blocks; + __entry->name = dentry->d_name.name; + ), + + TP_printk("dev = (%d,%d), dir ino = %lu, i_size = %lld, " + "i_blocks = %llu, name = %s", + show_dev_ino(__entry), + __entry->size, + (unsigned long long)__entry->blocks, + __entry->name) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__inode, f2fs_truncate, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +TRACE_EVENT(f2fs_truncate_data_blocks_range, + + TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs, int free), + + TP_ARGS(inode, nid, ofs, free), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid) + __field(unsigned int, ofs) + __field(int, free) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nid = nid; + __entry->ofs = ofs; + __entry->free = free; + ), + + TP_printk("dev = (%d,%d), ino = %lu, nid = %u, offset = %u, freed = %d", + show_dev_ino(__entry), + (unsigned int)__entry->nid, + __entry->ofs, + __entry->free) +); + +DECLARE_EVENT_CLASS(f2fs__truncate_op, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(u64, from) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->from = from; + ), + + TP_printk("dev = (%d,%d), ino = %lu, i_size = %lld, i_blocks = %llu, " + "start file offset = %llu", + show_dev_ino(__entry), + 
__entry->size, + (unsigned long long)__entry->blocks, + (unsigned long long)__entry->from) +); + +DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_blocks_enter, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_blocks_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_inode_blocks_enter, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_inode_blocks_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DECLARE_EVENT_CLASS(f2fs__truncate_node, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid) + __field(block_t, blk_addr) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nid = nid; + __entry->blk_addr = blk_addr; + ), + + TP_printk("dev = (%d,%d), ino = %lu, nid = %u, block_address = 0x%llx", + show_dev_ino(__entry), + (unsigned int)__entry->nid, + (unsigned long long)__entry->blk_addr) +); + +DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_nodes_enter, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_nodes_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr) +); + +TRACE_EVENT(f2fs_truncate_partial_nodes, + + TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err), + + TP_ARGS(inode, nid, depth, err), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid[3]) + __field(int, depth) + __field(int, err) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nid[0] = nid[0]; + __entry->nid[1] = nid[1]; + __entry->nid[2] = nid[2]; + __entry->depth = depth; + __entry->err = err; + ), + + TP_printk("dev = (%d,%d), ino = %lu, " + "nid[0] = %u, nid[1] = %u, nid[2] = %u, depth = %d, err = %d", + show_dev_ino(__entry), + (unsigned int)__entry->nid[0], + (unsigned int)__entry->nid[1], + (unsigned int)__entry->nid[2], + __entry->depth, + __entry->err) +); + +TRACE_EVENT_CONDITION(f2fs_readpage, + + TP_PROTO(struct page *page, sector_t blkaddr, int type), + + TP_ARGS(page, blkaddr, type), + + TP_CONDITION(page->mapping), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, index) + __field(sector_t, blkaddr) + __field(int, type) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = page->mapping->host->i_ino; + __entry->index = page->index; + __entry->blkaddr = blkaddr; + __entry->type = type; + ), + + TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " + "blkaddr = 0x%llx, bio_type = %s", + show_dev_ino(__entry), + (unsigned long)__entry->index, + (unsigned long long)__entry->blkaddr, + show_bio_type(__entry->type)) +); + +TRACE_EVENT(f2fs_get_data_block, + TP_PROTO(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int ret), + + TP_ARGS(inode, iblock, bh, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(sector_t, iblock) + __field(sector_t, bh_start) + __field(size_t, bh_size) + __field(int, 
ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->iblock = iblock; + __entry->bh_start = bh->b_blocknr; + __entry->bh_size = bh->b_size; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " + "start blkaddr = 0x%llx, len = 0x%llx bytes, err = %d", + show_dev_ino(__entry), + (unsigned long long)__entry->iblock, + (unsigned long long)__entry->bh_start, + (unsigned long long)__entry->bh_size, + __entry->ret) +); + +TRACE_EVENT(f2fs_get_victim, + + TP_PROTO(struct super_block *sb, int type, int gc_type, + struct victim_sel_policy *p, unsigned int pre_victim, + unsigned int prefree, unsigned int free), + + TP_ARGS(sb, type, gc_type, p, pre_victim, prefree, free), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(int, gc_type) + __field(int, alloc_mode) + __field(int, gc_mode) + __field(unsigned int, victim) + __field(unsigned int, ofs_unit) + __field(unsigned int, pre_victim) + __field(unsigned int, prefree) + __field(unsigned int, free) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->type = type; + __entry->gc_type = gc_type; + __entry->alloc_mode = p->alloc_mode; + __entry->gc_mode = p->gc_mode; + __entry->victim = p->min_segno; + __entry->ofs_unit = p->ofs_unit; + __entry->pre_victim = pre_victim; + __entry->prefree = prefree; + __entry->free = free; + ), + + TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u " + "ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u", + show_dev(__entry), + show_data_type(__entry->type), + show_gc_type(__entry->gc_type), + show_alloc_mode(__entry->alloc_mode), + show_victim_policy(__entry->gc_mode), + __entry->victim, + __entry->ofs_unit, + (int)__entry->pre_victim, + __entry->prefree, + __entry->free) +); + +TRACE_EVENT(f2fs_fallocate, + + TP_PROTO(struct inode *inode, int mode, + loff_t offset, loff_t len, int ret), + + TP_ARGS(inode, mode, offset, len, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, mode) + __field(loff_t, offset) + __field(loff_t, len) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = mode; + __entry->offset = offset; + __entry->len = len; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, mode = %x, offset = %lld, " + "len = %lld, i_size = %lld, i_blocks = %llu, ret = %d", + show_dev_ino(__entry), + __entry->mode, + (unsigned long long)__entry->offset, + (unsigned long long)__entry->len, + (unsigned long long)__entry->size, + (unsigned long long)__entry->blocks, + __entry->ret) +); + +TRACE_EVENT(f2fs_reserve_new_block, + + TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), + + TP_ARGS(inode, nid, ofs_in_node), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(nid_t, nid) + __field(unsigned int, ofs_in_node) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = nid; + __entry->ofs_in_node = ofs_in_node; + ), + + TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u", + show_dev(__entry), + (unsigned int)__entry->nid, + __entry->ofs_in_node) +); + +TRACE_EVENT(f2fs_do_submit_bio, + + TP_PROTO(struct super_block *sb, int btype, bool sync, struct bio *bio), + + TP_ARGS(sb, btype, sync, bio), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, btype) + __field(bool, sync) 
+ __field(sector_t, sector) + __field(unsigned int, size) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->btype = btype; + __entry->sync = sync; + __entry->sector = bio->bi_sector; + __entry->size = bio->bi_size; + ), + + TP_printk("dev = (%d,%d), type = %s, io = %s, sector = %lld, size = %u", + show_dev(__entry), + show_block_type(__entry->btype), + __entry->sync ? "sync" : "no sync", + (unsigned long long)__entry->sector, + __entry->size) +); + +TRACE_EVENT(f2fs_submit_write_page, + + TP_PROTO(struct page *page, block_t blk_addr, int type), + + TP_ARGS(page, blk_addr, type), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, type) + __field(pgoff_t, index) + __field(block_t, block) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = page->mapping->host->i_ino; + __entry->type = type; + __entry->index = page->index; + __entry->block = blk_addr; + ), + + TP_printk("dev = (%d,%d), ino = %lu, %s, index = %lu, blkaddr = 0x%llx", + show_dev_ino(__entry), + show_block_type(__entry->type), + (unsigned long)__entry->index, + (unsigned long long)__entry->block) +); + +TRACE_EVENT(f2fs_write_checkpoint, + + TP_PROTO(struct super_block *sb, bool is_umount, char *msg), + + TP_ARGS(sb, is_umount, msg), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(bool, is_umount) + __field(char *, msg) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->is_umount = is_umount; + __entry->msg = msg; + ), + + TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", + show_dev(__entry), + __entry->is_umount ? "clean umount" : "consistency", + __entry->msg) +); + +#endif /* _TRACE_F2FS_H */ + + /* This part must be outside protection */ +#include diff --git a/security/security.c b/security/security.c index 420198e5f32..5ee47fc41ea 100644 --- a/security/security.c +++ b/security/security.c @@ -4,6 +4,7 @@ * Copyright (C) 2001 WireX Communications, Inc * Copyright (C) 2001-2002 Greg Kroah-Hartman * Copyright (C) 2001 Networks Associates Technology, Inc + * Copyright (c) 2014 XPerience(R) Project * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,6 +19,8 @@ #include #include +#define MAX_LSM_XATTR 1 + /* Boot-time LSM user choice */ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = CONFIG_DEFAULT_SECURITY; @@ -369,6 +372,37 @@ int security_inode_init_security(struct inode *inode, struct inode *dir, } EXPORT_SYMBOL(security_inode_init_security); +int security_new_inode_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, + const initxattrs initxattrs, void *fs_data) +{ + struct xattr new_xattrs[MAX_LSM_XATTR + 1]; + struct xattr *lsm_xattr; + int ret; + + if (unlikely(IS_PRIVATE(inode))) + return -EOPNOTSUPP; + + memset(new_xattrs, 0, sizeof new_xattrs); + if (!initxattrs) + return security_ops->inode_init_security(inode, dir, qstr, + NULL, NULL, NULL); + lsm_xattr = new_xattrs; + ret = security_ops->inode_init_security(inode, dir, qstr, + &lsm_xattr->name, + &lsm_xattr->value, + &lsm_xattr->value_len); + if (ret) + goto out; + ret = initxattrs(inode, new_xattrs, fs_data); +out: + kfree(lsm_xattr->name); + kfree(lsm_xattr->value); + + return (ret == -EOPNOTSUPP) ? 
0 : ret; +} +EXPORT_SYMBOL(security_new_inode_init_security); + #ifdef CONFIG_SECURITY_PATH int security_path_mknod(struct path *dir, struct dentry *dentry, int mode, unsigned int dev) From d6c2d0ec7cdefcaa32526f0b2694e9dff9fce80a Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 5 Jun 2014 20:47:55 -0400 Subject: [PATCH 619/678] defconfig: a68 and increase boosting --- arch/arm/configs/metallice_grouper_defconfig | 10 ++++++---- drivers/cpufreq/cpufreq_touchdemand.c | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 7381367c327..1282bca05a6 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a67" +CONFIG_LOCALVERSION="-MKernel-68" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y @@ -3027,9 +3027,6 @@ CONFIG_PROC_PAGE_MONITOR=y CONFIG_REPORT_PRESENT_CPUS=y CONFIG_SYSFS=y CONFIG_TMPFS=y -CONFIG_F2FS_FS=y -CONFIG_F2FS_FS_XATTR=y -CONFIG_F2FS_FS_SECURITY=y # CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_TMPFS_XATTR is not set # CONFIG_HUGETLB_PAGE is not set @@ -3055,6 +3052,11 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_PSTORE is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +CONFIG_F2FS_FS=y +CONFIG_F2FS_STAT_FS=y +CONFIG_F2FS_FS_XATTR=y +CONFIG_F2FS_FS_POSIX_ACL=y +CONFIG_F2FS_FS_SECURITY=y CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set diff --git a/drivers/cpufreq/cpufreq_touchdemand.c b/drivers/cpufreq/cpufreq_touchdemand.c index 7d1fd14c43d..2b56894521c 100644 --- a/drivers/cpufreq/cpufreq_touchdemand.c +++ b/drivers/cpufreq/cpufreq_touchdemand.c @@ -375,7 +375,7 @@ static ssize_t store_touch_factor(struct kobject *a, struct attribute *b, return count; } -static unsigned int Touch_poke_attr[4] = {1200000, 1200000, 0, 0}; +static unsigned int Touch_poke_attr[4] = {1300000, 1200000, 1000000, 0}; static unsigned int Touch_poke_boost = 1; static unsigned long Touch_poke_boost_till_jiffies = 0; From 0550e928eb9672a2403738472a23058e22865654 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 6 Jul 2012 14:13:29 -0400 Subject: [PATCH 620/678] SELinux: include definition of new capabilities The kernel has added CAP_WAKE_ALARM and CAP_EPOLLWAKEUP. We need to define these in SELinux so they can be mediated by policy. 
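For illustration, a minimal kernel-side sketch (not part of this patch) of the kind of check these names mediate: capability bits numbered 32 and above fall into the "capability2" class, so a check such as the one below is governed by an allow rule on capability2:wake_alarm once the permission is defined in the class map and policy.

    /* hypothetical driver code; capable() and CAP_WAKE_ALARM are existing kernel APIs */
    #include <linux/capability.h>

    static int example_program_wake_alarm(void)
    {
            if (!capable(CAP_WAKE_ALARM))   /* mediated by SELinux as capability2:wake_alarm */
                    return -EPERM;
            /* ... arm the RTC / alarmtimer ... */
            return 0;
    }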
Change-Id: I8a3e0db15ec5f4eb05d455a57e8446a8c2b484c2 Signed-off-by: Eric Paris Signed-off-by: James Morris [sds: rename epollwakeup to block_suspend to match upstream merge] Signed-off-by: Stephen Smalley Signed-off-by: Ed Tam --- security/selinux/include/classmap.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 4a4a9aebca9..20b00fc37cc 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -145,7 +145,9 @@ struct security_class_mapping secclass_map[] = { "node_bind", "name_connect", NULL } }, { "memprotect", { "mmap_zero", NULL } }, { "peer", { "recv", NULL } }, - { "capability2", { "mac_override", "mac_admin", "syslog", NULL } }, + { "capability2", + { "mac_override", "mac_admin", "syslog", "wake_alarm", "block_suspend", + NULL } }, { "kernel_service", { "use_as_override", "create_files_as", NULL } }, { "tun_socket", { COMMON_SOCK_PERMS, NULL } }, From 9e0649fb0895719e808f40e817f39391829e733d Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Mon, 8 Apr 2013 15:09:26 -0700 Subject: [PATCH 621/678] netfilter: qtaguid: rate limit some of the printks Some of the printks are in the packet handling path. We now ratelimit the very unlikely errors to avoid kmsg spamming. Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 495b62ea0b6..88c61bdf1d2 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1328,12 +1329,12 @@ static void iface_stat_update_from_skb(const struct sk_buff *skb, } if (unlikely(!el_dev)) { - pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n", - par->hooknum, __func__); + pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n", + par->hooknum, __func__); BUG(); } else if (unlikely(!el_dev->name)) { - pr_err("qtaguid[%d]: %s(): no dev->name?!!\n", - par->hooknum, __func__); + pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n", + par->hooknum, __func__); BUG(); } else { proto = ipx_proto(skb, par); @@ -1416,8 +1417,8 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, iface_entry = get_iface_entry(ifname); if (!iface_entry) { - pr_err("qtaguid: iface_stat: stat_update() %s not found\n", - ifname); + pr_err_ratelimited("qtaguid: iface_stat: stat_update() " + "%s not found\n", ifname); return; } /* It is ok to process data when an iface_entry is inactive */ From 5dfdc36e81f1c68acb3d62f963b9594a5d8751c0 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Fri, 20 Dec 2013 16:51:11 -0800 Subject: [PATCH 622/678] nf: xt_qtaguid: fix handling for cases where tunnels are used. * fix skb->dev vs par->in/out When there is some forwarding going on, it introduces extra state around devs associated with xt_action_param->in/out and sk_buff->dev. E.g. par->in and par->out are both set, or skb->dev and par->out are both set (and different) This would lead qtaguid to make the wrong assumption about the direction and update the wrong device stats. Now we rely more on par->in/out. * Fix handling when qtaguid is used as "owner" When qtaguid is used as an owner module, and sk_socket->file is not there (happens when tunnels are involved), it would incorrectly do a tag stats update. * Correct debug messages. 
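Before the full diff, a minimal sketch paraphrasing the device/direction rule the bullets above describe (the patch's actual helper is get_dev_and_dir(), added in the hunk below); this is not a drop-in, only the rule in isolation:

    /* rely on the hook's par->in / par->out for both device and direction */
    if (par->in) {
            el_dev = par->in;       /* packet arriving: account as RX */
            direction = IFS_RX;
    } else if (par->out) {
            el_dev = par->out;      /* packet leaving: account as TX */
            direction = IFS_TX;
    }
    /* skb->dev may legitimately differ here (tunnels, forwarding); it is
     * logged for debugging but no longer used to pick the direction. */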
Bug: 11687690 Change-Id: I2b1ff8bd7131969ce9e25f8291d83a6280b3ba7f Signed-off-by: JP Abgrall (cherry picked from commit 2b71479d6f5fe8f33b335f713380f72037244395) --- net/netfilter/xt_qtaguid.c | 150 +++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 81 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 88c61bdf1d2..aa5f0919a1b 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1298,6 +1298,38 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only) spin_unlock_bh(&iface_stat_list_lock); } +/* Guarantied to return a net_device that has a name */ +static void get_dev_and_dir(const struct sk_buff *skb, + struct xt_action_param *par, + enum ifs_tx_rx *direction, + const struct net_device **el_dev) +{ + BUG_ON(!direction || !el_dev); + + if (par->in) { + *el_dev = par->in; + *direction = IFS_RX; + } else if (par->out) { + *el_dev = par->out; + *direction = IFS_TX; + } else { + pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n", + par->hooknum, __func__); + BUG(); + } + if (unlikely(!(*el_dev)->name)) { + pr_err("qtaguid[%d]: %s(): no dev->name?!!\n", + par->hooknum, __func__); + BUG(); + } + if (skb->dev && *el_dev != skb->dev) { + MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs par->%s=%p %s\n", + par->hooknum, skb->dev, skb->dev->name, + *direction == IFS_RX ? "in" : "out", *el_dev, + (*el_dev)->name); + } +} + /* * Update stats for the specified interface from the skb. * Do nothing if the entry @@ -1309,50 +1341,27 @@ static void iface_stat_update_from_skb(const struct sk_buff *skb, { struct iface_stat *entry; const struct net_device *el_dev; - enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX; + enum ifs_tx_rx direction; int bytes = skb->len; int proto; - if (!skb->dev) { - MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); - el_dev = par->in ? : par->out; - } else { - const struct net_device *other_dev; - el_dev = skb->dev; - other_dev = par->in ? 
: par->out; - if (el_dev != other_dev) { - MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " - "par->(in/out)=%p %s\n", - par->hooknum, el_dev, el_dev->name, other_dev, - other_dev->name); - } - } - - if (unlikely(!el_dev)) { - pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n", - par->hooknum, __func__); - BUG(); - } else if (unlikely(!el_dev->name)) { - pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n", - par->hooknum, __func__); - BUG(); - } else { - proto = ipx_proto(skb, par); - MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", - par->hooknum, el_dev->name, el_dev->type, - par->family, proto); - } + get_dev_and_dir(skb, par, &direction, &el_dev); + proto = ipx_proto(skb, par); + MT_DEBUG("qtaguid[%d]: iface_stat: %s(%s): " + "type=%d fam=%d proto=%d dir=%d\n", + par->hooknum, __func__, el_dev->name, el_dev->type, + par->family, proto, direction); spin_lock_bh(&iface_stat_list_lock); entry = get_iface_entry(el_dev->name); if (entry == NULL) { - IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n", - __func__, el_dev->name); + IF_DEBUG("qtaguid[%d]: iface_stat: %s(%s): not tracked\n", + par->hooknum, __func__, el_dev->name); spin_unlock_bh(&iface_stat_list_lock); return; } - IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, + IF_DEBUG("qtaguid[%d]: %s(%s): entry=%p\n", par->hooknum, __func__, el_dev->name, entry); data_counters_update(&entry->totals_via_skb, 0, direction, proto, @@ -1417,13 +1426,13 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, iface_entry = get_iface_entry(ifname); if (!iface_entry) { - pr_err_ratelimited("qtaguid: iface_stat: stat_update() " + pr_err_ratelimited("qtaguid: tag_stat: stat_update() " "%s not found\n", ifname); return; } /* It is ok to process data when an iface_entry is inactive */ - MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", + MT_DEBUG("qtaguid: tag_stat: stat_update() dev=%s entry=%p\n", ifname, iface_entry); /* @@ -1440,7 +1449,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, tag = combine_atag_with_uid(acct_tag, uid); uid_tag = make_tag_from_uid(uid); } - MT_DEBUG("qtaguid: iface_stat: stat_update(): " + MT_DEBUG("qtaguid: tag_stat: stat_update(): " " looking for tag=0x%llx (uid=%u) in ife=%p\n", tag, get_uid_from_tag(tag), iface_entry); /* Loop over tag list under this interface for {acct_tag,uid_tag} */ @@ -1673,8 +1682,8 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, struct sock *sk; unsigned int hook_mask = (1 << par->hooknum); - MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, - par->hooknum, par->family); + MT_DEBUG("qtaguid[%d]: find_sk(skb=%p) family=%d\n", + par->hooknum, skb, par->family); /* * Let's not abuse the the xt_socket_get*_sk(), or else it will @@ -1700,8 +1709,8 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, * Not fixed in 3.0-r3 :( */ if (sk) { - MT_DEBUG("qtaguid: %p->sk_proto=%u " - "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); + MT_DEBUG("qtaguid[%d]: %p->sk_proto=%u->sk_state=%d\n", + par->hooknum, sk, sk->sk_protocol, sk->sk_state); if (sk->sk_state == TCP_TIME_WAIT) { xt_socket_put_sk(sk); sk = NULL; @@ -1715,37 +1724,19 @@ static void account_for_uid(const struct sk_buff *skb, struct xt_action_param *par) { const struct net_device *el_dev; + enum ifs_tx_rx direction; + int proto; - if (!skb->dev) { - MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); - el_dev = par->in ? : par->out; - } else { - const struct net_device *other_dev; - el_dev = skb->dev; - other_dev = par->in ? 
: par->out; - if (el_dev != other_dev) { - MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " - "par->(in/out)=%p %s\n", - par->hooknum, el_dev, el_dev->name, other_dev, - other_dev->name); - } - } - - if (unlikely(!el_dev)) { - pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum); - } else if (unlikely(!el_dev->name)) { - pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); - } else { - int proto = ipx_proto(skb, par); - MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", - par->hooknum, el_dev->name, el_dev->type, - par->family, proto); + get_dev_and_dir(skb, par, &direction, &el_dev); + proto = ipx_proto(skb, par); + MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d dir=%d\n", + par->hooknum, el_dev->name, el_dev->type, + par->family, proto, direction); - if_tag_stat_update(el_dev->name, uid, - skb->sk ? skb->sk : alternate_sk, - par->in ? IFS_RX : IFS_TX, - proto, skb->len); - } + if_tag_stat_update(el_dev->name, uid, + skb->sk ? skb->sk : alternate_sk, + direction, + proto, skb->len); } static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) @@ -1756,6 +1747,11 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk; uid_t sock_uid; bool res; + /* + * TODO: unhack how to force just accounting. + * For now we only do tag stats when the uid-owner is not requested + */ + bool do_tag_stat = !(info->match & XT_QTAGUID_UID); if (unlikely(module_passive)) return (info->match ^ info->invert) == 0; @@ -1820,12 +1816,7 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) * couldn't find the owner, so for now we just count them * against the system. */ - /* - * TODO: unhack how to force just accounting. - * For now we only do iface stats when the uid-owner is not - * requested. - */ - if (!(info->match & XT_QTAGUID_UID)) + if (do_tag_stat) account_for_uid(skb, sk, 0, par); MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", par->hooknum, @@ -1840,18 +1831,15 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) filp = sk->sk_socket->file; if (filp == NULL) { MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); - account_for_uid(skb, sk, 0, par); + if (do_tag_stat) + account_for_uid(skb, sk, 0, par); res = ((info->match ^ info->invert) & (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; atomic64_inc(&qtu_events.match_no_sk_file); goto put_sock_ret_res; } sock_uid = filp->f_cred->fsuid; - /* - * TODO: unhack how to force just accounting. - * For now we only do iface stats when the uid-owner is not requested - */ - if (!(info->match & XT_QTAGUID_UID)) + if (do_tag_stat) account_for_uid(skb, sk, sock_uid, par); /* From 96f5b658fe70449b80d7e451958268f82a596dd1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 18 Nov 2013 07:07:45 +0100 Subject: [PATCH 623/678] ping: prevent NULL pointer dereference on write to msg_name A plain read() on a socket does set msg->msg_name to NULL. So check for NULL pointer first. [Backport of net-next cf970c002d270c36202bd5b9c2804d3097a52da0] Bug: 12780426 Change-Id: I29d9cb95ef05ec76d37517e01317f4a29e60931c Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller Signed-off-by: Lorenzo Colitti --- net/ipv4/ping.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index a5d0173bf87..45bc0dd517f 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -877,10 +877,12 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* Copy the address and add cmsg data. */ if (family == AF_INET) { sin = (struct sockaddr_in *) msg->msg_name; - sin->sin_family = AF_INET; - sin->sin_port = 0 /* skb->h.uh->source */; - sin->sin_addr.s_addr = ip_hdr(skb)->saddr; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + if (sin) { + sin->sin_family = AF_INET; + sin->sin_port = 0 /* skb->h.uh->source */; + sin->sin_addr.s_addr = ip_hdr(skb)->saddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } if (isk->cmsg_flags) ip_cmsg_recv(msg, skb); @@ -890,17 +892,19 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *ip6 = ipv6_hdr(skb); sin6 = (struct sockaddr_in6 *) msg->msg_name; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = 0; - sin6->sin6_addr = ip6->saddr; - - sin6->sin6_flowinfo = 0; - if (np->sndflow) - sin6->sin6_flowinfo = - *(__be32 *)ip6 & IPV6_FLOWINFO_MASK; - sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, - IP6CB(skb)->iif); + if (sin6) { + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_addr = ip6->saddr; + sin6->sin6_flowinfo = 0; + if (np->sndflow) + sin6->sin6_flowinfo = + *(__be32 *)ip6 & IPV6_FLOWINFO_MASK; + sin6->sin6_scope_id = + ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); + } if (inet6_sk(sk)->rxopt.all) pingv6_ops.datagram_recv_ctl(sk, msg, skb); From db83f2642b1acf526a063eac3692a91b7e546e6f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 9 Jan 2014 21:46:34 -0500 Subject: [PATCH 624/678] SELinux: Fix possible NULL pointer dereference in selinux_inode_permission() While running stress tests on adding and deleting ftrace instances I hit this bug: BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: selinux_inode_permission+0x85/0x160 PGD 63681067 PUD 7ddbe067 PMD 0 Oops: 0000 [#1] PREEMPT CPU: 0 PID: 5634 Comm: ftrace-test-mki Not tainted 3.13.0-rc4-test-00033-gd2a6dde-dirty #20 Hardware name: /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006 task: ffff880078375800 ti: ffff88007ddb0000 task.ti: ffff88007ddb0000 RIP: 0010:[] [] selinux_inode_permission+0x85/0x160 RSP: 0018:ffff88007ddb1c48 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000800000 RCX: ffff88006dd43840 RDX: 0000000000000001 RSI: 0000000000000081 RDI: ffff88006ee46000 RBP: ffff88007ddb1c88 R08: 0000000000000000 R09: ffff88007ddb1c54 R10: 6e6576652f6f6f66 R11: 0000000000000003 R12: 0000000000000000 R13: 0000000000000081 R14: ffff88006ee46000 R15: 0000000000000000 FS: 00007f217b5b6700(0000) GS:ffffffff81e21000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033^M CR2: 0000000000000020 CR3: 000000006a0fe000 CR4: 00000000000007f0 Call Trace: security_inode_permission+0x1c/0x30 __inode_permission+0x41/0xa0 inode_permission+0x18/0x50 link_path_walk+0x66/0x920 path_openat+0xa6/0x6c0 do_filp_open+0x43/0xa0 do_sys_open+0x146/0x240 SyS_open+0x1e/0x20 system_call_fastpath+0x16/0x1b Code: 84 a1 00 00 00 81 e3 00 20 00 00 89 d8 83 c8 02 40 f6 c6 04 0f 45 d8 40 f6 c6 08 74 71 80 cf 02 49 8b 46 38 4c 8d 4d cc 45 31 c0 <0f> b7 50 20 8b 70 1c 48 8b 41 70 89 d9 8b 78 04 e8 36 cf ff ff RIP 
selinux_inode_permission+0x85/0x160 CR2: 0000000000000020 Investigating, I found that the inode->i_security was NULL, and the dereference of it caused the oops. in selinux_inode_permission(): isec = inode->i_security; rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd); Note, the crash came from stressing the deletion and reading of debugfs files. I was not able to recreate this via normal files. But I'm not sure they are safe. It may just be that the race window is much harder to hit. What seems to have happened (and what I have traced), is the file is being opened at the same time the file or directory is being deleted. As the dentry and inode locks are not held during the path walk, nor is the inodes ref counts being incremented, there is nothing saving these structures from being discarded except for an rcu_read_lock(). The rcu_read_lock() protects against freeing of the inode, but it does not protect freeing of the inode_security_struct. Now if the freeing of the i_security happens with a call_rcu(), and the i_security field of the inode is not changed (it gets freed as the inode gets freed) then there will be no issue here. (Linus Torvalds suggested not setting the field to NULL such that we do not need to check if it is NULL in the permission check). Note, this is a hack, but it fixes the problem at hand. A real fix is to restructure the destroy_inode() to call all the destructor handlers from the RCU callback. But that is a major job to do, and requires a lot of work. For now, we just band-aid this bug with this fix (it works), and work on a more maintainable solution in the future. Link: http://lkml.kernel.org/r/20140109101932.0508dec7@gandalf.local.home Link: http://lkml.kernel.org/r/20140109182756.17abaaa8@gandalf.local.home Cc: stable@vger.kernel.org Signed-off-by: Steven Rostedt Signed-off-by: Linus Torvalds --- security/selinux/hooks.c | 20 ++++++++++++++++++-- security/selinux/include/objsec.h | 5 ++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a3e651d4bdb..6000792a055 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -215,6 +215,14 @@ static int inode_alloc_security(struct inode *inode) return 0; } +static void inode_free_rcu(struct rcu_head *head) +{ + struct inode_security_struct *isec; + + isec = container_of(head, struct inode_security_struct, rcu); + kmem_cache_free(sel_inode_cache, isec); +} + static void inode_free_security(struct inode *inode) { struct inode_security_struct *isec = inode->i_security; @@ -225,8 +233,16 @@ static void inode_free_security(struct inode *inode) list_del_init(&isec->list); spin_unlock(&sbsec->isec_lock); - inode->i_security = NULL; - kmem_cache_free(sel_inode_cache, isec); + /* + * The inode may still be referenced in a path walk and + * a call to selinux_inode_permission() can be made + * after inode_free_security() is called. Ideally, the VFS + * wouldn't do this, but fixing that is a much harder + * job. For now, simply free the i_security via RCU, and + * leave the current inode->i_security pointer intact. + * The inode will be freed after the RCU grace period too. 
+ */ + call_rcu(&isec->rcu, inode_free_rcu); } static int file_alloc_security(struct file *file) diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 26c7eee1c30..7b1830bde1c 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -38,7 +38,10 @@ struct task_security_struct { struct inode_security_struct { struct inode *inode; /* back pointer to inode object */ - struct list_head list; /* list of inode_security_struct */ + union { + struct list_head list; /* list of inode_security_struct */ + struct rcu_head rcu; /* for freeing the inode_security_struct */ + }; u32 task_sid; /* SID of creating task */ u32 sid; /* SID of this object */ u16 sclass; /* security class of this object */ From 5c6007754433ed00296207641b6894ead7602f19 Mon Sep 17 00:00:00 2001 From: Ashish Sharma Date: Wed, 5 Feb 2014 01:50:50 +0000 Subject: [PATCH 625/678] netfilter: xt_IDLETIMER: Revert to retain the kernel API format. Reverted Change-Id: Iaeca5dd2d7878c0733923ae03309a2a7b86979ca Change-Id: I0e0a4f60ec14330d8d8d1c5a508fa058d9919e07 Signed-off-by: Ashish Sharma --- net/netfilter/xt_IDLETIMER.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index dbc5b399a29..542af525a87 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -68,15 +68,15 @@ static DEFINE_MUTEX(list_mutex); static struct kobject *idletimer_tg_kobj; -static void notify_netlink_uevent(const char *label, struct idletimer_tg *timer) +static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) { - char label_msg[NLMSG_MAX_SIZE]; + char iface_msg[NLMSG_MAX_SIZE]; char state_msg[NLMSG_MAX_SIZE]; - char *envp[] = { label_msg, state_msg, NULL }; + char *envp[] = { iface_msg, state_msg, NULL }; int res; - res = snprintf(label_msg, NLMSG_MAX_SIZE, "LABEL=%s", - label); + res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s", + iface); if (NLMSG_MAX_SIZE <= res) { pr_err("message too long (%d)", res); return; @@ -87,7 +87,7 @@ static void notify_netlink_uevent(const char *label, struct idletimer_tg *timer) pr_err("message too long (%d)", res); return; } - pr_debug("putting nlmsg: <%s> <%s>\n", label_msg, state_msg); + pr_debug("putting nlmsg: <%s> <%s>\n", iface_msg, state_msg); kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp); return; From 16df81d047fd43536d030ca1fbd6ee6c2de5b2e2 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 30 Jan 2014 11:26:59 -0500 Subject: [PATCH 626/678] SELinux: Fix kernel BUG on empty security contexts. Setting an empty security context (length=0) on a file will lead to incorrectly dereferencing the type and other fields of the security context structure, yielding a kernel BUG. As a zero-length security context is never valid, just reject all such security contexts whether coming from userspace via setxattr or coming from the filesystem upon a getxattr request by SELinux. Setting a security context value (empty or otherwise) unknown to SELinux in the first place is only possible for a root process (CAP_MAC_ADMIN), and, if running SELinux in enforcing mode, only if the corresponding SELinux mac_admin permission is also granted to the domain by policy. In Fedora policies, this is only allowed for specific domains such as livecd for setting down security contexts that are not defined in the build host policy. 
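For illustration, a minimal user-space equivalent of the shell reproducer quoted later in this message (a hypothetical test program; it assumes a file named "foo" and a process that is permitted to set an unknown context, e.g. root with mac_admin or SELinux permissive):

    #include <sys/xattr.h>
    #include <stdio.h>

    int main(void)
    {
            /* zero-length security.selinux value, like `setfattr -n security.selinux foo` */
            if (setxattr("foo", "security.selinux", "", 0, 0) != 0)
                    perror("setxattr");
            /* before this fix, later access to foo (e.g. stat/ls) trips the kernel BUG;
             * with the fix, the empty value is simply rejected as an invalid context */
            return 0;
    }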
[On Android, this can only be set by root/CAP_MAC_ADMIN processes, and if running SELinux in enforcing mode, only if mac_admin permission is granted in policy. In Android 4.4, this would only be allowed for root/CAP_MAC_ADMIN processes that are also in unconfined domains. In current AOSP master, mac_admin is not allowed for any domains except the recovery console which has a legitimate need for it. The other potential vector is mounting a maliciously crafted filesystem for which SELinux fetches xattrs (e.g. an ext4 filesystem on a SDcard). However, the end result is only a local denial-of-service (DOS) due to kernel BUG. This fix is queued for 3.14.] Reproducer: su setenforce 0 touch foo setfattr -n security.selinux foo Caveat: Relabeling or removing foo after doing the above may not be possible without booting with SELinux disabled. Any subsequent access to foo after doing the above will also trigger the BUG. BUG output from Matthew Thode: [ 473.893141] ------------[ cut here ]------------ [ 473.962110] kernel BUG at security/selinux/ss/services.c:654! [ 473.995314] invalid opcode: 0000 [#6] SMP [ 474.027196] Modules linked in: [ 474.058118] CPU: 0 PID: 8138 Comm: ls Tainted: G D I 3.13.0-grsec #1 [ 474.116637] Hardware name: Supermicro X8ST3/X8ST3, BIOS 2.0 07/29/10 [ 474.149768] task: ffff8805f50cd010 ti: ffff8805f50cd488 task.ti: ffff8805f50cd488 [ 474.183707] RIP: 0010:[] [] context_struct_compute_av+0xce/0x308 [ 474.219954] RSP: 0018:ffff8805c0ac3c38 EFLAGS: 00010246 [ 474.252253] RAX: 0000000000000000 RBX: ffff8805c0ac3d94 RCX: 0000000000000100 [ 474.287018] RDX: ffff8805e8aac000 RSI: 00000000ffffffff RDI: ffff8805e8aaa000 [ 474.321199] RBP: ffff8805c0ac3cb8 R08: 0000000000000010 R09: 0000000000000006 [ 474.357446] R10: 0000000000000000 R11: ffff8805c567a000 R12: 0000000000000006 [ 474.419191] R13: ffff8805c2b74e88 R14: 00000000000001da R15: 0000000000000000 [ 474.453816] FS: 00007f2e75220800(0000) GS:ffff88061fc00000(0000) knlGS:0000000000000000 [ 474.489254] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 474.522215] CR2: 00007f2e74716090 CR3: 00000005c085e000 CR4: 00000000000207f0 [ 474.556058] Stack: [ 474.584325] ffff8805c0ac3c98 ffffffff811b549b ffff8805c0ac3c98 ffff8805f1190a40 [ 474.618913] ffff8805a6202f08 ffff8805c2b74e88 00068800d0464990 ffff8805e8aac860 [ 474.653955] ffff8805c0ac3cb8 000700068113833a ffff880606c75060 ffff8805c0ac3d94 [ 474.690461] Call Trace: [ 474.723779] [] ? lookup_fast+0x1cd/0x22a [ 474.778049] [] security_compute_av+0xf4/0x20b [ 474.811398] [] avc_compute_av+0x2a/0x179 [ 474.843813] [] avc_has_perm+0x45/0xf4 [ 474.875694] [] inode_has_perm+0x2a/0x31 [ 474.907370] [] selinux_inode_getattr+0x3c/0x3e [ 474.938726] [] security_inode_getattr+0x1b/0x22 [ 474.970036] [] vfs_getattr+0x19/0x2d [ 475.000618] [] vfs_fstatat+0x54/0x91 [ 475.030402] [] vfs_lstat+0x19/0x1b [ 475.061097] [] SyS_newlstat+0x15/0x30 [ 475.094595] [] ? 
__audit_syscall_entry+0xa1/0xc3 [ 475.148405] [] system_call_fastpath+0x16/0x1b [ 475.179201] Code: 00 48 85 c0 48 89 45 b8 75 02 0f 0b 48 8b 45 a0 48 8b 3d 45 d0 b6 00 8b 40 08 89 c6 ff ce e8 d1 b0 06 00 48 85 c0 49 89 c7 75 02 <0f> 0b 48 8b 45 b8 4c 8b 28 eb 1e 49 8d 7d 08 be 80 01 00 00 e8 [ 475.255884] RIP [] context_struct_compute_av+0xce/0x308 [ 475.296120] RSP [ 475.328734] ---[ end trace f076482e9d754adc ]--- [sds: commit message edited to note Android implications and to generate a unique Change-Id for gerrit] Change-Id: I4d5389f0cfa72b5f59dada45081fa47e03805413 Reported-by: Matthew Thode Signed-off-by: Stephen Smalley Cc: stable@vger.kernel.org Signed-off-by: Paul Moore --- security/selinux/ss/services.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index f6917bc0aa0..68c192b8fe7 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1231,6 +1231,10 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, struct context context; int rc = 0; + /* An empty security context is never valid. */ + if (!scontext_len) + return -EINVAL; + if (!ss_initialized) { int i; From c51c2e74ca0a3bcb5c1c161edb31979b68b94d6b Mon Sep 17 00:00:00 2001 From: keunyoung Date: Wed, 29 Jan 2014 12:41:50 -0800 Subject: [PATCH 627/678] fix false disconnect due to a signal sent to the reading process - In the current implementation, when a signal is sent to the reading process, read is cancelled by calling usb_ep_dequeue, which lead into calling acc_complete_out with ECONNRESET, but the current logic treats it as disconnection, which makes the device inaccessible until cable is actually disconnected. - The fix calls disconnect only when ESHUTDOWN error is passed. - If data has already arrived while trying cancelling, the data is marked as available, and it will be read out on the next read. This is necessary as USB bulk is assumed to guarantee no data loss. Signed-off-by: keunyoung --- drivers/usb/gadget/f_accessory.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index 3f27ad217fa..89ffdc2d70c 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -261,8 +261,10 @@ static void acc_complete_in(struct usb_ep *ep, struct usb_request *req) { struct acc_dev *dev = _acc_dev; - if (req->status != 0) + if (req->status == -ESHUTDOWN) { + pr_debug("acc_complete_in set disconnected"); acc_set_disconnected(dev); + } req_put(dev, &dev->tx_idle, req); @@ -274,8 +276,10 @@ static void acc_complete_out(struct usb_ep *ep, struct usb_request *req) struct acc_dev *dev = _acc_dev; dev->rx_done = 1; - if (req->status != 0) + if (req->status == -ESHUTDOWN) { + pr_debug("acc_complete_out set disconnected"); acc_set_disconnected(dev); + } wake_up(&dev->read_wq); } @@ -557,8 +561,10 @@ static ssize_t acc_read(struct file *fp, char __user *buf, pr_debug("acc_read(%d)\n", count); - if (dev->disconnected) + if (dev->disconnected) { + pr_debug("acc_read disconnected"); return -ENODEV; + } if (count > BULK_BUFFER_SIZE) count = BULK_BUFFER_SIZE; @@ -571,6 +577,12 @@ static ssize_t acc_read(struct file *fp, char __user *buf, goto done; } + if (dev->rx_done) { + // last req cancelled. try to get it. 
+ req = dev->rx_req[0]; + goto copy_data; + } + requeue_req: /* queue a request */ req = dev->rx_req[0]; @@ -588,9 +600,17 @@ static ssize_t acc_read(struct file *fp, char __user *buf, ret = wait_event_interruptible(dev->read_wq, dev->rx_done); if (ret < 0) { r = ret; - usb_ep_dequeue(dev->ep_out, req); + ret = usb_ep_dequeue(dev->ep_out, req); + if (ret != 0) { + // cancel failed. There can be a data already received. + // it will be retrieved in the next read. + pr_debug("acc_read: cancelling failed %d", ret); + } goto done; } + +copy_data: + dev->rx_done = 0; if (dev->online) { /* If we got a 0-len packet, throw it back and try again. */ if (req->actual == 0) @@ -619,8 +639,10 @@ static ssize_t acc_write(struct file *fp, const char __user *buf, pr_debug("acc_write(%d)\n", count); - if (!dev->online || dev->disconnected) + if (!dev->online || dev->disconnected) { + pr_debug("acc_write disconnected or not online"); return -ENODEV; + } while (count > 0) { if (!dev->online) { From adcfec51aac889a7cb5ff56fd90d075c56afc71f Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 19 Feb 2014 15:07:57 -0800 Subject: [PATCH 628/678] tcp: add a sysctl to config the tcp_default_init_rwnd The default initial rwnd was hardcoded to 10. Now we allow it to be controlled via /proc/sys/net/ipv4/tcp_default_init_rwnd which limits the values from 3 to 100 This is somewhat needed because ipv6 routes are autoconfigured by the kernel. See "An Argument for Increasing TCP's Initial Congestion Window" in https://developers.google.com/speed/articles/tcp_initcwnd_paper.pdf Change-Id: I7eac8a0a5133371aea9ecb9aec0b608bd7f2cc57 Signed-off-by: JP Abgrall Conflicts: include/net/tcp.h --- include/net/tcp.h | 1 + net/ipv4/sysctl_net_ipv4.c | 24 +++++++++++++++++++++++- net/ipv4/tcp_input.c | 13 ++++++++----- net/ipv4/tcp_output.c | 7 +++---- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 4fcd77af405..97cee56cde3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -252,6 +252,7 @@ extern int sysctl_tcp_max_ssthresh; extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; +extern int sysctl_tcp_default_init_rwnd; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 69fd7201129..f8651024ea6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -119,6 +119,21 @@ static int ipv4_ping_group_range(ctl_table *table, int write, return ret; } +/* Validate changes from /proc interface. 
*/ +static int proc_tcp_default_init_rwnd(ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int old_value = *(int *)ctl->data; + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + int new_value = *(int *)ctl->data; + + if (write && ret == 0 && (new_value < 3 || new_value > 100)) + *(int *)ctl->data = old_value; + + return ret; +} + static int proc_tcp_congestion_control(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -631,13 +646,20 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { + { .procname = "tcp_thin_dupack", .data = &sysctl_tcp_thin_dupack, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_default_init_rwnd", + .data = &sysctl_tcp_default_init_rwnd, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tcp_default_init_rwnd + }, { .procname = "udp_mem", .data = &sysctl_udp_mem, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d73aab3fbfc..ec0cdab8ed9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -97,6 +97,7 @@ int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_abc __read_mostly; +int sysctl_tcp_default_init_rwnd __read_mostly = TCP_DEFAULT_INIT_RCVWND; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -342,14 +343,16 @@ static void tcp_fixup_rcvbuf(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); - /* Try to select rcvbuf so that 4 mss-sized segments - * will fit to window and corresponding skbs will fit to our rcvbuf. - * (was 3; 4 is minimum to allow fast retransmit to work.) + /* Try to select rcvbuf so that sysctl_tcp_default_init_rwnd mss-sized + * segments will fit to window and corresponding skbs will fit to our + * rcvbuf. + * (was 3; then 4 as then minimum to allow fast retransmit to work.) */ while (tcp_win_from_space(rcvmem) < tp->advmss) rcvmem += 128; - if (sk->sk_rcvbuf < 4 * rcvmem) - sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); + if (sk->sk_rcvbuf < sysctl_tcp_default_init_rwnd * rcvmem) + sk->sk_rcvbuf = min(sysctl_tcp_default_init_rwnd * rcvmem, + sysctl_tcp_rmem[2]); } /* 4. Try to fixup all. It is made immediately after connection enters diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index faf257b9415..a9f3481b041 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -229,14 +229,13 @@ void tcp_select_initial_window(int __space, __u32 mss, } /* Set initial window to a value enough for senders starting with - * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place + * initial congestion window of sysctl_tcp_default_init_rwnd. Place * a limit on the initial window when mss is larger than 1460. 
*/ if (mss > (1 << *rcv_wscale)) { - int init_cwnd = TCP_DEFAULT_INIT_RCVWND; + int init_cwnd = sysctl_tcp_default_init_rwnd; if (mss > 1460) - init_cwnd = - max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); + init_cwnd = max_t(u32, (1460 * init_cwnd) / mss, 2); /* when initializing use the value from init_rcv_wnd * rather than the default from above */ From c1290d22736d397db2b7b1ce7362c7c85c37b83d Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Thu, 16 Jan 2014 11:11:02 -0800 Subject: [PATCH 629/678] android: configs: Grab the android/configs from kernel/common This is a squash of all changes from kernel/common android-3.4 up to 5e35d66 android: configs: add IPV6 ROUTE INFO Change-Id: I848f1865ec7da1dfc3338a3e9d7f944a6f00f2a6 Signed-off-by: JP Abgrall --- android/configs/README | 15 +++ android/configs/android-base.cfg | 139 ++++++++++++++++++++++++ android/configs/android-recommended.cfg | 118 ++++++++++++++++++++ 3 files changed, 272 insertions(+) create mode 100644 android/configs/README create mode 100644 android/configs/android-base.cfg create mode 100644 android/configs/android-recommended.cfg diff --git a/android/configs/README b/android/configs/README new file mode 100644 index 00000000000..8798731f890 --- /dev/null +++ b/android/configs/README @@ -0,0 +1,15 @@ +The files in this directory are meant to be used as a base for an Android +kernel config. All devices should have the options in android-base.cfg enabled. +While not mandatory, the options in android-recommended.cfg enable advanced +Android features. + +Assuming you already have a minimalist defconfig for your device, a possible +way to enable these options would be: + + ARCH= scripts/kconfig/merge_config.sh /_defconfig android/configs/android-base.cfg android/configs/android-recommended.cfg + +This will generate a .config that can then be used to save a new defconfig or +compile a new kernel with Android features enabled. + +Because there is no tool to consistently generate these config fragments, +lets keep them alphabetically sorted instead of random. 
diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg new file mode 100644 index 00000000000..1fb9bb5b1ac --- /dev/null +++ b/android/configs/android-base.cfg @@ -0,0 +1,139 @@ +# KEEP ALPHABETICALLY SORTED +# CONFIG_INET_LRO is not set +# CONFIG_MODULES is not set +# CONFIG_OABI_COMPAT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_INTF_ALARM_DEV=y +CONFIG_ANDROID_LOGGER=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_ASHMEM=y +CONFIG_BLK_DEV_DM=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_SCHED=y +CONFIG_DM_CRYPT=y +CONFIG_EMBEDDED=y +CONFIG_EXPERIMENTAL=y +CONFIG_FB=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_INET=y +CONFIG_INET_ESP=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_TARGET_REJECT_SKERR=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_REJECT_SKERR=y +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_TPROXY=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_KEY=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_NAT=y +CONFIG_NO_HZ=y +CONFIG_PACKET=y +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PPP=y 
+CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PREEMPT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_RTC_CLASS=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_STAGING=y +CONFIG_SWITCH=y +CONFIG_SYNC=y +CONFIG_SYSVIPC=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_USB_GADGET=y +CONFIG_USB_G_ANDROID=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_XFRM_USER=y diff --git a/android/configs/android-recommended.cfg b/android/configs/android-recommended.cfg new file mode 100644 index 00000000000..9caa089c5d1 --- /dev/null +++ b/android/configs/android-recommended.cfg @@ -0,0 +1,118 @@ +# KEEP ALPHABETICALLY SORTED +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_NF_CONNTRACK_SIP is not set +# CONFIG_PM_WAKELOCKS_GC is not set +# CONFIG_VT is not set +CONFIG_ANDROID_RAM_CONSOLE=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_COMPACTION=y +CONFIG_DM_UEVENT=y +CONFIG_DRAGONRISE_FF=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FUSE_FS=y +CONFIG_GREENASIA_FF=y +CONFIG_HIDRAW=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GREENASIA=y +CONFIG_HID_GYRATION=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WACOM=y +CONFIG_HID_WALTOP=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_GPIO=y +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_KEYRESET=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_UINPUT=y +CONFIG_ION=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KSM=y +CONFIG_LOGIG940_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGITECH_FF=y +CONFIG_MD=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_MSDOS_FS=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_PANTHERLORD_FF=y +CONFIG_PERF_EVENTS=y +CONFIG_PM_DEBUG=y +CONFIG_PM_RUNTIME=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +CONFIG_POWER_SUPPLY=y +CONFIG_SCHEDSTATS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_SND=y +CONFIG_SOUND=y +CONFIG_SUSPEND_TIME=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_TABLET_USB_WACOM=y +CONFIG_TIMER_STATS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_UHID=y +CONFIG_UID_STAT=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_USBNET=y +CONFIG_VFAT_FS=y From 3ec6c49987f647945dcf6c34e9214573efde0781 Mon Sep 17 00:00:00 2001 From: Ashish Sharma Date: Wed, 15 Jan 2014 16:47:16 -0800 Subject: [PATCH 630/678] android: configs: Add 
CONFIG_NETFILTER_XT_TARGET_IDLETIMER Signed-off-by: Ashish Sharma --- android/configs/android-base.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 1fb9bb5b1ac..6b67601b7f1 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -83,6 +83,7 @@ CONFIG_NETFILTER_XT_MATCH_TIME=y CONFIG_NETFILTER_XT_MATCH_U32=y CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y CONFIG_NETFILTER_XT_TARGET_MARK=y CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y From 76443a174cd5f3812ec6a29a7a67b6004c61ffdc Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Thu, 27 Feb 2014 19:38:14 -0800 Subject: [PATCH 631/678] android: base-cfg: enable DM_VERITY (used for secureboot) Change-Id: I68d769f97ffa76bb45e65d34a96dd7f558c02d08 Signed-off-by: JP Abgrall (cherry picked from commit 37cb5bec8983e505eecb730188bfc113d087dee7) --- android/configs/android-base.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 6b67601b7f1..225f3e28590 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -16,6 +16,7 @@ CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_SCHED=y CONFIG_DM_CRYPT=y +CONFIG_DM_VERITY=y CONFIG_EMBEDDED=y CONFIG_EXPERIMENTAL=y CONFIG_FB=y @@ -35,8 +36,8 @@ CONFIG_IPV6_MIP6=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_OPTIMISTIC_DAD=y CONFIG_IPV6_PRIVACY=y -CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_ARPFILTER=y From 564c80d4df36a8cbeeb85d8fa26694b92064e333 Mon Sep 17 00:00:00 2001 From: Masami Ichikawa Date: Tue, 21 Feb 2012 07:43:50 +0900 Subject: [PATCH 632/678] sysfs: Fix memory leak in sysfs_sd_setsecdata(). This patch fixes the following two memory leak patterns reported by kmemleak. sysfs_sd_setsecdata() is called during the sys_lsetxattr() operation. It checks whether sd->s_iattr is NULL and, if it is, calls sysfs_init_inode_attrs() to allocate memory. That code is: iattrs = sd->s_iattr; if (!iattrs) iattrs = sysfs_init_inode_attrs(sd); Here iattrs receives the result of sysfs_init_inode_attrs(), but sd->s_iattr is never updated with that address, so the correct address must be stored in sd->s_iattr for the memory to be freed later by other functions. unreferenced object 0xffff880250b73e60 (size 32): comm "systemd", pid 1, jiffies 4294683888 (age 94.553s) hex dump (first 32 bytes): 73 79 73 74 65 6d 5f 75 3a 6f 62 6a 65 63 74 5f system_u:object_ 72 3a 73 79 73 66 73 5f 74 3a 73 30 00 00 00 00 r:sysfs_t:s0.... backtrace: [] kmemleak_alloc+0x73/0x98 [] __kmalloc+0x100/0x12c [] context_struct_to_string+0x106/0x210 [] security_sid_to_context_core+0x10b/0x129 [] security_sid_to_context+0x10/0x12 [] selinux_inode_getsecurity+0x7d/0xa8 [] selinux_inode_getsecctx+0x22/0x2e [] security_inode_getsecctx+0x16/0x18 [] sysfs_setxattr+0x96/0x117 [] __vfs_setxattr_noperm+0x73/0xd9 [] vfs_setxattr+0x83/0xa1 [] setxattr+0xcf/0x101 [] sys_lsetxattr+0x6a/0x8f [] system_call_fastpath+0x16/0x1b [] 0xffffffffffffffff unreferenced object 0xffff88024163c5a0 (size 96): comm "systemd", pid 1, jiffies 4294683888 (age 94.553s) hex dump (first 32 bytes): 00 00 00 00 ed 41 00 00 00 00 00 00 00 00 00 00 .....A.......... 00 00 00 00 00 00 00 00 0c 64 42 4f 00 00 00 00 .........dBO....
backtrace: [] kmemleak_alloc+0x73/0x98 [] kmem_cache_alloc_trace+0xc4/0xee [] sysfs_init_inode_attrs+0x2a/0x83 [] sysfs_setxattr+0xbf/0x117 [] __vfs_setxattr_noperm+0x73/0xd9 [] vfs_setxattr+0x83/0xa1 [] setxattr+0xcf/0x101 [] sys_lsetxattr+0x6a/0x8f [] system_call_fastpath+0x16/0x1b [] 0xffffffffffffffff ` Signed-off-by: Masami Ichikawa Cc: stable Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/inode.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index e3f091a81c7..1b3c622aa19 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec void *old_secdata; size_t old_secdata_len; - iattrs = sd->s_iattr; - if (!iattrs) - iattrs = sysfs_init_inode_attrs(sd); - if (!iattrs) - return -ENOMEM; + if (!sd->s_iattr) { + sd->s_iattr = sysfs_init_inode_attrs(sd); + if (!sd->s_iattr) + return -ENOMEM; + } + iattrs = sd->s_iattr; old_secdata = iattrs->ia_secdata; old_secdata_len = iattrs->ia_secdata_len; From d237808d3829e80c3481cf09c4f4ce86dba0dc30 Mon Sep 17 00:00:00 2001 From: Metallice Date: Tue, 10 Jun 2014 23:23:08 -0400 Subject: [PATCH 633/678] defconfig: a68 v2 --- arch/arm/configs/metallice_grouper_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 1282bca05a6..c889cb6ed6b 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-68" +CONFIG_LOCALVERSION="-MKernel-a68" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y From 2eaf67512e44ea6dcb7b39f3ca55846c66618ed3 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 18 Jun 2014 00:47:39 -0400 Subject: [PATCH 634/678] makefile: enable cache --- Makefile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 54878701fa5..cd2931f6d03 100644 --- a/Makefile +++ b/Makefile @@ -158,6 +158,8 @@ VPATH := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD)) export srctree objtree VPATH +CCACHE := ccache + # SUBARCH tells the usermode build what the underlying arch is. 
That is set # first, and if a usermode build is happening, the "ARCH=um" on the command @@ -193,7 +195,7 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile export KBUILD_BUILDHOST := $(SUBARCH) ARCH ?= $(SUBARCH) -CROSS_COMPILE ?= $(CONFIG_CROSS_COMPILE:"%"=%) +CROSS_COMPILE ?= $(CCACHE) $(CONFIG_CROSS_COMPILE:"%"=%) # Architecture as present in compile.h UTS_MACHINE := $(ARCH) @@ -243,8 +245,8 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ else if [ -x /bin/bash ]; then echo /bin/bash; \ else echo sh; fi ; fi) -HOSTCC = gcc -HOSTCXX = g++ +HOSTCC = $(CCACHE) gcc +HOSTCXX = $(CCACHE) g++ HOSTCFLAGS = -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer HOSTCXXFLAGS = -O2 @@ -330,7 +332,7 @@ include $(srctree)/scripts/Kbuild.include AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld -CC = $(CROSS_COMPILE)gcc +CC = $(CCACHE) $(CROSS_COMPILE)gcc CPP = $(CC) -E AR = $(CROSS_COMPILE)ar NM = $(CROSS_COMPILE)nm From bc9758fc90e583d8eea438c064df52372b5ca45b Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 18 Jun 2014 00:47:56 -0400 Subject: [PATCH 635/678] defconfig: enable ksm --- arch/arm/configs/metallice_grouper_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index c889cb6ed6b..234cef6be01 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -488,7 +488,7 @@ CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_ZONE_DMA_FLAG=0 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y -# CONFIG_KSM is not set +CONFIG_KSM=y CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 # CONFIG_CLEANCACHE is not set CONFIG_FORCE_MAX_ZONEORDER=11 From 87d0a5a52d8a9fea12ca9790aa49d2b756b027cb Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 18 Jun 2014 01:25:25 -0400 Subject: [PATCH 636/678] Revert "[PATCH 4/4] block, bfq: add Early Queue Merge (EQM) to BFQ-v6r2 for 3.1.0" This reverts commit 5462f000d40eb613a8e544d09b461fc7d763de41. --- include/linux/iocontext.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index fbdaa5aef61..69fdd5894ef 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -22,9 +22,6 @@ struct cfq_io_context { struct cfq_ttime ttime; - unsigned int raising_time_left; - unsigned int saved_idle_window; - struct list_head queue_list; struct hlist_node cic_list; From e322b2db0109d99b17444a61b7a46c9633caf883 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 18 Jun 2014 01:25:43 -0400 Subject: [PATCH 637/678] Revert "[PATCH 3/4] block: introduce the BFQ-v6r2 I/O sched for 3.1" This reverts commit 019a0107b0508b06ae5f30c2c6e410b1ef411e2d. --- include/linux/iocontext.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 69fdd5894ef..fbdaa5aef61 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -22,6 +22,9 @@ struct cfq_io_context { struct cfq_ttime ttime; + unsigned int raising_time_left; + unsigned int saved_idle_window; + struct list_head queue_list; struct hlist_node cic_list; From 619a9b7f230094c07881f25a9ce1e8d7c667a520 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 18 Jun 2014 01:26:11 -0400 Subject: [PATCH 638/678] Revert "[PATCH 2/4] block: cgroups, kconfig, build bits for BFQ-v6r2-3.1" This reverts commit ed50439f4724f002066e22e59dff1feaa08008aa. 
Conflicts: block/bfq-iosched.c --- block/Makefile | 1 - block/bfq-cgroup.c | 876 ----------- block/bfq-ioc.c | 410 ------ block/bfq-iosched.c | 3370 ------------------------------------------- block/bfq-sched.c | 1040 ------------- block/bfq.h | 606 -------- 6 files changed, 6303 deletions(-) delete mode 100644 block/bfq-cgroup.c delete mode 100644 block/bfq-ioc.c delete mode 100644 block/bfq-iosched.c delete mode 100644 block/bfq-sched.c delete mode 100644 block/bfq.h diff --git a/block/Makefile b/block/Makefile index 760d8f3ff2e..eb332a2d98c 100644 --- a/block/Makefile +++ b/block/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_ROW) += row-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o -obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c deleted file mode 100644 index be3e902c5c9..00000000000 --- a/block/bfq-cgroup.c +++ /dev/null @@ -1,876 +0,0 @@ -/* - * BFQ: CGROUPS support. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe - * - * Copyright (C) 2008 Fabio Checconi - * Paolo Valente - * - * Copyright (C) 2010 Paolo Valente - * - * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. - */ - -#ifdef CONFIG_CGROUP_BFQIO -static struct bfqio_cgroup bfqio_root_cgroup = { - .weight = BFQ_DEFAULT_GRP_WEIGHT, - .ioprio = BFQ_DEFAULT_GRP_IOPRIO, - .ioprio_class = BFQ_DEFAULT_GRP_CLASS, -}; - -static inline void bfq_init_entity(struct bfq_entity *entity, - struct bfq_group *bfqg) -{ - entity->weight = entity->new_weight; - entity->orig_weight = entity->new_weight; - entity->ioprio = entity->new_ioprio; - entity->ioprio_class = entity->new_ioprio_class; - entity->parent = bfqg->my_entity; - entity->sched_data = &bfqg->sched_data; -} - -static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup) -{ - return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id), - struct bfqio_cgroup, css); -} - -/* - * Search the bfq_group for bfqd into the hash table (by now only a list) - * of bgrp. Must be called under rcu_read_lock(). - */ -static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, - struct bfq_data *bfqd) -{ - struct bfq_group *bfqg; - struct hlist_node *n; - void *key; - - hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) { - key = rcu_dereference(bfqg->bfqd); - if (key == bfqd) - return bfqg; - } - - return NULL; -} - -static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, - struct bfq_group *bfqg) -{ - struct bfq_entity *entity = &bfqg->entity; - - /* - * If the weight of the entity has never been set via the sysfs - * interface, then bgrp->weight == 0. In this case we initialize - * the weight from the current ioprio value. Otherwise, the group - * weight, if set, has priority over the ioprio value. 
- */ - if (bgrp->weight == 0) { - entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio); - entity->new_ioprio = bgrp->ioprio; - } else { - entity->new_weight = bgrp->weight; - entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight); - } - entity->orig_weight = entity->weight = entity->new_weight; - entity->ioprio = entity->new_ioprio; - entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; - entity->my_sched_data = &bfqg->sched_data; -} - -static inline void bfq_group_set_parent(struct bfq_group *bfqg, - struct bfq_group *parent) -{ - struct bfq_entity *entity; - - BUG_ON(parent == NULL); - BUG_ON(bfqg == NULL); - - entity = &bfqg->entity; - entity->parent = parent->my_entity; - entity->sched_data = &parent->sched_data; -} - -/** - * bfq_group_chain_alloc - allocate a chain of groups. - * @bfqd: queue descriptor. - * @cgroup: the leaf cgroup this chain starts from. - * - * Allocate a chain of groups starting from the one belonging to - * @cgroup up to the root cgroup. Stop if a cgroup on the chain - * to the root has already an allocated group on @bfqd. - */ -static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, - struct cgroup *cgroup) -{ - struct bfqio_cgroup *bgrp; - struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; - - for (; cgroup != NULL; cgroup = cgroup->parent) { - bgrp = cgroup_to_bfqio(cgroup); - - bfqg = bfqio_lookup_group(bgrp, bfqd); - if (bfqg != NULL) { - /* - * All the cgroups in the path from there to the - * root must have a bfq_group for bfqd, so we don't - * need any more allocations. - */ - break; - } - - bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); - if (bfqg == NULL) - goto cleanup; - - bfq_group_init_entity(bgrp, bfqg); - bfqg->my_entity = &bfqg->entity; - - if (leaf == NULL) { - leaf = bfqg; - prev = leaf; - } else { - bfq_group_set_parent(prev, bfqg); - /* - * Build a list of allocated nodes using the bfqd - * filed, that is still unused and will be initialized - * only after the node will be connected. - */ - prev->bfqd = bfqg; - prev = bfqg; - } - } - - return leaf; - -cleanup: - while (leaf != NULL) { - prev = leaf; - leaf = leaf->bfqd; - kfree(prev); - } - - return NULL; -} - -/** - * bfq_group_chain_link - link an allocatd group chain to a cgroup hierarchy. - * @bfqd: the queue descriptor. - * @cgroup: the leaf cgroup to start from. - * @leaf: the leaf group (to be associated to @cgroup). - * - * Try to link a chain of groups to a cgroup hierarchy, connecting the - * nodes bottom-up, so we can be sure that when we find a cgroup in the - * hierarchy that already as a group associated to @bfqd all the nodes - * in the path to the root cgroup have one too. - * - * On locking: the queue lock protects the hierarchy (there is a hierarchy - * per device) while the bfqio_cgroup lock protects the list of groups - * belonging to the same cgroup. 
- */ -static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup, - struct bfq_group *leaf) -{ - struct bfqio_cgroup *bgrp; - struct bfq_group *bfqg, *next, *prev = NULL; - unsigned long flags; - - assert_spin_locked(bfqd->queue->queue_lock); - - for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) { - bgrp = cgroup_to_bfqio(cgroup); - next = leaf->bfqd; - - bfqg = bfqio_lookup_group(bgrp, bfqd); - BUG_ON(bfqg != NULL); - - spin_lock_irqsave(&bgrp->lock, flags); - - rcu_assign_pointer(leaf->bfqd, bfqd); - hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); - hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); - - spin_unlock_irqrestore(&bgrp->lock, flags); - - prev = leaf; - leaf = next; - } - - BUG_ON(cgroup == NULL && leaf != NULL); - if (cgroup != NULL && prev != NULL) { - bgrp = cgroup_to_bfqio(cgroup); - bfqg = bfqio_lookup_group(bgrp, bfqd); - bfq_group_set_parent(prev, bfqg); - } -} - -/** - * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. - * @bfqd: queue descriptor. - * @cgroup: cgroup being searched for. - * - * Return a group associated to @bfqd in @cgroup, allocating one if - * necessary. When a group is returned all the cgroups in the path - * to the root have a group associated to @bfqd. - * - * If the allocation fails, return the root group: this breaks guarantees - * but is a safe fallbak. If this loss becames a problem it can be - * mitigated using the equivalent weight (given by the product of the - * weights of the groups in the path from @group to the root) in the - * root scheduler. - * - * We allocate all the missing nodes in the path from the leaf cgroup - * to the root and we connect the nodes only after all the allocations - * have been successful. - */ -static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, - struct cgroup *cgroup) -{ - struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); - struct bfq_group *bfqg; - - bfqg = bfqio_lookup_group(bgrp, bfqd); - if (bfqg != NULL) - return bfqg; - - bfqg = bfq_group_chain_alloc(bfqd, cgroup); - if (bfqg != NULL) - bfq_group_chain_link(bfqd, cgroup, bfqg); - else - bfqg = bfqd->root_group; - - return bfqg; -} - -/** - * bfq_bfqq_move - migrate @bfqq to @bfqg. - * @bfqd: queue descriptor. - * @bfqq: the queue to move. - * @entity: @bfqq's entity. - * @bfqg: the group to move to. - * - * Move @bfqq to @bfqg, deactivating it from its old group and reactivating - * it on the new one. Avoid putting the entity on the old group idle tree. - * - * Must be called under the queue lock; the cgroup owning @bfqg must - * not disappear (by now this just means that we are called under - * rcu_read_lock()). - */ -static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, - struct bfq_entity *entity, struct bfq_group *bfqg) -{ - int busy, resume; - - busy = bfq_bfqq_busy(bfqq); - resume = !RB_EMPTY_ROOT(&bfqq->sort_list); - - BUG_ON(resume && !entity->on_st); - BUG_ON(busy && !resume && entity->on_st && bfqq != bfqd->active_queue); - - if (busy) { - BUG_ON(atomic_read(&bfqq->ref) < 2); - - if (!resume) - bfq_del_bfqq_busy(bfqd, bfqq, 0); - else - bfq_deactivate_bfqq(bfqd, bfqq, 0); - } else if (entity->on_st) - bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); - - /* - * Here we use a reference to bfqg. We don't need a refcounter - * as the cgroup reference will not be dropped, so that its - * destroy() callback will not be invoked. 
- */ - entity->parent = bfqg->my_entity; - entity->sched_data = &bfqg->sched_data; - - if (busy && resume) - bfq_activate_bfqq(bfqd, bfqq); - - if (bfqd->active_queue == NULL && !bfqd->rq_in_driver) - bfq_schedule_dispatch(bfqd); -} - -/** - * __bfq_cic_change_cgroup - move @cic to @cgroup. - * @bfqd: the queue descriptor. - * @cic: the cic to move. - * @cgroup: the cgroup to move to. - * - * Move cic to cgroup, assuming that bfqd->queue is locked; the caller - * has to make sure that the reference to cgroup is valid across the call. - * - * NOTE: an alternative approach might have been to store the current - * cgroup in bfqq and getting a reference to it, reducing the lookup - * time here, at the price of slightly more complex code. - */ -static struct bfq_group *__bfq_cic_change_cgroup(struct bfq_data *bfqd, - struct cfq_io_context *cic, - struct cgroup *cgroup) -{ - struct bfq_queue *async_bfqq; - struct bfq_queue *sync_bfqq; - struct bfq_entity *entity; - struct bfq_group *bfqg; - - spin_lock(&bfqd->eqm_lock); - - async_bfqq = cic_to_bfqq(cic, 0); - sync_bfqq = cic_to_bfqq(cic, 1); - - bfqg = bfq_find_alloc_group(bfqd, cgroup); - if (async_bfqq != NULL) { - entity = &async_bfqq->entity; - - if (entity->sched_data != &bfqg->sched_data) { - cic_set_bfqq(cic, NULL, 0); - bfq_log_bfqq(bfqd, async_bfqq, - "cic_change_group: %p %d", - async_bfqq, atomic_read(&async_bfqq->ref)); - bfq_put_queue(async_bfqq); - } - } - - if (sync_bfqq != NULL) { - entity = &sync_bfqq->entity; - if (entity->sched_data != &bfqg->sched_data) - bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); - } - - spin_unlock(&bfqd->eqm_lock); - - return bfqg; -} - -/** - * bfq_cic_change_cgroup - move @cic to @cgroup. - * @cic: the cic being migrated. - * @cgroup: the destination cgroup. - * - * When the task owning @cic is moved to @cgroup, @cic is immediately - * moved into its new parent group. - */ -static void bfq_cic_change_cgroup(struct cfq_io_context *cic, - struct cgroup *cgroup) -{ - struct bfq_data *bfqd; - unsigned long uninitialized_var(flags); - - bfqd = bfq_get_bfqd_locked(&cic->key, &flags); - if (bfqd != NULL && - !strncmp(bfqd->queue->elevator->elevator_type->elevator_name, - "bfq", ELV_NAME_MAX)) { - __bfq_cic_change_cgroup(bfqd, cic, cgroup); - bfq_put_bfqd_unlock(bfqd, &flags); - } -} - -/** - * bfq_cic_update_cgroup - update the cgroup of @cic. - * @cic: the @cic to update. - * - * Make sure that @cic is enqueued in the cgroup of the current task. - * We need this in addition to moving cics during the cgroup attach - * phase because the task owning @cic could be at its first disk - * access or we may end up in the root cgroup as the result of a - * memory allocation failure and here we try to move to the right - * group. - * - * Must be called under the queue lock. It is safe to use the returned - * value even after the rcu_read_unlock() as the migration/destruction - * paths act under the queue lock too. IOW it is impossible to race with - * group migration/destruction and end up with an invalid group as: - * a) here cgroup has not yet been destroyed, nor its destroy callback - * has started execution, as current holds a reference to it, - * b) if it is destroyed after rcu_read_unlock() [after current is - * migrated to a different cgroup] its attach() callback will have - * taken care of remove all the references to the old cgroup data. 
- */ -static struct bfq_group *bfq_cic_update_cgroup(struct cfq_io_context *cic) -{ - struct bfq_data *bfqd = cic->key; - struct bfq_group *bfqg; - struct cgroup *cgroup; - - BUG_ON(bfqd == NULL); - - rcu_read_lock(); - cgroup = task_cgroup(current, bfqio_subsys_id); - bfqg = __bfq_cic_change_cgroup(bfqd, cic, cgroup); - rcu_read_unlock(); - - return bfqg; -} - -/** - * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. - * @st: the service tree being flushed. - */ -static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) -{ - struct bfq_entity *entity = st->first_idle; - - for (; entity != NULL; entity = st->first_idle) - __bfq_deactivate_entity(entity, 0); -} - -/** - * bfq_reparent_leaf_entity - move leaf entity to the root_group. - * @bfqd: the device data structure with the root group. - * @entity: the entity to move. - */ -static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - BUG_ON(bfqq == NULL); - bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); - return; -} - -/** - * bfq_reparent_active_entities - move to the root group all active entities. - * @bfqd: the device data structure with the root group. - * @bfqg: the group to move from. - * @st: the service tree with the entities. - * - * Needs queue_lock to be taken and reference to be valid over the call. - */ -static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, - struct bfq_group *bfqg, - struct bfq_service_tree *st) -{ - struct rb_root *active = &st->active; - struct bfq_entity *entity = NULL; - - if (!RB_EMPTY_ROOT(&st->active)) - entity = bfq_entity_of(rb_first(active)); - - for (; entity != NULL ; entity = bfq_entity_of(rb_first(active))) - bfq_reparent_leaf_entity(bfqd, entity); - - if (bfqg->sched_data.active_entity != NULL) - bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.active_entity); - - return; -} - -/** - * bfq_destroy_group - destroy @bfqg. - * @bgrp: the bfqio_cgroup containing @bfqg. - * @bfqg: the group being destroyed. - * - * Destroy @bfqg, making sure that it is not referenced from its parent. - */ -static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) -{ - struct bfq_data *bfqd; - struct bfq_service_tree *st; - struct bfq_entity *entity = bfqg->my_entity; - unsigned long uninitialized_var(flags); - int i; - - hlist_del(&bfqg->group_node); - - /* - * Empty all service_trees belonging to this group before deactivating - * the group itself. - */ - for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { - st = bfqg->sched_data.service_tree + i; - - /* - * The idle tree may still contain bfq_queues belonging - * to exited task because they never migrated to a different - * cgroup from the one being destroyed now. Noone else - * can access them so it's safe to act without any lock. - */ - bfq_flush_idle_tree(st); - - /* - * It may happen that some queues are still active - * (busy) upon group destruction (if the corresponding - * processes have been forced to terminate). We move - * all the leaf entities corresponding to these queues - * to the root_group. - * Also, it may happen that the group has an entity - * under service, which is disconnected from the active - * tree: it must be moved, too. - * There is no need to put the sync queues, as the - * scheduler has taken no reference. 
- */ - bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); - if (bfqd != NULL) { - bfq_reparent_active_entities(bfqd, bfqg, st); - bfq_put_bfqd_unlock(bfqd, &flags); - } - BUG_ON(!RB_EMPTY_ROOT(&st->active)); - BUG_ON(!RB_EMPTY_ROOT(&st->idle)); - } - BUG_ON(bfqg->sched_data.next_active != NULL); - BUG_ON(bfqg->sched_data.active_entity != NULL); - - /* - * We may race with device destruction, take extra care when - * dereferencing bfqg->bfqd. - */ - bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); - if (bfqd != NULL) { - hlist_del(&bfqg->bfqd_node); - __bfq_deactivate_entity(entity, 0); - bfq_put_async_queues(bfqd, bfqg); - bfq_put_bfqd_unlock(bfqd, &flags); - } - BUG_ON(entity->tree != NULL); - - /* - * No need to defer the kfree() to the end of the RCU grace - * period: we are called from the destroy() callback of our - * cgroup, so we can be sure that noone is a) still using - * this cgroup or b) doing lookups in it. - */ - kfree(bfqg); -} - -static void bfq_end_raising_async(struct bfq_data *bfqd) -{ - struct hlist_node *pos, *n; - struct bfq_group *bfqg; - - hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) - bfq_end_raising_async_queues(bfqd, bfqg); -} - -/** - * bfq_disconnect_groups - diconnect @bfqd from all its groups. - * @bfqd: the device descriptor being exited. - * - * When the device exits we just make sure that no lookup can return - * the now unused group structures. They will be deallocated on cgroup - * destruction. - */ -static void bfq_disconnect_groups(struct bfq_data *bfqd) -{ - struct hlist_node *pos, *n; - struct bfq_group *bfqg; - - bfq_log(bfqd, "disconnect_groups beginning") ; - hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) { - hlist_del(&bfqg->bfqd_node); - - __bfq_deactivate_entity(bfqg->my_entity, 0); - - /* - * Don't remove from the group hash, just set an - * invalid key. No lookups can race with the - * assignment as bfqd is being destroyed; this - * implies also that new elements cannot be added - * to the list. - */ - rcu_assign_pointer(bfqg->bfqd, NULL); - - bfq_log(bfqd, "disconnect_groups: put async for group %p", - bfqg) ; - bfq_put_async_queues(bfqd, bfqg); - } -} - -static inline void bfq_free_root_group(struct bfq_data *bfqd) -{ - struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; - struct bfq_group *bfqg = bfqd->root_group; - - bfq_put_async_queues(bfqd, bfqg); - - spin_lock_irq(&bgrp->lock); - hlist_del_rcu(&bfqg->group_node); - spin_unlock_irq(&bgrp->lock); - - /* - * No need to synchronize_rcu() here: since the device is gone - * there cannot be any read-side access to its root_group. 
- */ - kfree(bfqg); -} - -static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) -{ - struct bfq_group *bfqg; - struct bfqio_cgroup *bgrp; - int i; - - bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); - if (bfqg == NULL) - return NULL; - - bfqg->entity.parent = NULL; - for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) - bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; - - bgrp = &bfqio_root_cgroup; - spin_lock_irq(&bgrp->lock); - rcu_assign_pointer(bfqg->bfqd, bfqd); - hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); - spin_unlock_irq(&bgrp->lock); - - return bfqg; -} - -#define SHOW_FUNCTION(__VAR) \ -static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \ - struct cftype *cftype) \ -{ \ - struct bfqio_cgroup *bgrp; \ - u64 ret; \ - \ - if (!cgroup_lock_live_group(cgroup)) \ - return -ENODEV; \ - \ - bgrp = cgroup_to_bfqio(cgroup); \ - spin_lock_irq(&bgrp->lock); \ - ret = bgrp->__VAR; \ - spin_unlock_irq(&bgrp->lock); \ - \ - cgroup_unlock(); \ - \ - return ret; \ -} - -SHOW_FUNCTION(weight); -SHOW_FUNCTION(ioprio); -SHOW_FUNCTION(ioprio_class); -#undef SHOW_FUNCTION - -#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ -static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \ - struct cftype *cftype, \ - u64 val) \ -{ \ - struct bfqio_cgroup *bgrp; \ - struct bfq_group *bfqg; \ - struct hlist_node *n; \ - \ - if (val < (__MIN) || val > (__MAX)) \ - return -EINVAL; \ - \ - if (!cgroup_lock_live_group(cgroup)) \ - return -ENODEV; \ - \ - bgrp = cgroup_to_bfqio(cgroup); \ - \ - spin_lock_irq(&bgrp->lock); \ - bgrp->__VAR = (unsigned short)val; \ - hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \ - /* \ - * Setting the ioprio_changed flag of the entity \ - * to 1 with new_##__VAR == ##__VAR would re-set \ - * the value of the weight to its ioprio mapping. \ - * Set the flag only if necessary. 
\ - */ \ - if ((unsigned short)val != bfqg->entity.new_##__VAR) { \ - bfqg->entity.new_##__VAR = (unsigned short)val; \ - smp_wmb(); \ - bfqg->entity.ioprio_changed = 1; \ - } \ - } \ - spin_unlock_irq(&bgrp->lock); \ - \ - cgroup_unlock(); \ - \ - return 0; \ -} - -STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); -STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); -STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); -#undef STORE_FUNCTION - -static struct cftype bfqio_files[] = { - { - .name = "weight", - .read_u64 = bfqio_cgroup_weight_read, - .write_u64 = bfqio_cgroup_weight_write, - }, - { - .name = "ioprio", - .read_u64 = bfqio_cgroup_ioprio_read, - .write_u64 = bfqio_cgroup_ioprio_write, - }, - { - .name = "ioprio_class", - .read_u64 = bfqio_cgroup_ioprio_class_read, - .write_u64 = bfqio_cgroup_ioprio_class_write, - }, -}; - -static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) -{ - return cgroup_add_files(cgroup, subsys, bfqio_files, - ARRAY_SIZE(bfqio_files)); -} - -static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys *subsys, - struct cgroup *cgroup) -{ - struct bfqio_cgroup *bgrp; - - if (cgroup->parent != NULL) { - bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); - if (bgrp == NULL) - return ERR_PTR(-ENOMEM); - } else - bgrp = &bfqio_root_cgroup; - - spin_lock_init(&bgrp->lock); - INIT_HLIST_HEAD(&bgrp->group_data); - bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; - bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; - - return &bgrp->css; -} - -/* - * We cannot support shared io contexts, as we have no means to support - * two tasks with the same ioc in two different groups without major rework - * of the main cic/bfqq data structures. By now we allow a task to change - * its cgroup only if it's the only owner of its ioc; the drawback of this - * behavior is that a group containing a task that forked using CLONE_IO - * will not be destroyed until the tasks sharing the ioc die. - */ -static int bfqio_can_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, - struct task_struct *tsk) -{ - struct io_context *ioc; - int ret = 0; - - /* task_lock() is needed to avoid races with exit_io_context() */ - task_lock(tsk); - ioc = tsk->io_context; - if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) - /* - * ioc == NULL means that the task is either too young or - * exiting: if it has still no ioc the ioc can't be shared, - * if the task is exiting the attach will fail anyway, no - * matter what we return here. 
- */ - ret = -EINVAL; - task_unlock(tsk); - - return ret; -} - -static void bfqio_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, - struct cgroup *prev, struct task_struct *tsk) -{ - struct io_context *ioc; - struct cfq_io_context *cic; - struct hlist_node *n; - - task_lock(tsk); - ioc = tsk->io_context; - if (ioc != NULL) { - BUG_ON(atomic_long_read(&ioc->refcount) == 0); - atomic_long_inc(&ioc->refcount); - } - task_unlock(tsk); - - if (ioc == NULL) - return; - - rcu_read_lock(); - hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) - bfq_cic_change_cgroup(cic, cgroup); - rcu_read_unlock(); - - put_io_context(ioc); -} - -static void bfqio_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) -{ - struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); - struct hlist_node *n, *tmp; - struct bfq_group *bfqg; - - /* - * Since we are destroying the cgroup, there are no more tasks - * referencing it, and all the RCU grace periods that may have - * referenced it are ended (as the destruction of the parent - * cgroup is RCU-safe); bgrp->group_data will not be accessed by - * anything else and we don't need any synchronization. - */ - hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node) - bfq_destroy_group(bgrp, bfqg); - - BUG_ON(!hlist_empty(&bgrp->group_data)); - - kfree(bgrp); -} - -struct cgroup_subsys bfqio_subsys = { - .name = "bfqio", - .create = bfqio_create, - .can_attach = bfqio_can_attach, - .attach = bfqio_attach, - .destroy = bfqio_destroy, - .populate = bfqio_populate, - .subsys_id = bfqio_subsys_id, -}; -#else -static inline void bfq_init_entity(struct bfq_entity *entity, - struct bfq_group *bfqg) -{ - entity->weight = entity->new_weight; - entity->orig_weight = entity->new_weight; - entity->ioprio = entity->new_ioprio; - entity->ioprio_class = entity->new_ioprio_class; - entity->sched_data = &bfqg->sched_data; -} - -static inline struct bfq_group * -bfq_cic_update_cgroup(struct cfq_io_context *cic) -{ - struct bfq_data *bfqd = cic->key; - return bfqd->root_group; -} - -static inline void bfq_bfqq_move(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct bfq_entity *entity, - struct bfq_group *bfqg) -{ -} - -static void bfq_end_raising_async(struct bfq_data *bfqd) -{ - bfq_end_raising_async_queues(bfqd, bfqd->root_group); -} - -static inline void bfq_disconnect_groups(struct bfq_data *bfqd) -{ - bfq_put_async_queues(bfqd, bfqd->root_group); -} - -static inline void bfq_free_root_group(struct bfq_data *bfqd) -{ - kfree(bfqd->root_group); -} - -static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) -{ - struct bfq_group *bfqg; - int i; - - bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); - if (bfqg == NULL) - return NULL; - - for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) - bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; - - return bfqg; -} -#endif diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c deleted file mode 100644 index f7366962da0..00000000000 --- a/block/bfq-ioc.c +++ /dev/null @@ -1,410 +0,0 @@ -/* - * BFQ: I/O context handling. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe - * - * Copyright (C) 2008 Fabio Checconi - * Paolo Valente - * - * Copyright (C) 2010 Paolo Valente - */ - -/** - * bfq_cic_free_rcu - deferred cic freeing. - * @head: RCU head of the cic to free. - * - * Free the cic containing @head and, if it was the last one and - * the module is exiting wake up anyone waiting for its deallocation - * (see bfq_exit()). 
- */ -static void bfq_cic_free_rcu(struct rcu_head *head) -{ - struct cfq_io_context *cic; - - cic = container_of(head, struct cfq_io_context, rcu_head); - - kmem_cache_free(bfq_ioc_pool, cic); - elv_ioc_count_dec(bfq_ioc_count); - - if (bfq_ioc_gone != NULL) { - spin_lock(&bfq_ioc_gone_lock); - if (bfq_ioc_gone != NULL && - !elv_ioc_count_read(bfq_ioc_count)) { - complete(bfq_ioc_gone); - bfq_ioc_gone = NULL; - } - spin_unlock(&bfq_ioc_gone_lock); - } -} - -static void bfq_cic_free(struct cfq_io_context *cic) -{ - call_rcu(&cic->rcu_head, bfq_cic_free_rcu); -} - -/** - * cic_free_func - disconnect a cic ready to be freed. - * @ioc: the io_context @cic belongs to. - * @cic: the cic to be freed. - * - * Remove @cic from the @ioc radix tree hash and from its cic list, - * deferring the deallocation of @cic to the end of the current RCU - * grace period. This assumes that __bfq_exit_single_io_context() - * has already been called for @cic. - */ -static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) -{ - unsigned long flags; - unsigned long dead_key = (unsigned long) cic->key; - - BUG_ON(!(dead_key & CIC_DEAD_KEY)); - - spin_lock_irqsave(&ioc->lock, flags); - radix_tree_delete(&ioc->bfq_radix_root, - dead_key >> CIC_DEAD_INDEX_SHIFT); - hlist_del_init_rcu(&cic->cic_list); - spin_unlock_irqrestore(&ioc->lock, flags); - - bfq_cic_free(cic); -} - -static void bfq_free_io_context(struct io_context *ioc) -{ - /* - * ioc->refcount is zero here, or we are called from elv_unregister(), - * so no more cic's are allowed to be linked into this ioc. So it - * should be ok to iterate over the known list, we will see all cic's - * since no new ones are added. - */ - call_for_each_cic(ioc, cic_free_func); -} - -/** - * __bfq_exit_single_io_context - deassociate @cic from any running task. - * @bfqd: bfq_data on which @cic is valid. - * @cic: the cic being exited. - * - * Whenever no more tasks are using @cic or @bfqd is deallocated we - * need to invalidate its entry in the radix tree hash table and to - * release the queues it refers to. - * - * Called under the queue lock. - */ -static void __bfq_exit_single_io_context(struct bfq_data *bfqd, - struct cfq_io_context *cic) -{ - struct io_context *ioc = cic->ioc; - - list_del_init(&cic->queue_list); - - /* - * Make sure dead mark is seen for dead queues - */ - smp_wmb(); - rcu_assign_pointer(cic->key, bfqd_dead_key(bfqd)); - - /* - * No write-side locking as no task is using @ioc (they're exited - * or bfqd is being deallocated. - */ - rcu_read_lock(); - if (rcu_dereference(ioc->ioc_data) == cic) { - rcu_read_unlock(); - spin_lock(&ioc->lock); - rcu_assign_pointer(ioc->ioc_data, NULL); - spin_unlock(&ioc->lock); - } else - rcu_read_unlock(); - - if (cic->cfqq[BLK_RW_ASYNC] != NULL) { - bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_ASYNC]); - cic->cfqq[BLK_RW_ASYNC] = NULL; - } - - spin_lock(&bfqd->eqm_lock); - if (cic->cfqq[BLK_RW_SYNC] != NULL) { - /* - * If the bic is using a shared queue, put the reference - * taken on the io_context when the bic started using a - * shared bfq_queue. - */ - if (bfq_bfqq_coop(cic->cfqq[BLK_RW_SYNC])) - put_io_context(ioc); - bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_SYNC]); - cic->cfqq[BLK_RW_SYNC] = NULL; - } - spin_unlock(&bfqd->eqm_lock); -} - -/** - * bfq_exit_single_io_context - deassociate @cic from @ioc (unlocked version). - * @ioc: the io_context @cic belongs to. - * @cic: the cic being exited. - * - * Take the queue lock and call __bfq_exit_single_io_context() to do the - * rest of the work. 
We take care of possible races with bfq_exit_queue() - * using bfq_get_bfqd_locked() (and abusing a little bit the RCU mechanism). - */ -static void bfq_exit_single_io_context(struct io_context *ioc, - struct cfq_io_context *cic) -{ - struct bfq_data *bfqd; - unsigned long uninitialized_var(flags); - - bfqd = bfq_get_bfqd_locked(&cic->key, &flags); - if (bfqd != NULL) { - __bfq_exit_single_io_context(bfqd, cic); - bfq_put_bfqd_unlock(bfqd, &flags); - } -} - -/** - * bfq_exit_io_context - deassociate @ioc from all cics it owns. - * @ioc: the @ioc being exited. - * - * No more processes are using @ioc we need to clean up and put the - * internal structures we have that belongs to that process. Loop - * through all its cics, locking their queues and exiting them. - */ -static void bfq_exit_io_context(struct io_context *ioc) -{ - call_for_each_cic(ioc, bfq_exit_single_io_context); -} - -static struct cfq_io_context *bfq_alloc_io_context(struct bfq_data *bfqd, - gfp_t gfp_mask) -{ - struct cfq_io_context *cic; - - cic = kmem_cache_alloc_node(bfq_ioc_pool, gfp_mask | __GFP_ZERO, - bfqd->queue->node); - if (cic != NULL) { - cic->ttime.last_end_request = jiffies; - /* - * A newly created cic indicates that the process has just - * started doing I/O, and is probably mapping into memory its - * executable and libraries: it definitely needs weight raising. - * There is however the possibility that the process performs, - * for a while, I/O close to some other process. EQM intercepts - * this behavior and may merge the queue corresponding to the - * process with some other queue, BEFORE the weight of the queue - * is raised. Merged queues are not weight-raised (they are assumed - * to belong to processes that benefit only from high throughput). - * If the merge is basically the consequence of an accident, then - * the queue will be split soon and will get back its old weight. - * It is then important to write down somewhere that this queue - * does need weight raising, even if it did not make it to get its - * weight raised before being merged. To this purpose, we overload - * the field raising_time_left and assign 1 to it, to mark the queue - * as needing weight raising. - */ - cic->raising_time_left = 1; - INIT_LIST_HEAD(&cic->queue_list); - INIT_HLIST_NODE(&cic->cic_list); - cic->dtor = bfq_free_io_context; - cic->exit = bfq_exit_io_context; - elv_ioc_count_inc(bfq_ioc_count); - } - - return cic; -} - -/** - * bfq_drop_dead_cic - free an exited cic. - * @bfqd: bfq data for the device in use. - * @ioc: io_context owning @cic. - * @cic: the @cic to free. - * - * We drop cfq io contexts lazily, so we may find a dead one. - */ -static void bfq_drop_dead_cic(struct bfq_data *bfqd, struct io_context *ioc, - struct cfq_io_context *cic) -{ - unsigned long flags; - - WARN_ON(!list_empty(&cic->queue_list)); - BUG_ON(cic->key != bfqd_dead_key(bfqd)); - - spin_lock_irqsave(&ioc->lock, flags); - - BUG_ON(ioc->ioc_data == cic); - - /* - * With shared I/O contexts two lookups may race and drop the - * same cic more than one time: RCU guarantees that the storage - * will not be freed too early, here we make sure that we do - * not try to remove the cic from the hashing structures multiple - * times. - */ - if (!hlist_unhashed(&cic->cic_list)) { - radix_tree_delete(&ioc->bfq_radix_root, bfqd->cic_index); - hlist_del_init_rcu(&cic->cic_list); - bfq_cic_free(cic); - } - - spin_unlock_irqrestore(&ioc->lock, flags); -} - -/** - * bfq_cic_lookup - search into @ioc a cic associated to @bfqd. - * @bfqd: the lookup key. 
- * @ioc: the io_context of the process doing I/O. - * - * If @ioc already has a cic associated to @bfqd return it, return %NULL - * otherwise. - */ -static struct cfq_io_context *bfq_cic_lookup(struct bfq_data *bfqd, - struct io_context *ioc) -{ - struct cfq_io_context *cic; - unsigned long flags; - void *k; - - if (unlikely(ioc == NULL)) - return NULL; - - rcu_read_lock(); - - /* We maintain a last-hit cache, to avoid browsing over the tree. */ - cic = rcu_dereference(ioc->ioc_data); - if (cic != NULL) { - k = rcu_dereference(cic->key); - if (k == bfqd) - goto out; - } - - do { - cic = radix_tree_lookup(&ioc->bfq_radix_root, - bfqd->cic_index); - if (cic == NULL) - goto out; - - k = rcu_dereference(cic->key); - if (unlikely(k != bfqd)) { - rcu_read_unlock(); - bfq_drop_dead_cic(bfqd, ioc, cic); - rcu_read_lock(); - continue; - } - - spin_lock_irqsave(&ioc->lock, flags); - rcu_assign_pointer(ioc->ioc_data, cic); - spin_unlock_irqrestore(&ioc->lock, flags); - break; - } while (1); - -out: - rcu_read_unlock(); - - return cic; -} - -/** - * bfq_cic_link - add @cic to @ioc. - * @bfqd: bfq_data @cic refers to. - * @ioc: io_context @cic belongs to. - * @cic: the cic to link. - * @gfp_mask: the mask to use for radix tree preallocations. - * - * Add @cic to @ioc, using @bfqd as the search key. This enables us to - * lookup the process specific cfq io context when entered from the block - * layer. Also adds @cic to a per-bfqd list, used when this queue is - * removed. - */ -static int bfq_cic_link(struct bfq_data *bfqd, struct io_context *ioc, - struct cfq_io_context *cic, gfp_t gfp_mask) -{ - unsigned long flags; - int ret; - - ret = radix_tree_preload(gfp_mask); - if (ret == 0) { - cic->ioc = ioc; - - /* No write-side locking, cic is not published yet. */ - rcu_assign_pointer(cic->key, bfqd); - - spin_lock_irqsave(&ioc->lock, flags); - ret = radix_tree_insert(&ioc->bfq_radix_root, - bfqd->cic_index, cic); - if (ret == 0) - hlist_add_head_rcu(&cic->cic_list, &ioc->bfq_cic_list); - spin_unlock_irqrestore(&ioc->lock, flags); - - radix_tree_preload_end(); - - if (ret == 0) { - spin_lock_irqsave(bfqd->queue->queue_lock, flags); - list_add(&cic->queue_list, &bfqd->cic_list); - spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); - } - } - - if (ret != 0) - printk(KERN_ERR "bfq: cic link failed!\n"); - - return ret; -} - -/** - * bfq_ioc_set_ioprio - signal a priority change to the cics belonging to @ioc. - * @ioc: the io_context changing its priority. - */ -static inline void bfq_ioc_set_ioprio(struct io_context *ioc) -{ - call_for_each_cic(ioc, bfq_changed_ioprio); -} - -/** - * bfq_get_io_context - return the @cic associated to @bfqd in @ioc. - * @bfqd: the search key. - * @gfp_mask: the mask to use for cic allocation. - * - * Setup general io context and cfq io context. There can be several cfq - * io contexts per general io context, if this process is doing io to more - * than one device managed by cfq. - */ -static struct cfq_io_context *bfq_get_io_context(struct bfq_data *bfqd, - gfp_t gfp_mask) -{ - struct io_context *ioc = NULL; - struct cfq_io_context *cic; - - might_sleep_if(gfp_mask & __GFP_WAIT); - - ioc = get_io_context(gfp_mask, bfqd->queue->node); - if (ioc == NULL) - return NULL; - - /* Lookup for an existing cic. */ - cic = bfq_cic_lookup(bfqd, ioc); - if (cic != NULL) - goto out; - - /* Alloc one if needed. */ - cic = bfq_alloc_io_context(bfqd, gfp_mask); - if (cic == NULL) - goto err; - - /* Link it into the ioc's radix tree and cic list. 
*/ - if (bfq_cic_link(bfqd, ioc, cic, gfp_mask) != 0) - goto err_free; - -out: - /* - * test_and_clear_bit() implies a memory barrier, paired with - * the wmb() in fs/ioprio.c, so the value seen for ioprio is the - * new one. - */ - if (unlikely(test_and_clear_bit(IOC_BFQ_IOPRIO_CHANGED, - ioc->ioprio_changed))) - bfq_ioc_set_ioprio(ioc); - - return cic; -err_free: - bfq_cic_free(cic); -err: - put_io_context(ioc); - return NULL; -} diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c deleted file mode 100644 index 44132842fcc..00000000000 --- a/block/bfq-iosched.c +++ /dev/null @@ -1,3370 +0,0 @@ -/* - * BFQ, or Budget Fair Queueing, disk scheduler. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe - * - * Copyright (C) 2008 Fabio Checconi - * Paolo Valente - * - * Copyright (C) 2010 Paolo Valente - * - * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. - * - * BFQ is a proportional share disk scheduling algorithm based on the - * slice-by-slice service scheme of CFQ. But BFQ assigns budgets, - * measured in number of sectors, to tasks instead of time slices. - * The disk is not granted to the active task for a given time slice, - * but until it has exahusted its assigned budget. This change from - * the time to the service domain allows BFQ to distribute the disk - * bandwidth among tasks as desired, without any distortion due to - * ZBR, workload fluctuations or other factors. BFQ uses an ad hoc - * internal scheduler, called B-WF2Q+, to schedule tasks according to - * their budgets. Thanks to this accurate scheduler, BFQ can afford - * to assign high budgets to disk-bound non-seeky tasks (to boost the - * throughput), and yet guarantee low latencies to interactive and - * soft real-time applications. - * - * BFQ has been introduced in [1], where the interested reader can - * find an accurate description of the algorithm, the bandwidth - * distribution and latency guarantees it provides, plus formal proofs - * of all the properties. With respect to the algorithm presented in - * the paper, this implementation adds several little heuristics, and - * a hierarchical extension, based on H-WF2Q+. - * - * B-WF2Q+ is based on WF2Q+, that is described in [2], together with - * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) - * complexity derives from the one introduced with EEVDF in [3]. - * - * [1] P. Valente and F. Checconi, ``High Throughput Disk Scheduling - * with Deterministic Guarantees on Bandwidth Distribution,'', - * IEEE Transactions on Computer, May 2010. - * - * http://algo.ing.unimo.it/people/paolo/disk_sched/bfq-techreport.pdf - * - * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing - * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, - * Oct 1997. - * - * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz - * - * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline - * First: A Flexible and Accurate Mechanism for Proportional Share - * Resource Allocation,'' technical report. - * - * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "bfq.h" - -/* Max number of dispatches in one round of service. */ -static const int bfq_quantum = 4; - -/* Expiration time of sync (0) and async (1) requests, in jiffies. 
*/ -//static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; -static const int bfq_fifo_expire[2] = { 33, 8 }; - -/* Maximum backwards seek, in KiB. */ -//static const int bfq_back_max = 16 * 1024; -static const int bfq_back_max = 12582912; - -/* Penalty of a backwards seek, in number of sectors. */ -//static const int bfq_back_penalty = 2; -static const int bfq_back_penalty = 1; - -/* Idling period duration, in jiffies. */ -//static int bfq_slice_idle = HZ / 125; -static int bfq_slice_idle = 0; - -/* Default maximum budget values, in sectors and number of requests. */ -//static const int bfq_default_max_budget = 16 * 1024; -static const int bfq_default_max_budget = 12582912; -static const int bfq_max_budget_async_rq = 4; - -/* - * Async to sync throughput distribution is controlled as follows: - * when an async request is served, the entity is charged the number - * of sectors of the request, multipled by the factor below - */ -static const int bfq_async_charge_factor = 10; - -/* Default timeout values, in jiffies, approximating CFQ defaults. */ -//static const int bfq_timeout_sync = HZ / 8; -static const int bfq_timeout_sync = 7; -//static int bfq_timeout_async = HZ / 25; -static int bfq_timeout_async = 5; - -struct kmem_cache *bfq_pool; -struct kmem_cache *bfq_ioc_pool; - -static DEFINE_PER_CPU(unsigned long, bfq_ioc_count); -static struct completion *bfq_ioc_gone; -static DEFINE_SPINLOCK(bfq_ioc_gone_lock); - -static DEFINE_SPINLOCK(cic_index_lock); -static DEFINE_IDA(cic_index_ida); - -/* Below this threshold (in ms), we consider thinktime immediate. */ -#define BFQ_MIN_TT 2 - -/* hw_tag detection: parallel requests threshold and min samples needed. */ -#define BFQ_HW_QUEUE_THRESHOLD 4 -#define BFQ_HW_QUEUE_SAMPLES 32 - -#define BFQQ_SEEK_THR (sector_t)(8 * 1024) -#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) - -/* Min samples used for peak rate estimation (for autotuning). */ -#define BFQ_PEAK_RATE_SAMPLES 32 - -/* Shift used for peak rate fixed precision calculations. */ -#define BFQ_RATE_SHIFT 16 - -/* - * The duration of the weight raising for interactive applications is - * computed automatically (as default behaviour), using the following - * formula: duration = (R / r) * T, where r is the peak rate of the - * disk, and R and T are two reference parameters. In particular, R is - * the peak rate of a reference disk, and T is about the maximum time - * for starting popular large applications on that disk, under BFQ and - * while reading two files in parallel. Finally, BFQ uses two - * different pairs (R, T) depending on whether the disk is rotational - * or non-rotational. 
- */ -#define T_rot (msecs_to_jiffies(5500)) -#define T_nonrot (msecs_to_jiffies(2000)) -/* Next two quantities are in sectors/usec, left-shifted by BFQ_RATE_SHIFT */ -#define R_rot 17415 -#define R_nonrot 34791 - -#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ - { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) - -#define RQ_CIC(rq) \ - ((struct cfq_io_context *) (rq)->elevator_private[0]) -#define RQ_BFQQ(rq) ((rq)->elevator_private[1]) - -static inline void bfq_schedule_dispatch(struct bfq_data *bfqd); - -#include "bfq-ioc.c" -#include "bfq-sched.c" -#include "bfq-cgroup.c" - -#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ - IOPRIO_CLASS_IDLE) -#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ - IOPRIO_CLASS_RT) - -#define bfq_sample_valid(samples) ((samples) > 80) - -/* - * We regard a request as SYNC, if either it's a read or has the SYNC bit - * set (in which case it could also be a direct WRITE). - */ -static inline int bfq_bio_sync(struct bio *bio) -{ - if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) - return 1; - - return 0; -} - -/* - * Scheduler run of queue, if there are requests pending and no one in the - * driver that will restart queueing. - */ -static inline void bfq_schedule_dispatch(struct bfq_data *bfqd) -{ - if (bfqd->queued != 0) { - bfq_log(bfqd, "schedule dispatch"); - kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work); - } -} - -/* - * Lifted from AS - choose which of rq1 and rq2 that is best served now. - * We choose the request that is closesr to the head right now. Distance - * behind the head is penalized and only allowed to a certain extent. - */ -static struct request *bfq_choose_req(struct bfq_data *bfqd, - struct request *rq1, - struct request *rq2, - sector_t last) -{ - sector_t s1, s2, d1 = 0, d2 = 0; - unsigned long back_max; -#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ -#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ - unsigned wrap = 0; /* bit mask: requests behind the disk head? */ - - if (rq1 == NULL || rq1 == rq2) - return rq2; - if (rq2 == NULL) - return rq1; - - if (rq_is_sync(rq1) && !rq_is_sync(rq2)) - return rq1; - else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) - return rq2; - if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) - return rq1; - else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) - return rq2; - - s1 = blk_rq_pos(rq1); - s2 = blk_rq_pos(rq2); - - /* - * By definition, 1KiB is 2 sectors. - */ - back_max = bfqd->bfq_back_max * 2; - - /* - * Strict one way elevator _except_ in the case where we allow - * short backward seeks which are biased as twice the cost of a - * similar forward seek. - */ - if (s1 >= last) - d1 = s1 - last; - else if (s1 + back_max >= last) - d1 = (last - s1) * bfqd->bfq_back_penalty; - else - wrap |= BFQ_RQ1_WRAP; - - if (s2 >= last) - d2 = s2 - last; - else if (s2 + back_max >= last) - d2 = (last - s2) * bfqd->bfq_back_penalty; - else - wrap |= BFQ_RQ2_WRAP; - - /* Found required data */ - - /* - * By doing switch() on the bit mask "wrap" we avoid having to - * check two variables for all permutations: --> faster! 
- */ - switch (wrap) { - case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ - if (d1 < d2) - return rq1; - else if (d2 < d1) - return rq2; - else { - if (s1 >= s2) - return rq1; - else - return rq2; - } - - case BFQ_RQ2_WRAP: - return rq1; - case BFQ_RQ1_WRAP: - return rq2; - case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ - default: - /* - * Since both rqs are wrapped, - * start with the one that's further behind head - * (--> only *one* back seek required), - * since back seek takes more time than forward. - */ - if (s1 <= s2) - return rq1; - else - return rq2; - } -} - -static struct bfq_queue * -bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, - sector_t sector, struct rb_node **ret_parent, - struct rb_node ***rb_link) -{ - struct rb_node **p, *parent; - struct bfq_queue *bfqq = NULL; - - parent = NULL; - p = &root->rb_node; - while (*p) { - struct rb_node **n; - - parent = *p; - bfqq = rb_entry(parent, struct bfq_queue, pos_node); - - /* - * Sort strictly based on sector. Smallest to the left, - * largest to the right. - */ - if (sector > blk_rq_pos(bfqq->next_rq)) - n = &(*p)->rb_right; - else if (sector < blk_rq_pos(bfqq->next_rq)) - n = &(*p)->rb_left; - else - break; - p = n; - bfqq = NULL; - } - - *ret_parent = parent; - if (rb_link) - *rb_link = p; - - bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", - (long long unsigned)sector, - bfqq != NULL ? bfqq->pid : 0); - - return bfqq; -} - -static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) -{ - struct rb_node **p, *parent; - struct bfq_queue *__bfqq; - - if (bfqq->pos_root != NULL) { - rb_erase(&bfqq->pos_node, bfqq->pos_root); - bfqq->pos_root = NULL; - } - - if (bfq_class_idle(bfqq)) - return; - if (!bfqq->next_rq) - return; - - bfqq->pos_root = &bfqd->rq_pos_tree; - __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, - blk_rq_pos(bfqq->next_rq), &parent, &p); - if (__bfqq == NULL) { - rb_link_node(&bfqq->pos_node, parent, p); - rb_insert_color(&bfqq->pos_node, bfqq->pos_root); - } else - bfqq->pos_root = NULL; -} - -static struct request *bfq_find_next_rq(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct request *last) -{ - struct rb_node *rbnext = rb_next(&last->rb_node); - struct rb_node *rbprev = rb_prev(&last->rb_node); - struct request *next = NULL, *prev = NULL; - - BUG_ON(RB_EMPTY_NODE(&last->rb_node)); - - if (rbprev != NULL) - prev = rb_entry_rq(rbprev); - - if (rbnext != NULL) - next = rb_entry_rq(rbnext); - else { - rbnext = rb_first(&bfqq->sort_list); - if (rbnext && rbnext != &last->rb_node) - next = rb_entry_rq(rbnext); - } - - return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); -} - -/* Must be called with eqm_lock held */ -static void bfq_del_rq_rb(struct request *rq) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - struct bfq_data *bfqd = bfqq->bfqd; - const int sync = rq_is_sync(rq); - - BUG_ON(bfqq->queued[sync] == 0); - bfqq->queued[sync]--; - bfqd->queued--; - - elv_rb_del(&bfqq->sort_list, rq); - - if (RB_EMPTY_ROOT(&bfqq->sort_list)) { - if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->active_queue) - bfq_del_bfqq_busy(bfqd, bfqq, 1); - /* - * Remove queue from request-position tree as it is empty. 
- */ - if (bfqq->pos_root != NULL) { - rb_erase(&bfqq->pos_node, bfqq->pos_root); - bfqq->pos_root = NULL; - } - } -} - -/* see the definition of bfq_async_charge_factor for details */ -static inline unsigned long bfq_serv_to_charge(struct request *rq, - struct bfq_queue *bfqq) -{ - return blk_rq_sectors(rq) * - (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->raising_coeff == 1) * - bfq_async_charge_factor)); -} - -/** - * bfq_updated_next_req - update the queue after a new next_rq selection. - * @bfqd: the device data the queue belongs to. - * @bfqq: the queue to update. - * - * If the first request of a queue changes we make sure that the queue - * has enough budget to serve at least its first request (if the - * request has grown). We do this because if the queue has not enough - * budget for its first request, it has to go through two dispatch - * rounds to actually get it dispatched. - */ -static void bfq_updated_next_req(struct bfq_data *bfqd, - struct bfq_queue *bfqq) -{ - struct bfq_entity *entity = &bfqq->entity; - struct bfq_service_tree *st = bfq_entity_service_tree(entity); - struct request *next_rq = bfqq->next_rq; - unsigned long new_budget; - - if (next_rq == NULL) - return; - - if (bfqq == bfqd->active_queue) - /* - * In order not to break guarantees, budgets cannot be - * changed after an entity has been selected. - */ - return; - - BUG_ON(entity->tree != &st->active); - BUG_ON(entity == entity->sched_data->active_entity); - - new_budget = max_t(unsigned long, bfqq->max_budget, - bfq_serv_to_charge(next_rq, bfqq)); - entity->budget = new_budget; - bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", new_budget); - bfq_activate_bfqq(bfqd, bfqq); -} - -static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd) -{ - u64 dur; - - if (bfqd->bfq_raising_max_time > 0) - return bfqd->bfq_raising_max_time; - - dur = bfqd->RT_prod; - do_div(dur, bfqd->peak_rate); - - return dur; -} - -static inline void -bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct cfq_io_context *cic) -{ - if (cic->saved_idle_window) - bfq_mark_bfqq_idle_window(bfqq); - else - bfq_clear_bfqq_idle_window(bfqq); - if (cic->raising_time_left && bfqq->bfqd->low_latency) { - /* - * Start a weight raising period with the duration given by - * the raising_time_left snapshot. - */ - bfqq->raising_coeff = bfqq->bfqd->bfq_raising_coeff; - bfqq->raising_cur_max_time = cic->raising_time_left; - bfqq->last_rais_start_finish = jiffies; - } - /* - * Clear raising_time_left to prevent bfq_bfqq_save_state() from - * getting confused about the queue's need of a weight-raising - * period. - */ - cic->raising_time_left = 0; -} - -/* - * Must be called with the queue_lock held. 
- */ -static int bfqq_process_refs(struct bfq_queue *bfqq) -{ - int process_refs, io_refs; - - io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; - process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; - BUG_ON(process_refs < 0); - return process_refs; -} - -static void bfq_add_rq_rb(struct request *rq) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - struct bfq_entity *entity = &bfqq->entity; - struct bfq_data *bfqd = bfqq->bfqd; - struct request *next_rq, *prev; - unsigned long old_raising_coeff = bfqq->raising_coeff; - int idle_for_long_time = bfqq->budget_timeout + - bfqd->bfq_raising_min_idle_time < jiffies; - - bfq_log_bfqq(bfqd, bfqq, "add_rq_rb %d", rq_is_sync(rq)); - bfqq->queued[rq_is_sync(rq)]++; - bfqd->queued++; - - elv_rb_add(&bfqq->sort_list, rq); - - spin_lock(&bfqd->eqm_lock); - - /* - * Check if this request is a better next-serve candidate. - */ - prev = bfqq->next_rq; - next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); - BUG_ON(next_rq == NULL); - bfqq->next_rq = next_rq; - - /* - * Adjust priority tree position, if next_rq changes. - */ - if (prev != bfqq->next_rq) - bfq_rq_pos_tree_add(bfqd, bfqq); - - spin_unlock(&bfqd->eqm_lock); - - if (!bfq_bfqq_busy(bfqq)) { - int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && - bfqq->soft_rt_next_start < jiffies; - entity->budget = max_t(unsigned long, bfqq->max_budget, - bfq_serv_to_charge(next_rq, bfqq)); - - if (! bfqd->low_latency) - goto add_bfqq_busy; - - if (bfq_bfqq_just_split(bfqq)) - goto set_ioprio_changed; - - /* - * If the queue: - * - is not being boosted, - * - has been idle for enough time, - * - is not a sync queue or is linked to a cfq_io_context (it is - * shared "for its nature" or it is not shared and its - * requests have not been redirected to a shared queue) - * start a weight-raising period. 
- */ - if(old_raising_coeff == 1 && (idle_for_long_time || soft_rt) && - (!bfq_bfqq_sync(bfqq) || bfqq->cic != NULL)) { - bfqq->raising_coeff = bfqd->bfq_raising_coeff; - if (idle_for_long_time) - bfqq->raising_cur_max_time = - bfq_wrais_duration(bfqd); - else - bfqq->raising_cur_max_time = - bfqd->bfq_raising_rt_max_time; - bfq_log_bfqq(bfqd, bfqq, - "wrais starting at %llu msec," - "rais_max_time %u", - bfqq->last_rais_start_finish, - jiffies_to_msecs(bfqq-> - raising_cur_max_time)); - } else if (old_raising_coeff > 1) { - if (idle_for_long_time) - bfqq->raising_cur_max_time = - bfq_wrais_duration(bfqd); - else if (bfqq->raising_cur_max_time == - bfqd->bfq_raising_rt_max_time && - !soft_rt) { - bfqq->raising_coeff = 1; - bfq_log_bfqq(bfqd, bfqq, - "wrais ending at %llu msec," - "rais_max_time %u", - bfqq->last_rais_start_finish, - jiffies_to_msecs(bfqq-> - raising_cur_max_time)); - } - } -set_ioprio_changed: - if (old_raising_coeff != bfqq->raising_coeff) - entity->ioprio_changed = 1; -add_bfqq_busy: - bfq_add_bfqq_busy(bfqd, bfqq); - } else { - if(bfqd->low_latency && old_raising_coeff == 1 && - !rq_is_sync(rq) && - bfqq->last_rais_start_finish + - bfqd->bfq_raising_min_inter_arr_async < jiffies) { - bfqq->raising_coeff = bfqd->bfq_raising_coeff; - bfqq->raising_cur_max_time = bfq_wrais_duration(bfqd); - - entity->ioprio_changed = 1; - bfq_log_bfqq(bfqd, bfqq, - "non-idle wrais starting at %llu msec," - "rais_max_time %u", - bfqq->last_rais_start_finish, - jiffies_to_msecs(bfqq-> - raising_cur_max_time)); - } - bfq_updated_next_req(bfqd, bfqq); - } - - if(bfqd->low_latency && - (old_raising_coeff == 1 || bfqq->raising_coeff == 1 || - idle_for_long_time)) - bfqq->last_rais_start_finish = jiffies; -} - -static void bfq_reposition_rq_rb(struct bfq_queue *bfqq, struct request *rq) -{ - elv_rb_del(&bfqq->sort_list, rq); - bfqq->queued[rq_is_sync(rq)]--; - bfqq->bfqd->queued--; - bfq_add_rq_rb(rq); -} - -static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, - struct bio *bio) -{ - struct task_struct *tsk = current; - struct cfq_io_context *cic; - struct bfq_queue *bfqq; - - cic = bfq_cic_lookup(bfqd, tsk->io_context); - if (cic == NULL) - return NULL; - - spin_lock(&bfqd->eqm_lock); - bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); - spin_unlock(&bfqd->eqm_lock); - if (bfqq != NULL) { - sector_t sector = bio->bi_sector + bio_sectors(bio); - - return elv_rb_find(&bfqq->sort_list, sector); - } - - return NULL; -} - -static void bfq_activate_request(struct request_queue *q, struct request *rq) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - - bfqd->rq_in_driver++; - bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); - bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", - (long long unsigned)bfqd->last_position); -} - -static void bfq_deactivate_request(struct request_queue *q, struct request *rq) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - - WARN_ON(bfqd->rq_in_driver == 0); - bfqd->rq_in_driver--; -} - -static void bfq_remove_request(struct request *rq) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - struct bfq_data *bfqd = bfqq->bfqd; - - spin_lock(&bfqq->bfqd->eqm_lock); - if (bfqq->next_rq == rq) { - bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); - bfq_updated_next_req(bfqd, bfqq); - } - - list_del_init(&rq->queuelist); - bfq_del_rq_rb(rq); - spin_unlock(&bfqq->bfqd->eqm_lock); - - if (rq->cmd_flags & REQ_META) { - WARN_ON(bfqq->meta_pending == 0); - bfqq->meta_pending--; - } -} - -static int bfq_merge(struct request_queue *q, struct 
request **req, - struct bio *bio) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct request *__rq; - - __rq = bfq_find_rq_fmerge(bfqd, bio); - if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { - *req = __rq; - return ELEVATOR_FRONT_MERGE; - } - - return ELEVATOR_NO_MERGE; -} - -static void bfq_merged_request(struct request_queue *q, struct request *req, - int type) -{ - if (type == ELEVATOR_FRONT_MERGE) { - struct bfq_queue *bfqq = RQ_BFQQ(req); - - bfq_reposition_rq_rb(bfqq, req); - } -} - -static void bfq_merged_requests(struct request_queue *q, struct request *rq, - struct request *next) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - - /* - * Reposition in fifo if next is older than rq. - */ - if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && - time_before(rq_fifo_time(next), rq_fifo_time(rq))) { - list_move(&rq->queuelist, &next->queuelist); - rq_set_fifo_time(rq, rq_fifo_time(next)); - } - - /* - * eqm_lock needed to avoid that other critical sections not holding - * the queue_lock read an inconsistent value from bfqq->next_rq while - * traversing the rq_pos_trees - */ - if (bfqq->next_rq == next) { - spin_lock(&bfqq->bfqd->eqm_lock); - bfqq->next_rq = rq; - spin_unlock(&bfqq->bfqd->eqm_lock); - } - - bfq_remove_request(next); -} - -/* Must be called with bfqq != NULL */ -static inline void bfq_bfqq_end_raising(struct bfq_queue *bfqq) -{ - BUG_ON(bfqq == NULL); - bfqq->raising_coeff = 1; - bfqq->raising_cur_max_time = 0; - /* Trigger a weight change on the next activation of the queue */ - bfqq->entity.ioprio_changed = 1; -} - -static void bfq_end_raising_async_queues(struct bfq_data *bfqd, - struct bfq_group *bfqg) -{ - int i, j; - - for (i = 0; i < 2; i++) - for (j = 0; j < IOPRIO_BE_NR; j++) - if (bfqg->async_bfqq[i][j] != NULL) - bfq_bfqq_end_raising(bfqg->async_bfqq[i][j]); - if (bfqg->async_idle_bfqq != NULL) - bfq_bfqq_end_raising(bfqg->async_idle_bfqq); -} - -static void bfq_end_raising(struct bfq_data *bfqd) -{ - struct bfq_queue *bfqq; - - spin_lock_irq(bfqd->queue->queue_lock); - - list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) - bfq_bfqq_end_raising(bfqq); - list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) - bfq_bfqq_end_raising(bfqq); - bfq_end_raising_async(bfqd); - - spin_unlock_irq(bfqd->queue->queue_lock); -} - -static inline sector_t bfq_io_struct_pos(void *io_struct, bool request) -{ - if (request) - return blk_rq_pos(io_struct); - else - return ((struct bio *)io_struct)->bi_sector; -} - -static inline sector_t bfq_dist_from(sector_t pos1, - sector_t pos2) -{ - if (pos1 >= pos2) - return pos1 - pos2; - else - return pos2 - pos1; -} - -static inline int bfq_rq_close_to_sector(void *io_struct, bool request, - sector_t sector) -{ - return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <= - BFQQ_SEEK_THR; -} - -static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector) -{ - struct rb_root *root = &bfqd->rq_pos_tree; - struct rb_node *parent, *node; - struct bfq_queue *__bfqq; - - if (RB_EMPTY_ROOT(root)) - return NULL; - - /* - * First, if we find a request starting at the end of the last - * request, choose it. - */ - __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); - if (__bfqq != NULL) - return __bfqq; - - /* - * If the exact sector wasn't found, the parent of the NULL leaf - * will contain the closest sector (rq_pos_tree sorted by next_request - * position). 
- */ - __bfqq = rb_entry(parent, struct bfq_queue, pos_node); - if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) - return __bfqq; - - if (blk_rq_pos(__bfqq->next_rq) < sector) - node = rb_next(&__bfqq->pos_node); - else - node = rb_prev(&__bfqq->pos_node); - if (node == NULL) - return NULL; - - __bfqq = rb_entry(node, struct bfq_queue, pos_node); - if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) - return __bfqq; - - return NULL; -} - -/* - * bfqd - obvious - * cur_bfqq - passed in so that we don't decide that the current queue - * is closely cooperating with itself - * sector - used as a reference point to search for a close queue - */ -static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, - struct bfq_queue *cur_bfqq, - sector_t sector) -{ - struct bfq_queue *bfqq; - - if (bfq_class_idle(cur_bfqq)) - return NULL; - if (!bfq_bfqq_sync(cur_bfqq)) - return NULL; - if (BFQQ_SEEKY(cur_bfqq)) - return NULL; - - /* If device has only one backlogged bfq_queue, don't search. */ - if (bfqd->busy_queues == 1) - return NULL; - - /* - * We should notice if some of the queues are cooperating, e.g. - * working closely on the same area of the disk. In that case, - * we can group them together and don't waste time idling. - */ - bfqq = bfqq_close(bfqd, sector); - if (bfqq == NULL || bfqq == cur_bfqq) - return NULL; - - /* - * Do not merge queues from different bfq_groups. - */ - if (bfqq->entity.parent != cur_bfqq->entity.parent) - return NULL; - - /* - * It only makes sense to merge sync queues. - */ - if (!bfq_bfqq_sync(bfqq)) - return NULL; - if (BFQQ_SEEKY(bfqq)) - return NULL; - - /* - * Do not merge queues of different priority classes. - */ - if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) - return NULL; - - return bfqq; -} - -static struct bfq_queue * -bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) -{ - int process_refs, new_process_refs; - struct bfq_queue *__bfqq; - - /* - * If there are no process references on the new_bfqq, then it is - * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain - * may have dropped their last reference (not just their last process - * reference). - */ - if (!bfqq_process_refs(new_bfqq)) - return NULL; - - /* Avoid a circular list and skip interim queue merges. */ - while ((__bfqq = new_bfqq->new_bfqq)) { - if (__bfqq == bfqq) - return NULL; - new_bfqq = __bfqq; - } - - process_refs = bfqq_process_refs(bfqq); - new_process_refs = bfqq_process_refs(new_bfqq); - /* - * If the process for the bfqq has gone away, there is no - * sense in merging the queues. - */ - if (process_refs == 0 || new_process_refs == 0) - return NULL; - - bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", - new_bfqq->pid); - - /* - * Merging is just a redirection: the requests of the process owning - * one of the two queues are redirected to the other queue. The latter - * queue, in its turn, is set as shared if this is the first time that - * the requests of some process are redirected to it. - * - * We redirect bfqq to new_bfqq and not the opposite, because we - * are in the context of the process owning bfqq, hence we have the - * io_cq of this process. So we can immediately configure this io_cq - * to redirect the requests of the process to new_bfqq. - * - * NOTE, even if new_bfqq coincides with the active queue, the io_cq of - * new_bfqq is not available, because, if the active queue is shared, - * bfqd->active_cic may not point to the io_cq of the active queue. 
- * Redirecting the requests of the process owning bfqq to the currently - * active queue is in any case the best option, as we feed the active queue - * with new requests close to the last request served and, by doing so, - * hopefully increase the throughput. - */ - bfqq->new_bfqq = new_bfqq; - atomic_add(process_refs, &new_bfqq->ref); - return new_bfqq; -} - -/* - * Attempt to schedule a merge of bfqq with the currently active queue or - * with a close queue among the scheduled queues. - * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue - * structure otherwise. - */ -static struct bfq_queue * -bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, - void *io_struct, bool request) -{ - struct bfq_queue *active_bfqq, *new_bfqq; - - if (bfqq->new_bfqq) - return bfqq->new_bfqq; - - if (!io_struct) - return NULL; - - active_bfqq = bfqd->active_queue; - - if (active_bfqq == NULL || active_bfqq == bfqq || !bfqd->active_cic) - goto check_scheduled; - - if (bfq_class_idle(active_bfqq) || bfq_class_idle(bfqq)) - goto check_scheduled; - - if (bfq_class_rt(active_bfqq) != bfq_class_rt(bfqq)) - goto check_scheduled; - - if (active_bfqq->entity.parent != bfqq->entity.parent) - goto check_scheduled; - - if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && - bfq_bfqq_sync(active_bfqq) && bfq_bfqq_sync(bfqq)) - if ((new_bfqq = bfq_setup_merge(bfqq, active_bfqq))) - return new_bfqq; /* Merge with the active queue */ - - /* - * Check whether there is a cooperator among currently scheduled - * queues. The only thing we need is that the bio/request is not - * NULL, as we need it to establish whether a cooperator exists. - */ -check_scheduled: - new_bfqq = bfq_close_cooperator(bfqd, bfqq, - bfq_io_struct_pos(io_struct, request)); - if (new_bfqq) - return bfq_setup_merge(bfqq, new_bfqq); - - return NULL; -} - -static inline void -bfq_bfqq_save_state(struct bfq_queue *bfqq) -{ - /* - * If bfqq->cic == NULL, the queue is already shared or its requests - * have already been redirected to a shared queue; both idle window - * and weight raising state have already been saved. Do nothing. - */ - if (bfqq->cic == NULL) - return; - if (bfqq->cic->raising_time_left) - /* - * This is the queue of a just-started process, and would - * deserve weight raising: we set raising_time_left to the full - * weight-raising duration to trigger weight-raising when and - * if the queue is split and the first request of the queue - * is enqueued. - */ - bfqq->cic->raising_time_left = bfq_wrais_duration(bfqq->bfqd); - else if (bfqq->raising_coeff > 1) { - unsigned long wrais_duration = - jiffies - bfqq->last_rais_start_finish; - /* - * It may happen that a queue's weight raising period lasts - * longer than its raising_cur_max_time, as weight raising is - * handled only when a request is enqueued or dispatched (it - * does not use any timer). If the weight raising period is - * about to end, don't save it. - */ - if (bfqq->raising_cur_max_time <= wrais_duration) - bfqq->cic->raising_time_left = 0; - else - bfqq->cic->raising_time_left = - bfqq->raising_cur_max_time - wrais_duration; - /* - * The bfq_queue is becoming shared or the requests of the - * process owning the queue are being redirected to a shared - * queue. Stop the weight raising period of the queue, as in - * both cases it should not be owned by an interactive or soft - * real-time application. 
- */ - bfq_bfqq_end_raising(bfqq); - } else - bfqq->cic->raising_time_left = 0; - bfqq->cic->saved_idle_window = bfq_bfqq_idle_window(bfqq); -} - -static inline void -bfq_get_cic_reference(struct bfq_queue *bfqq) -{ - /* - * If bfqq->cic has a non-NULL value, the cic to which it belongs - * is about to begin using a shared bfq_queue. - */ - if (bfqq->cic) - atomic_long_inc(&bfqq->cic->ioc->refcount); -} - -static void -bfq_merge_bfqqs(struct bfq_data *bfqd, struct cfq_io_context *cic, - struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) -{ - bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", - (long unsigned)new_bfqq->pid); - /* Save weight raising and idle window of the merged queues */ - bfq_bfqq_save_state(bfqq); - bfq_bfqq_save_state(new_bfqq); - /* - * Grab a reference to the cic, to prevent it from being destroyed - * before being possibly touched by a bfq_split_bfqq(). - */ - bfq_get_cic_reference(bfqq); - bfq_get_cic_reference(new_bfqq); - /* Merge queues (that is, let cic redirect its requests to new_bfqq) */ - cic_set_bfqq(cic, new_bfqq, 1); - bfq_mark_bfqq_coop(new_bfqq); - /* - * new_bfqq now belongs to at least two cics (it is a shared queue): set - * new_bfqq->cic to NULL. bfqq either: - * - does not belong to any cic any more, and hence bfqq->cic must - * be set to NULL, or - * - is a queue whose owning cics have already been redirected to a - * different queue, hence the queue is destined to not belong to any - * cic soon and bfqq->cic is already NULL (therefore the next - * assignment causes no harm). - */ - new_bfqq->cic = NULL; - bfqq->cic = NULL; - bfq_put_queue(bfqq); -} - -static int bfq_allow_merge(struct request_queue *q, struct request *rq, - struct bio *bio) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct cfq_io_context *cic; - struct bfq_queue *bfqq, *new_bfqq; - unsigned long flags; - - /* Disallow merge of a sync bio into an async request. */ - if (bfq_bio_sync(bio) && !rq_is_sync(rq)) - return 0; - - /* - * Lookup the bfqq that this bio will be queued with. Allow - * merge only if rq is queued there. - */ - cic = bfq_cic_lookup(bfqd, current->io_context); - if (cic == NULL) - return 0; - - /* - * The allow_merge_fn scheduler hook may be called with or without - * the queue_lock being held. Access to the rq_pos_tree data - * structures and to cic->bfqq[] is protected by the eqm_lock. - */ - spin_lock_irqsave(&bfqd->eqm_lock, flags); - bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); - /* - * We take advantage of this function to perform an early merge - * of the queues of possible cooperating processes. - */ - if (bfqq != NULL && - (new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false))) { - bfq_merge_bfqqs(bfqd, cic, bfqq, new_bfqq); - /* - * If we get here, the bio will be queued in the shared queue, - * i.e., new_bfqq, so use new_bfqq to decide whether bio and - * rq can be merged. - */ - bfqq = new_bfqq; - } - spin_unlock_irqrestore(&bfqd->eqm_lock, flags); - - return bfqq == RQ_BFQQ(rq); -} - -static void __bfq_set_active_queue(struct bfq_data *bfqd, - struct bfq_queue *bfqq) -{ - if (bfqq != NULL) { - bfq_mark_bfqq_must_alloc(bfqq); - bfq_mark_bfqq_budget_new(bfqq); - bfq_clear_bfqq_fifo_expire(bfqq); - - bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; - - bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu", - bfqq->entity.budget); - } - - bfqd->active_queue = bfqq; -} - -/* - * Get and set a new active queue for service. 
- */ -static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd) -{ - struct bfq_queue *bfqq = bfq_get_next_queue(bfqd); - - __bfq_set_active_queue(bfqd, bfqq); - return bfqq; -} - -/* - * If enough samples have been computed, return the current max budget - * stored in bfqd, which is dynamically updated according to the - * estimated disk peak rate; otherwise return the default max budget - */ -static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) -{ - if (bfqd->budgets_assigned < 194) - return bfq_default_max_budget; - else - return bfqd->bfq_max_budget; -} - -/* - * Return min budget, which is a fraction of the current or default - * max budget (trying with 1/32) - */ -static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) -{ - if (bfqd->budgets_assigned < 194) - return bfq_default_max_budget / 32; - else - return bfqd->bfq_max_budget / 32; -} - -/* - * Decides whether idling should be done for given device and - * given active queue. - */ -static inline bool bfq_queue_nonrot_noidle(struct bfq_data *bfqd, - struct bfq_queue *active_bfqq) -{ - if (active_bfqq == NULL) - return false; - /* - * If device is SSD it has no seek penalty, disable idling; but - * do so only if: - * - device does not support queuing, otherwise we still have - * a problem with sync vs async workloads; - * - the queue is not weight-raised, to preserve guarantees. - */ - return (blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag && - active_bfqq->raising_coeff == 1); -} - -static void bfq_arm_slice_timer(struct bfq_data *bfqd) -{ - struct bfq_queue *bfqq = bfqd->active_queue; - struct cfq_io_context *cic; - unsigned long sl; - - WARN_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); - - /* Tasks have exited, don't wait. */ - cic = bfqd->active_cic; - if (cic == NULL || atomic_read(&cic->ioc->nr_tasks) == 0) - return; - - bfq_mark_bfqq_wait_request(bfqq); - - /* - * We don't want to idle for seeks, but we do want to allow - * fair distribution of slice time for a process doing back-to-back - * seeks. So allow a little bit of time for him to submit a new rq. - * - * To prevent processes with (partly) seeky workloads from - * being too ill-treated, grant them a small fraction of the - * assigned budget before reducing the waiting time to - * BFQ_MIN_TT. This happened to help reduce latency. - */ - sl = bfqd->bfq_slice_idle; - if (bfq_sample_valid(bfqq->seek_samples) && BFQQ_SEEKY(bfqq) && - bfqq->entity.service > bfq_max_budget(bfqd) / 8 && - bfqq->raising_coeff == 1) - sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); - else if (bfqq->raising_coeff > 1) - sl = sl * 3; - bfqd->last_idling_start = ktime_get(); - mod_timer(&bfqd->idle_slice_timer, jiffies + sl); - bfq_log(bfqd, "arm idle: %u/%u ms", - jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); -} - -/* - * Set the maximum time for the active queue to consume its - * budget. This prevents seeky processes from lowering the disk - * throughput (always guaranteed with a time slice scheme as in CFQ). 
- */ -static void bfq_set_budget_timeout(struct bfq_data *bfqd) -{ - struct bfq_queue *bfqq = bfqd->active_queue; - unsigned int timeout_coeff; - if (bfqq->raising_cur_max_time == bfqd->bfq_raising_rt_max_time) - timeout_coeff = 1; - else - timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; - - bfqd->last_budget_start = ktime_get(); - - bfq_clear_bfqq_budget_new(bfqq); - bfqq->budget_timeout = jiffies + - bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; - - bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", - jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * - timeout_coeff)); -} - -/* - * Move request from internal lists to the request queue dispatch list. - */ -static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct bfq_queue *bfqq = RQ_BFQQ(rq); - - bfq_remove_request(rq); - bfqq->dispatched++; - elv_dispatch_sort(q, rq); - - if (bfq_bfqq_sync(bfqq)) - bfqd->sync_flight++; -} - -/* - * Return expired entry, or NULL to just start from scratch in rbtree. - */ -static struct request *bfq_check_fifo(struct bfq_queue *bfqq) -{ - struct request *rq = NULL; - - if (bfq_bfqq_fifo_expire(bfqq)) - return NULL; - - bfq_mark_bfqq_fifo_expire(bfqq); - - if (list_empty(&bfqq->fifo)) - return NULL; - - rq = rq_entry_fifo(bfqq->fifo.next); - - if (time_before(jiffies, rq_fifo_time(rq))) - return NULL; - - return rq; -} - -static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) -{ - struct bfq_entity *entity = &bfqq->entity; - return entity->budget - entity->service; -} - -/* Must be called with eqm_lock held */ -static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) -{ - BUG_ON(bfqq != bfqd->active_queue); - - __bfq_bfqd_reset_active(bfqd); - - /* - * If this bfqq is shared between multiple processes, check - * to make sure that those processes are still issuing I/Os - * within the mean seek distance. If not, it may be time to - * break the queues apart again. - */ - if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) - bfq_mark_bfqq_split_coop(bfqq); - - if (RB_EMPTY_ROOT(&bfqq->sort_list)) { - /* - * overloading budget_timeout field to store when - * the queue remains with no backlog, used by - * the weight-raising mechanism - */ - bfqq->budget_timeout = jiffies ; - bfq_del_bfqq_busy(bfqd, bfqq, 1); - } - else { - bfq_activate_bfqq(bfqd, bfqq); - /* - * Resort priority tree of potential close cooperators. - */ - bfq_rq_pos_tree_add(bfqd, bfqq); - } -} - -/** - * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. - * @bfqd: device data. - * @bfqq: queue to update. - * @reason: reason for expiration. - * - * Handle the feedback on @bfqq budget. See the body for detailed - * comments. 
- */ -static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - enum bfqq_expiration reason) -{ - struct request *next_rq; - unsigned long budget, min_budget; - - budget = bfqq->max_budget; - min_budget = bfq_min_budget(bfqd); - - BUG_ON(bfqq != bfqd->active_queue); - - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", - bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", - budget, bfq_min_budget(bfqd)); - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", - bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->active_queue)); - - if (bfq_bfqq_sync(bfqq)) { - switch (reason) { - /* - * Caveat: in all the following cases we trade latency - * for throughput. - */ - case BFQ_BFQQ_TOO_IDLE: - /* - * This is the only case where we may reduce - * the budget: if there is no requets of the - * process still waiting for completion, then - * we assume (tentatively) that the timer has - * expired because the batch of requests of - * the process could have been served with a - * smaller budget. Hence, betting that - * process will behave in the same way when it - * becomes backlogged again, we reduce its - * next budget. As long as we guess right, - * this budget cut reduces the latency - * experienced by the process. - * - * However, if there are still outstanding - * requests, then the process may have not yet - * issued its next request just because it is - * still waiting for the completion of some of - * the still oustanding ones. So in this - * subcase we do not reduce its budget, on the - * contrary we increase it to possibly boost - * the throughput, as discussed in the - * comments to the BUDGET_TIMEOUT case. - */ - if (bfqq->dispatched > 0) /* still oustanding reqs */ - budget = min(budget * 2, bfqd->bfq_max_budget); - else { - if (budget > 5 * min_budget) - budget -= 4 * min_budget; - else - budget = min_budget; - } - break; - case BFQ_BFQQ_BUDGET_TIMEOUT: - /* - * We double the budget here because: 1) it - * gives the chance to boost the throughput if - * this is not a seeky process (which may have - * bumped into this timeout because of, e.g., - * ZBR), 2) together with charge_full_budget - * it helps give seeky processes higher - * timestamps, and hence be served less - * frequently. - */ - budget = min(budget * 2, bfqd->bfq_max_budget); - break; - case BFQ_BFQQ_BUDGET_EXHAUSTED: - /* - * The process still has backlog, and did not - * let either the budget timeout or the disk - * idling timeout expire. Hence it is not - * seeky, has a short thinktime and may be - * happy with a higher budget too. So - * definitely increase the budget of this good - * candidate to boost the disk throughput. - */ - budget = min(budget * 4, bfqd->bfq_max_budget); - break; - case BFQ_BFQQ_NO_MORE_REQUESTS: - /* - * Leave the budget unchanged. - */ - default: - return; - } - } else /* async queue */ - /* async queues get always the maximum possible budget - * (their ability to dispatch is limited by - * @bfqd->bfq_max_budget_async_rq). - */ - budget = bfqd->bfq_max_budget; - - bfqq->max_budget = budget; - - if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 && - bfqq->max_budget > bfqd->bfq_max_budget) - bfqq->max_budget = bfqd->bfq_max_budget; - - /* - * Make sure that we have enough budget for the next request. - * Since the finish time of the bfqq must be kept in sync with - * the budget, be sure to call __bfq_bfqq_expire() after the - * update. 
- */ - next_rq = bfqq->next_rq; - if (next_rq != NULL) - bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, - bfq_serv_to_charge(next_rq, bfqq)); - else - bfqq->entity.budget = bfqq->max_budget; - - bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu", - next_rq != NULL ? blk_rq_sectors(next_rq) : 0, - bfqq->entity.budget); -} - -static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) -{ - unsigned long max_budget; - - /* - * The max_budget calculated when autotuning is equal to the - * amount of sectors transfered in timeout_sync at the - * estimated peak rate. - */ - max_budget = (unsigned long)(peak_rate * 1000 * - timeout >> BFQ_RATE_SHIFT); - - return max_budget; -} - -/* - * In addition to updating the peak rate, checks whether the process - * is "slow", and returns 1 if so. This slow flag is used, in addition - * to the budget timeout, to reduce the amount of service provided to - * seeky processes, and hence reduce their chances to lower the - * throughput. See the code for more details. - */ -static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, - int compensate, enum bfqq_expiration reason) -{ - u64 bw, usecs, expected, timeout; - ktime_t delta; - int update = 0; - - if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) - return 0; - - if (compensate) - delta = bfqd->last_idling_start; - else - delta = ktime_get(); - delta = ktime_sub(delta, bfqd->last_budget_start); - usecs = ktime_to_us(delta); - - /* Don't trust short/unrealistic values. */ - if (usecs < 100 || usecs >= LONG_MAX) - return 0; - - /* - * Calculate the bandwidth for the last slice. We use a 64 bit - * value to store the peak rate, in sectors per usec in fixed - * point math. We do so to have enough precision in the estimate - * and to avoid overflows. - */ - bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; - do_div(bw, (unsigned long)usecs); - - timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); - - /* - * Use only long (> 20ms) intervals to filter out spikes for - * the peak rate estimation. - */ - if (usecs > 20000) { - if (bw > bfqd->peak_rate || - (!BFQQ_SEEKY(bfqq) && - reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { - bfq_log(bfqd, "measured bw =%llu", bw); - /* - * To smooth oscillations use a low-pass filter with - * alpha=7/8, i.e., - * new_rate = (7/8) * old_rate + (1/8) * bw - */ - do_div(bw, 8); - if (bw == 0) - return 0; - bfqd->peak_rate *= 7; - do_div(bfqd->peak_rate, 8); - bfqd->peak_rate += bw; - update = 1; - bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); - } - - update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; - - if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) - bfqd->peak_rate_samples++; - - if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && - update && bfqd->bfq_user_max_budget == 0) { - bfqd->bfq_max_budget = - bfq_calc_max_budget(bfqd->peak_rate, timeout); - bfq_log(bfqd, "new max_budget=%lu", - bfqd->bfq_max_budget); - } - } - - /* - * If the process has been served for a too short time - * interval to let its possible sequential accesses prevail on - * the initial seek time needed to move the disk head on the - * first sector it requested, then give the process a chance - * and for the moment return false. 
- */ - if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) - return 0; - - /* - * A process is considered ``slow'' (i.e., seeky, so that we - * cannot treat it fairly in the service domain, as it would - * slow down too much the other processes) if, when a slice - * ends for whatever reason, it has received service at a - * rate that would not be high enough to complete the budget - * before the budget timeout expiration. - */ - expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; - - /* - * Caveat: processes doing IO in the slower disk zones will - * tend to be slow(er) even if not seeky. And the estimated - * peak rate will actually be an average over the disk - * surface. Hence, to not be too harsh with unlucky processes, - * we keep a budget/3 margin of safety before declaring a - * process slow. - */ - return expected > (4 * bfqq->entity.budget) / 3; -} - -/** - * bfq_bfqq_expire - expire a queue. - * @bfqd: device owning the queue. - * @bfqq: the queue to expire. - * @compensate: if true, compensate for the time spent idling. - * @reason: the reason causing the expiration. - * - * - * If the process associated to the queue is slow (i.e., seeky), or in - * case of budget timeout, or, finally, if it is async, we - * artificially charge it an entire budget (independently of the - * actual service it received). As a consequence, the queue will get - * higher timestamps than the correct ones upon reactivation, and - * hence it will be rescheduled as if it had received more service - * than what it actually received. In the end, this class of processes - * will receive less service in proportion to how slowly they consume - * their budgets (and hence how seriously they tend to lower the - * throughput). - * - * In contrast, when a queue expires because it has been idling for - * too much or because it exhausted its budget, we do not touch the - * amount of service it has received. Hence when the queue will be - * reactivated and its timestamps updated, the latter will be in sync - * with the actual service received by the queue until expiration. - * - * Charging a full budget to the first type of queues and the exact - * service to the others has the effect of using the WF2Q+ policy to - * schedule the former on a timeslice basis, without violating the - * service domain guarantees of the latter. - */ -static void bfq_bfqq_expire(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - int compensate, - enum bfqq_expiration reason) -{ - int slow; - BUG_ON(bfqq != bfqd->active_queue); - - /* Update disk peak rate for autotuning and check whether the - * process is slow (see bfq_update_peak_rate). - */ - slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); - - /* - * As above explained, 'punish' slow (i.e., seeky), timed-out - * and async queues, to favor sequential sync workloads. - * - * Processes doing IO in the slower disk zones will tend to be - * slow(er) even if not seeky. Hence, since the estimated peak - * rate is actually an average over the disk surface, these - * processes may timeout just for bad luck. To avoid punishing - * them we do not charge a full budget to a process that - * succeeded in consuming at least 2/3 of its budget. 
- */ - if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && - bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) - bfq_bfqq_charge_full_budget(bfqq); - - if (bfqd->low_latency && bfqq->raising_coeff == 1) - bfqq->last_rais_start_finish = jiffies; - - if (bfqd->low_latency && bfqd->bfq_raising_max_softrt_rate > 0) { - if(reason != BFQ_BFQQ_BUDGET_TIMEOUT) - bfqq->soft_rt_next_start = - jiffies + - HZ * bfqq->entity.service / - bfqd->bfq_raising_max_softrt_rate; - else - bfqq->soft_rt_next_start = -1; /* infinity */ - } - bfq_log_bfqq(bfqd, bfqq, - "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow, - bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); - - /* Increase, decrease or leave budget unchanged according to reason */ - __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); - spin_lock(&bfqd->eqm_lock); - __bfq_bfqq_expire(bfqd, bfqq); - spin_unlock(&bfqd->eqm_lock); -} - -/* - * Budget timeout is not implemented through a dedicated timer, but - * just checked on request arrivals and completions, as well as on - * idle timer expirations. - */ -static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) -{ - if (bfq_bfqq_budget_new(bfqq)) - return 0; - - if (time_before(jiffies, bfqq->budget_timeout)) - return 0; - - return 1; -} - -/* - * If we expire a queue that is waiting for the arrival of a new - * request, we may prevent the fictitious timestamp backshifting that - * allows the guarantees of the queue to be preserved (see [1] for - * this tricky aspect). Hence we return true only if this condition - * does not hold, or if the queue is slow enough to deserve only to be - * kicked off for preserving a high throughput. -*/ -static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) -{ - bfq_log_bfqq(bfqq->bfqd, bfqq, - "may_budget_timeout: wr %d left %d timeout %d", - bfq_bfqq_wait_request(bfqq), - bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, - bfq_bfqq_budget_timeout(bfqq)); - - return (!bfq_bfqq_wait_request(bfqq) || - bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) - && - bfq_bfqq_budget_timeout(bfqq); -} - -/* - * If the active queue is empty, but it is sync and either of the following - * conditions holds, then: 1) the queue must remain active and cannot be - * expired, and 2) the disk must be idled to wait for the possible arrival - * of a new request for the queue. The conditions are: - * - the device is rotational and not performing NCQ, and the queue has its - * idle window set (in this case, waiting for a new request for the queue - * is likely to boost the disk throughput); - * - the queue is weight-raised (waiting for the request is necessary for - * providing the queue with fairness and latency guarantees). - * - * In any case, idling can be disabled for cooperation issues, if - * 1) there is a close cooperator for the queue, or - * 2) the queue is shared and some cooperator is likely to be idle (in this - * case, by not arming the idle timer, we try to slow down the queue, to - * prevent the zones of the disk accessed by the active cooperators to - * become too distant from the zone that will be accessed by the currently - * idle cooperators). 
- */
-static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq,
-				       int budg_timeout)
-{
-	struct bfq_data *bfqd = bfqq->bfqd;
-
-	struct bfq_queue *coop_bfqq;
-
-	spin_lock(&bfqd->eqm_lock);
-	coop_bfqq = bfq_close_cooperator(bfqd, bfqq, bfqd->last_position);
-	spin_unlock(&bfqd->eqm_lock);
-
-	return (bfq_bfqq_sync(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) &&
-		bfqd->bfq_slice_idle != 0 &&
-		((bfq_bfqq_idle_window(bfqq) && !bfqd->hw_tag &&
-		  !blk_queue_nonrot(bfqd->queue))
-		 || bfqq->raising_coeff > 1) &&
-		(bfqd->rq_in_driver == 0 ||
-		 budg_timeout ||
-		 bfqq->raising_coeff > 1) &&
-		!coop_bfqq &&
-		(!bfq_bfqq_coop(bfqq) ||
-		 !bfq_bfqq_some_coop_idle(bfqq)) &&
-		!bfq_queue_nonrot_noidle(bfqd, bfqq));
-}
-
-/*
- * Select a queue for service. If we have a current active queue,
- * check whether to continue servicing it, or retrieve and set a new one.
- */
-static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
-{
-	struct bfq_queue *bfqq;
-	struct request *next_rq;
-	enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
-	int budg_timeout;
-
-	bfqq = bfqd->active_queue;
-	if (bfqq == NULL)
-		goto new_queue;
-
-	bfq_log_bfqq(bfqd, bfqq, "select_queue: already active queue");
-
-	budg_timeout = bfq_may_expire_for_budg_timeout(bfqq);
-	if (budg_timeout &&
-	    !bfq_bfqq_must_idle(bfqq, budg_timeout))
-		goto expire;
-
-	next_rq = bfqq->next_rq;
-	/*
-	 * If bfqq has requests queued and it has enough budget left to
-	 * serve them, keep the queue, otherwise expire it.
-	 */
-	if (next_rq != NULL) {
-		if (bfq_serv_to_charge(next_rq, bfqq) >
-			bfq_bfqq_budget_left(bfqq)) {
-			reason = BFQ_BFQQ_BUDGET_EXHAUSTED;
-			goto expire;
-		} else {
-			/*
-			 * The idle timer may be pending because we may not
-			 * disable disk idling even when a new request arrives
-			 */
-			if (timer_pending(&bfqd->idle_slice_timer)) {
-				/*
-				 * If we get here: 1) at least a new request
-				 * has arrived but we have not disabled the
-				 * timer because the request was too small,
-				 * 2) then the block layer has unplugged the
-				 * device, causing the dispatch to be invoked.
-				 *
-				 * Since the device is unplugged, now the
-				 * requests are probably large enough to
-				 * provide a reasonable throughput.
-				 * So we disable idling.
-				 */
-				bfq_clear_bfqq_wait_request(bfqq);
-				del_timer(&bfqd->idle_slice_timer);
-			}
-			goto keep_queue;
-		}
-	}
-
-	/*
-	 * No requests pending. If there is no cooperator, and the active
-	 * queue still has requests in flight or is idling for a new request,
-	 * then keep it.
-	 */
-	if (timer_pending(&bfqd->idle_slice_timer) ||
-	    (bfqq->dispatched != 0 &&
-	     (bfq_bfqq_idle_window(bfqq) || bfqq->raising_coeff > 1) &&
-	     !bfq_queue_nonrot_noidle(bfqd, bfqq))) {
-		bfqq = NULL;
-		goto keep_queue;
-	}
-
-	reason = BFQ_BFQQ_NO_MORE_REQUESTS;
-expire:
-	bfq_bfqq_expire(bfqd, bfqq, 0, reason);
-new_queue:
-	bfqq = bfq_set_active_queue(bfqd);
-	bfq_log(bfqd, "select_queue: new queue %d returned",
-		bfqq != NULL ? bfqq->pid : 0);
-keep_queue:
-	return bfqq;
-}
-
-static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
-{
-	struct bfq_entity *entity = &bfqq->entity;
-	if (bfqq->raising_coeff > 1) { /* queue is being boosted */
-		bfq_log_bfqq(bfqd, bfqq,
-			"raising period dur %u/%u msec, "
-			"old raising coeff %u, w %d(%d)",
-			jiffies_to_msecs(jiffies -
-				bfqq->last_rais_start_finish),
-			jiffies_to_msecs(bfqq->raising_cur_max_time),
-			bfqq->raising_coeff,
-			bfqq->entity.weight, bfqq->entity.orig_weight);
-
-		BUG_ON(bfqq != bfqd->active_queue && entity->weight !=
-			entity->orig_weight * bfqq->raising_coeff);
-		if(entity->ioprio_changed)
-			bfq_log_bfqq(bfqd, bfqq,
-			"WARN: pending prio change");
-		/*
-		 * If too much time has elapsed from the beginning
-		 * of this weight-raising period and process is not soft
-		 * real-time, stop it
-		 */
-		if (jiffies - bfqq->last_rais_start_finish >
-			bfqq->raising_cur_max_time) {
-			int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 &&
-				bfqq->soft_rt_next_start < jiffies;
-
-			bfqq->last_rais_start_finish = jiffies;
-			if (soft_rt)
-				bfqq->raising_cur_max_time =
-					bfqd->bfq_raising_rt_max_time;
-			else
-				bfq_bfqq_end_raising(bfqq);
-		}
-	}
-	/* Update weight both if it must be raised and if it must be lowered */
-	if ((entity->weight > entity->orig_weight) != (bfqq->raising_coeff > 1))
-		__bfq_entity_update_weight_prio(
-			bfq_entity_service_tree(entity),
-			entity);
-}
-
-/*
- * Dispatch one request from bfqq, moving it to the request queue
- * dispatch list.
- */
-static int bfq_dispatch_request(struct bfq_data *bfqd,
-				struct bfq_queue *bfqq)
-{
-	int dispatched = 0;
-	struct request *rq;
-	unsigned long service_to_charge;
-
-	BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
-
-	/* Follow expired path, else get first next available. */
-	rq = bfq_check_fifo(bfqq);
-	if (rq == NULL)
-		rq = bfqq->next_rq;
-	service_to_charge = bfq_serv_to_charge(rq, bfqq);
-
-	if (service_to_charge > bfq_bfqq_budget_left(bfqq)) {
-		/*
-		 * This may happen if the next rq is chosen
-		 * in fifo order instead of sector order.
-		 * The budget is properly dimensioned
-		 * to be always sufficient to serve the next request
-		 * only if it is chosen in sector order. The reason is
-		 * that it would be quite inefficient and little useful
-		 * to always make sure that the budget is large enough
-		 * to serve even the possible next rq in fifo order.
-		 * In fact, requests are seldom served in fifo order.
-		 *
-		 * Expire the queue for budget exhaustion, and
-		 * make sure that the next act_budget is enough
-		 * to serve the next request, even if it comes
-		 * from the fifo expired path.
-		 */
-		bfqq->next_rq = rq;
-		/*
-		 * Since this dispatch is failed, make sure that
-		 * a new one will be performed
-		 */
-		if (!bfqd->rq_in_driver)
-			bfq_schedule_dispatch(bfqd);
-		goto expire;
-	}
-
-	/* Finally, insert request into driver dispatch list.
*/ - bfq_bfqq_served(bfqq, service_to_charge); - bfq_dispatch_insert(bfqd->queue, rq); - - update_raising_data(bfqd, bfqq); - - bfq_log_bfqq(bfqd, bfqq, "dispatched %u sec req (%llu), " - "budg left %lu", - blk_rq_sectors(rq), - (long long unsigned)blk_rq_pos(rq), - bfq_bfqq_budget_left(bfqq)); - - dispatched++; - - if (bfqd->active_cic == NULL) { - atomic_long_inc(&RQ_CIC(rq)->ioc->refcount); - bfqd->active_cic = RQ_CIC(rq); - } - - if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && - dispatched >= bfqd->bfq_max_budget_async_rq) || - bfq_class_idle(bfqq))) - goto expire; - - return dispatched; - -expire: - bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED); - return dispatched; -} - -static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) -{ - int dispatched = 0; - - while (bfqq->next_rq != NULL) { - bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); - dispatched++; - } - - BUG_ON(!list_empty(&bfqq->fifo)); - return dispatched; -} - -/* - * Drain our current requests. Used for barriers and when switching - * io schedulers on-the-fly. - */ -static int bfq_forced_dispatch(struct bfq_data *bfqd) -{ - struct bfq_queue *bfqq, *n; - struct bfq_service_tree *st; - int dispatched = 0; - - bfqq = bfqd->active_queue; - if (bfqq != NULL) { - spin_lock(&bfqd->eqm_lock); - __bfq_bfqq_expire(bfqd, bfqq); - spin_unlock(&bfqd->eqm_lock); - } - - /* - * Loop through classes, and be careful to leave the scheduler - * in a consistent state, as feedback mechanisms and vtime - * updates cannot be disabled during the process. - */ - list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { - st = bfq_entity_service_tree(&bfqq->entity); - - dispatched += __bfq_forced_dispatch_bfqq(bfqq); - bfqq->max_budget = bfq_max_budget(bfqd); - - bfq_forget_idle(st); - } - - BUG_ON(bfqd->busy_queues != 0); - - return dispatched; -} - -static int bfq_dispatch_requests(struct request_queue *q, int force) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct bfq_queue *bfqq; - int max_dispatch; - - bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); - if (bfqd->busy_queues == 0) - return 0; - - if (unlikely(force)) - return bfq_forced_dispatch(bfqd); - - if((bfqq = bfq_select_queue(bfqd)) == NULL) - return 0; - - max_dispatch = bfqd->bfq_quantum; - if (bfq_class_idle(bfqq)) - max_dispatch = 1; - - if (!bfq_bfqq_sync(bfqq)) - max_dispatch = bfqd->bfq_max_budget_async_rq; - - if (bfqq->dispatched >= max_dispatch) { - if (bfqd->busy_queues > 1) - return 0; - if (bfqq->dispatched >= 4 * max_dispatch) - return 0; - } - - if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) - return 0; - - bfq_clear_bfqq_wait_request(bfqq); - BUG_ON(timer_pending(&bfqd->idle_slice_timer)); - - if (! bfq_dispatch_request(bfqd, bfqq)) - return 0; - - bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d" - "(max_disp %d)", bfqq->pid, max_dispatch); - - return 1; -} - -/* - * Task holds one reference to the queue, dropped when task exits. Each rq - * in-flight on this queue also holds a reference, dropped when rq is freed. - * - * Queue lock must be held here. 
- */ -static void bfq_put_queue(struct bfq_queue *bfqq) -{ - struct bfq_data *bfqd = bfqq->bfqd; - - BUG_ON(atomic_read(&bfqq->ref) <= 0); - - bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, - atomic_read(&bfqq->ref)); - if (!atomic_dec_and_test(&bfqq->ref)) - return; - - BUG_ON(rb_first(&bfqq->sort_list) != NULL); - BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); - BUG_ON(bfqq->entity.tree != NULL); - BUG_ON(bfq_bfqq_busy(bfqq)); - BUG_ON(bfqd->active_queue == bfqq); - - bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); - - kmem_cache_free(bfq_pool, bfqq); -} - -static void bfq_put_cooperator(struct bfq_queue *bfqq) -{ - struct bfq_queue *__bfqq, *next; - - /* - * If this queue was scheduled to merge with another queue, be - * sure to drop the reference taken on that queue (and others in - * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. - */ - __bfqq = bfqq->new_bfqq; - while (__bfqq) { - if (__bfqq == bfqq) { - WARN(1, "bfqq->new_bfqq loop detected.\n"); - break; - } - next = __bfqq->new_bfqq; - bfq_put_queue(__bfqq); - __bfqq = next; - } -} - -/* Coop lock is taken in __bfq_exit_single_io_context() */ -static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) -{ - if (bfqq == bfqd->active_queue) { - __bfq_bfqq_expire(bfqd, bfqq); - bfq_schedule_dispatch(bfqd); - } - - bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, - atomic_read(&bfqq->ref)); - - bfq_put_cooperator(bfqq); - - bfq_put_queue(bfqq); -} - -/* - * Update the entity prio values; note that the new values will not - * be used until the next (re)activation. - */ -static void bfq_init_prio_data(struct bfq_queue *bfqq, struct io_context *ioc) -{ - struct task_struct *tsk = current; - int ioprio_class; - - if (!bfq_bfqq_prio_changed(bfqq)) - return; - - ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); - switch (ioprio_class) { - default: - printk(KERN_ERR "bfq: bad prio %x\n", ioprio_class); - case IOPRIO_CLASS_NONE: - /* - * No prio set, inherit CPU scheduling settings. - */ - bfqq->entity.new_ioprio = task_nice_ioprio(tsk); - bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); - break; - case IOPRIO_CLASS_RT: - bfqq->entity.new_ioprio = task_ioprio(ioc); - bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; - break; - case IOPRIO_CLASS_BE: - bfqq->entity.new_ioprio = task_ioprio(ioc); - bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; - break; - case IOPRIO_CLASS_IDLE: - bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; - bfqq->entity.new_ioprio = 7; - bfq_clear_bfqq_idle_window(bfqq); - break; - } - - bfqq->entity.ioprio_changed = 1; - - /* - * Keep track of original prio settings in case we have to temporarily - * elevate the priority of this queue. 
- */ - bfqq->org_ioprio = bfqq->entity.new_ioprio; - bfq_clear_bfqq_prio_changed(bfqq); -} - -static void bfq_changed_ioprio(struct io_context *ioc, - struct cfq_io_context *cic) -{ - struct bfq_data *bfqd; - struct bfq_queue *bfqq, *new_bfqq; - struct bfq_group *bfqg; - unsigned long uninitialized_var(flags); - - bfqd = bfq_get_bfqd_locked(&cic->key, &flags); - if (unlikely(bfqd == NULL)) - return; - - spin_lock(&bfqd->eqm_lock); - bfqq = cic->cfqq[BLK_RW_ASYNC]; - if (bfqq != NULL) { - bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, - sched_data); - new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, cic->ioc, - GFP_ATOMIC); - if (new_bfqq != NULL) { - cic->cfqq[BLK_RW_ASYNC] = new_bfqq; - bfq_log_bfqq(bfqd, bfqq, - "changed_ioprio: bfqq %p %d", - bfqq, atomic_read(&bfqq->ref)); - bfq_put_queue(bfqq); - } - } - - bfqq = cic->cfqq[BLK_RW_SYNC]; - spin_unlock(&bfqd->eqm_lock); - if (bfqq != NULL) - bfq_mark_bfqq_prio_changed(bfqq); - - bfq_put_bfqd_unlock(bfqd, &flags); -} - -static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, - pid_t pid, int is_sync) -{ - RB_CLEAR_NODE(&bfqq->entity.rb_node); - INIT_LIST_HEAD(&bfqq->fifo); - - atomic_set(&bfqq->ref, 0); - bfqq->bfqd = bfqd; - - bfq_mark_bfqq_prio_changed(bfqq); - - if (is_sync) { - if (!bfq_class_idle(bfqq)) - bfq_mark_bfqq_idle_window(bfqq); - bfq_mark_bfqq_sync(bfqq); - } - - /* Tentative initial value to trade off between thr and lat */ - bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; - bfqq->pid = pid; - - bfqq->raising_coeff = 1; - bfqq->last_rais_start_finish = 0; - bfqq->soft_rt_next_start = -1; -} - -static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, - struct bfq_group *bfqg, - int is_sync, - struct io_context *ioc, - gfp_t gfp_mask) -{ - struct bfq_queue *bfqq, *new_bfqq = NULL; - struct cfq_io_context *cic; - -retry: - cic = bfq_cic_lookup(bfqd, ioc); - /* cic always exists here */ - bfqq = cic_to_bfqq(cic, is_sync); - - /* - * Always try a new alloc if we fall back to the OOM bfqq - * originally, since it should just be a temporary situation. 
- */ - if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { - bfqq = NULL; - if (new_bfqq != NULL) { - bfqq = new_bfqq; - new_bfqq = NULL; - } else if (gfp_mask & __GFP_WAIT) { - spin_unlock(&bfqd->eqm_lock); - spin_unlock_irq(bfqd->queue->queue_lock); - new_bfqq = kmem_cache_alloc_node(bfq_pool, - gfp_mask | __GFP_ZERO, - bfqd->queue->node); - spin_lock_irq(bfqd->queue->queue_lock); - spin_lock(&bfqd->eqm_lock); - if (new_bfqq != NULL) - goto retry; - } else { - bfqq = kmem_cache_alloc_node(bfq_pool, - gfp_mask | __GFP_ZERO, - bfqd->queue->node); - } - - if (bfqq != NULL) { - bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); - bfq_log_bfqq(bfqd, bfqq, "allocated"); - } else { - bfqq = &bfqd->oom_bfqq; - bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); - } - - bfq_init_prio_data(bfqq, ioc); - bfq_init_entity(&bfqq->entity, bfqg); - } - - if (new_bfqq != NULL) - kmem_cache_free(bfq_pool, new_bfqq); - - return bfqq; -} - -static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, - struct bfq_group *bfqg, - int ioprio_class, int ioprio) -{ - switch (ioprio_class) { - case IOPRIO_CLASS_RT: - return &bfqg->async_bfqq[0][ioprio]; - case IOPRIO_CLASS_BE: - return &bfqg->async_bfqq[1][ioprio]; - case IOPRIO_CLASS_IDLE: - return &bfqg->async_idle_bfqq; - default: - BUG(); - } -} - -static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, - struct bfq_group *bfqg, int is_sync, - struct io_context *ioc, gfp_t gfp_mask) -{ - const int ioprio = task_ioprio(ioc); - const int ioprio_class = task_ioprio_class(ioc); - struct bfq_queue **async_bfqq = NULL; - struct bfq_queue *bfqq = NULL; - - if (!is_sync) { - async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, - ioprio); - bfqq = *async_bfqq; - } - - if (bfqq == NULL) - bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, ioc, gfp_mask); - - /* - * Pin the queue now that it's allocated, scheduler exit will prune it. - */ - if (!is_sync && *async_bfqq == NULL) { - atomic_inc(&bfqq->ref); - bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", - bfqq, atomic_read(&bfqq->ref)); - *async_bfqq = bfqq; - } - - atomic_inc(&bfqq->ref); - bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, - atomic_read(&bfqq->ref)); - return bfqq; -} - -static void bfq_update_io_thinktime(struct bfq_data *bfqd, - struct cfq_io_context *cic) -{ - unsigned long elapsed = jiffies - cic->ttime.last_end_request; - unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); - - cic->ttime.ttime_samples = (7*cic->ttime.ttime_samples + 256) / 8; - cic->ttime.ttime_total = (7*cic->ttime.ttime_total + 256*ttime) / 8; - cic->ttime.ttime_mean = (cic->ttime.ttime_total + 128) / cic->ttime.ttime_samples; -} - -static void bfq_update_io_seektime(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct request *rq) -{ - sector_t sdist; - u64 total; - - if (bfqq->last_request_pos < blk_rq_pos(rq)) - sdist = blk_rq_pos(rq) - bfqq->last_request_pos; - else - sdist = bfqq->last_request_pos - blk_rq_pos(rq); - - /* - * Don't allow the seek distance to get too large from the - * odd fragment, pagein, etc. 
- */ - if (bfqq->seek_samples == 0) /* first request, not really a seek */ - sdist = 0; - else if (bfqq->seek_samples <= 60) /* second & third seek */ - sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); - else - sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); - - bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; - bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; - total = bfqq->seek_total + (bfqq->seek_samples/2); - do_div(total, bfqq->seek_samples); - if (bfq_bfqq_coop(bfqq)) { - /* - * If the mean seektime increases for a (non-seeky) shared - * queue, some cooperator is likely to be idling too much. - * On the contrary, if it decreases, some cooperator has - * probably waked up. - * - */ - if ((sector_t)total < bfqq->seek_mean) - bfq_mark_bfqq_some_coop_idle(bfqq) ; - else if ((sector_t)total > bfqq->seek_mean) - bfq_clear_bfqq_some_coop_idle(bfqq) ; - } - bfqq->seek_mean = (sector_t)total; - - bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, - (u64)bfqq->seek_mean); -} - -/* - * Disable idle window if the process thinks too long or seeks so much that - * it doesn't matter. - */ -static void bfq_update_idle_window(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct cfq_io_context *cic) -{ - int enable_idle; - - /* Don't idle for async or idle io prio class. */ - if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) - return; - - /* Idle window just restored, statistics are meaningless. */ - if (bfq_bfqq_just_split(bfqq)) - return; - - enable_idle = bfq_bfqq_idle_window(bfqq); - - if (atomic_read(&cic->ioc->nr_tasks) == 0 || - bfqd->bfq_slice_idle == 0 || - (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && - bfqq->raising_coeff == 1)) - enable_idle = 0; - else if (bfq_sample_valid(cic->ttime.ttime_samples)) { - if (cic->ttime.ttime_mean > bfqd->bfq_slice_idle && - bfqq->raising_coeff == 1) - enable_idle = 0; - else - enable_idle = 1; - } - bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", - enable_idle); - - if (enable_idle) - bfq_mark_bfqq_idle_window(bfqq); - else - bfq_clear_bfqq_idle_window(bfqq); -} - -/* - * Called when a new fs request (rq) is added to bfqq. Check if there's - * something we should do about it. - */ -static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - struct request *rq) -{ - struct cfq_io_context *cic = RQ_CIC(rq); - - if (rq->cmd_flags & REQ_META) - bfqq->meta_pending++; - - bfq_update_io_thinktime(bfqd, cic); - bfq_update_io_seektime(bfqd, bfqq, rq); - if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || - !BFQQ_SEEKY(bfqq)) - bfq_update_idle_window(bfqd, bfqq, cic); - bfq_clear_bfqq_just_split(bfqq); - - bfq_log_bfqq(bfqd, bfqq, - "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", - bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), - (long long unsigned)bfqq->seek_mean); - - bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); - - if (bfqq == bfqd->active_queue) { - /* - * If there is just this request queued and the request - * is small, just exit. - * In this way, if the disk is being idled to wait for a new - * request from the active queue, we avoid unplugging the - * device now. - * - * By doing so, we spare the disk to be committed - * to serve just a small request. On the contrary, we wait for - * the block layer to decide when to unplug the device: - * hopefully, new requests will be merged to this - * one quickly, then the device will be unplugged - * and larger requests will be dispatched. 
- */ - if (bfqq->queued[rq_is_sync(rq)] == 1 && - blk_rq_sectors(rq) < 32) { - return; - } - if (bfq_bfqq_wait_request(bfqq)) { - /* - * If we are waiting for a request for this queue, let - * it rip immediately and flag that we must not expire - * this queue just now. - */ - bfq_clear_bfqq_wait_request(bfqq); - del_timer(&bfqd->idle_slice_timer); - /* - * Here we can safely expire the queue, in - * case of budget timeout, without wasting - * guarantees - */ - if (bfq_bfqq_budget_timeout(bfqq)) - bfq_bfqq_expire(bfqd, bfqq, 0, - BFQ_BFQQ_BUDGET_TIMEOUT); - __blk_run_queue(bfqd->queue); - } - } -} - -static void bfq_insert_request(struct request_queue *q, struct request *rq) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq; - - assert_spin_locked(bfqd->queue->queue_lock); - - /* - * An unplug may trigger a requeue of a request from the device - * driver: make sure we are in process context while trying to - * merge two bfq_queues. - */ - spin_lock(&bfqd->eqm_lock); - if (!in_interrupt() && - (new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true))) { - if (cic_to_bfqq(RQ_CIC(rq), 1) != bfqq) - new_bfqq = cic_to_bfqq(RQ_CIC(rq), 1); - /* - * Release the request's reference to the old bfqq - * and make sure one is taken to the shared queue. - */ - new_bfqq->allocated[rq_data_dir(rq)]++; - bfqq->allocated[rq_data_dir(rq)]--; - atomic_inc(&new_bfqq->ref); - bfq_put_queue(bfqq); - if (cic_to_bfqq(RQ_CIC(rq), 1) == bfqq) - bfq_merge_bfqqs(bfqd, RQ_CIC(rq), bfqq, new_bfqq); - rq->elevator_private[1] = new_bfqq; - bfqq = new_bfqq; - } - spin_unlock(&bfqd->eqm_lock); - - bfq_init_prio_data(bfqq, RQ_CIC(rq)->ioc); - - bfq_add_rq_rb(rq); - - /* - * Here a newly-created bfq_queue has already started a weight-raising - * period: clear raising_time_left to prevent bfq_bfqq_save_state() - * from assigning it a full weight-raising period. See the detailed - * comments about this field in bfq_init_icq(). - */ - if (bfqq->cic != NULL) - bfqq->cic->raising_time_left = 0; - rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]); - list_add_tail(&rq->queuelist, &bfqq->fifo); - - bfq_rq_enqueued(bfqd, bfqq, rq); -} - -static void bfq_update_hw_tag(struct bfq_data *bfqd) -{ - bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, - bfqd->rq_in_driver); - - if (bfqd->hw_tag == 1) - return; - - /* - * This sample is valid if the number of outstanding requests - * is large enough to allow a queueing behavior. Note that the - * sum is not exact, as it's not taking into account deactivated - * requests. - */ - if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) - return; - - if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) - return; - - bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; - bfqd->max_rq_in_driver = 0; - bfqd->hw_tag_samples = 0; -} - -static void bfq_completed_request(struct request_queue *q, struct request *rq) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - struct bfq_data *bfqd = bfqq->bfqd; - const int sync = rq_is_sync(rq); - - bfq_log_bfqq(bfqd, bfqq, "completed %u sects req (%d)", - blk_rq_sectors(rq), sync); - - bfq_update_hw_tag(bfqd); - - WARN_ON(!bfqd->rq_in_driver); - WARN_ON(!bfqq->dispatched); - bfqd->rq_in_driver--; - bfqq->dispatched--; - - if (bfq_bfqq_sync(bfqq)) - bfqd->sync_flight--; - - if (sync) - RQ_CIC(rq)->ttime.last_end_request = jiffies; - - /* - * If this is the active queue, check if it needs to be expired, - * or if we want to idle in case it has no pending requests. 
- */ - if (bfqd->active_queue == bfqq) { - int budg_timeout = bfq_may_expire_for_budg_timeout(bfqq); - if (bfq_bfqq_budget_new(bfqq)) - bfq_set_budget_timeout(bfqd); - - if (bfq_bfqq_must_idle(bfqq, budg_timeout)) - bfq_arm_slice_timer(bfqd); - else if (budg_timeout) - bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); - } - - if (!bfqd->rq_in_driver) - bfq_schedule_dispatch(bfqd); -} - -static inline int __bfq_may_queue(struct bfq_queue *bfqq) -{ - if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { - bfq_clear_bfqq_must_alloc(bfqq); - return ELV_MQUEUE_MUST; - } - - return ELV_MQUEUE_MAY; -} - -static int bfq_may_queue(struct request_queue *q, int rw) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct task_struct *tsk = current; - struct cfq_io_context *cic; - struct bfq_queue *bfqq; - - /* - * Don't force setup of a queue from here, as a call to may_queue - * does not necessarily imply that a request actually will be queued. - * So just lookup a possibly existing queue, or return 'may queue' - * if that fails. - */ - cic = bfq_cic_lookup(bfqd, tsk->io_context); - if (cic == NULL) - return ELV_MQUEUE_MAY; - - spin_lock(&bfqd->eqm_lock); - bfqq = cic_to_bfqq(cic, rw_is_sync(rw)); - spin_unlock(&bfqd->eqm_lock); - if (bfqq != NULL) { - bfq_init_prio_data(bfqq, cic->ioc); - - return __bfq_may_queue(bfqq); - } - - return ELV_MQUEUE_MAY; -} - -/* - * Queue lock held here. - */ -static void bfq_put_request(struct request *rq) -{ - struct bfq_queue *bfqq = RQ_BFQQ(rq); - - if (bfqq != NULL) { - const int rw = rq_data_dir(rq); - - BUG_ON(!bfqq->allocated[rw]); - bfqq->allocated[rw]--; - - put_io_context(RQ_CIC(rq)->ioc); - - rq->elevator_private[0] = NULL; - rq->elevator_private[1] = NULL; - - bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", - bfqq, atomic_read(&bfqq->ref)); - bfq_put_queue(bfqq); - } -} - -/* - * Returns NULL if a new bfqq should be allocated, or the old bfqq if this - * was the last process referring to said bfqq. - */ -static struct bfq_queue * -bfq_split_bfqq(struct cfq_io_context *cic, struct bfq_queue *bfqq) -{ - bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); - - put_io_context(cic->ioc); - - if (bfqq_process_refs(bfqq) == 1) { - bfqq->pid = current->pid; - bfq_clear_bfqq_some_coop_idle(bfqq); - bfq_clear_bfqq_coop(bfqq); - bfq_clear_bfqq_split_coop(bfqq); - return bfqq; - } - - cic_set_bfqq(cic, NULL, 1); - - bfq_put_cooperator(bfqq); - - bfq_put_queue(bfqq); - return NULL; -} - -/* - * Allocate bfq data structures associated with this request. - */ -static int bfq_set_request(struct request_queue *q, struct request *rq, - gfp_t gfp_mask) -{ - struct bfq_data *bfqd = q->elevator->elevator_data; - struct cfq_io_context *cic; - const int rw = rq_data_dir(rq); - const int is_sync = rq_is_sync(rq); - struct bfq_queue *bfqq; - struct bfq_group *bfqg; - unsigned long flags; - bool split = false; - - might_sleep_if(gfp_mask & __GFP_WAIT); - - cic = bfq_get_io_context(bfqd, gfp_mask); - - spin_lock_irqsave(q->queue_lock, flags); - - if (cic == NULL) - goto queue_fail; - - bfqg = bfq_cic_update_cgroup(cic); - - spin_lock(&bfqd->eqm_lock); - -new_queue: - bfqq = cic_to_bfqq(cic, is_sync); - if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { - bfqq = bfq_get_queue(bfqd, bfqg, is_sync, cic->ioc, gfp_mask); - cic_set_bfqq(cic, bfqq, is_sync); - } else { - /* If the queue was seeky for too long, break it apart. 
*/ - if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { - bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); - bfqq = bfq_split_bfqq(cic, bfqq); - split = true; - if (!bfqq) - goto new_queue; - } - } - - bfqq->allocated[rw]++; - atomic_inc(&bfqq->ref); - bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, - atomic_read(&bfqq->ref)); - - rq->elevator_private[0] = cic; - rq->elevator_private[1] = bfqq; - - /* - * If a bfq_queue has only one process reference, it is owned - * by only one cfq_io_context: we can set the cic field of the - * bfq_queue to the address of that structure. Also, if the - * queue has just been split, mark a flag so that the - * information is available to the other scheduler hooks. - */ - if (bfqq_process_refs(bfqq) == 1) { - bfqq->cic = cic; - if (split) { - bfq_mark_bfqq_just_split(bfqq); - /* - * If the queue has just been split from a shared queue, - * restore the idle window and the possible weight - * raising period. - */ - bfq_bfqq_resume_state(bfqq, cic); - } - } - - spin_unlock(&bfqd->eqm_lock); - spin_unlock_irqrestore(q->queue_lock, flags); - - return 0; - -queue_fail: - if (cic != NULL) - put_io_context(cic->ioc); - - bfq_schedule_dispatch(bfqd); - spin_unlock_irqrestore(q->queue_lock, flags); - - return 1; -} - -static void bfq_kick_queue(struct work_struct *work) -{ - struct bfq_data *bfqd = - container_of(work, struct bfq_data, unplug_work); - struct request_queue *q = bfqd->queue; - - spin_lock_irq(q->queue_lock); - __blk_run_queue(q); - spin_unlock_irq(q->queue_lock); -} - -/* - * Handler of the expiration of the timer running if the active_queue - * is idling inside its time slice. - */ -static void bfq_idle_slice_timer(unsigned long data) -{ - struct bfq_data *bfqd = (struct bfq_data *)data; - struct bfq_queue *bfqq; - unsigned long flags; - enum bfqq_expiration reason; - - spin_lock_irqsave(bfqd->queue->queue_lock, flags); - - bfqq = bfqd->active_queue; - /* - * Theoretical race here: active_queue can be NULL or different - * from the queue that was idling if the timer handler spins on - * the queue_lock and a new request arrives for the current - * queue and there is a full dispatch cycle that changes the - * active_queue. This can hardly happen, but in the worst case - * we just expire a queue too early. 
- */ - if (bfqq != NULL) { - bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); - if (bfq_bfqq_budget_timeout(bfqq)) - /* - * Also here the queue can be safely expired - * for budget timeout without wasting - * guarantees - */ - reason = BFQ_BFQQ_BUDGET_TIMEOUT; - else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) - /* - * The queue may not be empty upon timer expiration, - * because we may not disable the timer when the first - * request of the active queue arrives during - * disk idling - */ - reason = BFQ_BFQQ_TOO_IDLE; - else - goto schedule_dispatch; - - bfq_bfqq_expire(bfqd, bfqq, 1, reason); - } - -schedule_dispatch: - bfq_schedule_dispatch(bfqd); - - spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); -} - -static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) -{ - del_timer_sync(&bfqd->idle_slice_timer); - cancel_work_sync(&bfqd->unplug_work); -} - -static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, - struct bfq_queue **bfqq_ptr) -{ - struct bfq_group *root_group = bfqd->root_group; - struct bfq_queue *bfqq = *bfqq_ptr; - - bfq_log(bfqd, "put_async_bfqq: %p", bfqq); - if (bfqq != NULL) { - bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); - bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", - bfqq, atomic_read(&bfqq->ref)); - bfq_put_queue(bfqq); - *bfqq_ptr = NULL; - } -} - -/* - * Release all the bfqg references to its async queues. If we are - * deallocating the group these queues may still contain requests, so - * we reparent them to the root cgroup (i.e., the only one that will - * exist for sure untill all the requests on a device are gone). - */ -static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) -{ - int i, j; - - for (i = 0; i < 2; i++) - for (j = 0; j < IOPRIO_BE_NR; j++) - __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); - - __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); -} - -static void bfq_exit_queue(struct elevator_queue *e) -{ - struct bfq_data *bfqd = e->elevator_data; - struct request_queue *q = bfqd->queue; - struct bfq_queue *bfqq, *n; - struct cfq_io_context *cic; - - bfq_shutdown_timer_wq(bfqd); - - spin_lock_irq(q->queue_lock); - - while (!list_empty(&bfqd->cic_list)) { - cic = list_entry(bfqd->cic_list.next, struct cfq_io_context, - queue_list); - __bfq_exit_single_io_context(bfqd, cic); - } - - BUG_ON(bfqd->active_queue != NULL); - list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) - bfq_deactivate_bfqq(bfqd, bfqq, 0); - - bfq_disconnect_groups(bfqd); - spin_unlock_irq(q->queue_lock); - - bfq_shutdown_timer_wq(bfqd); - - spin_lock(&cic_index_lock); - ida_remove(&cic_index_ida, bfqd->cic_index); - spin_unlock(&cic_index_lock); - - /* Wait for cic->key accessors to exit their grace periods. 
*/ - synchronize_rcu(); - - BUG_ON(timer_pending(&bfqd->idle_slice_timer)); - - bfq_free_root_group(bfqd); - kfree(bfqd); -} - -static int bfq_alloc_cic_index(void) -{ - int index, error; - - do { - if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) - return -ENOMEM; - - spin_lock(&cic_index_lock); - error = ida_get_new(&cic_index_ida, &index); - spin_unlock(&cic_index_lock); - if (error && error != -EAGAIN) - return error; - } while (error); - - return index; -} - -static void *bfq_init_queue(struct request_queue *q) -{ - struct bfq_group *bfqg; - struct bfq_data *bfqd; - int i; - - i = bfq_alloc_cic_index(); - if (i < 0) - return NULL; - - bfqd = kmalloc_node(sizeof(*bfqd), GFP_KERNEL | __GFP_ZERO, q->node); - if (bfqd == NULL) - return NULL; - - bfqd->cic_index = i; - - /* - * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. - * Grab a permanent reference to it, so that the normal code flow - * will not attempt to free it. - */ - bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); - atomic_inc(&bfqd->oom_bfqq.ref); - - spin_lock_init(&bfqd->eqm_lock); - INIT_LIST_HEAD(&bfqd->cic_list); - - bfqd->queue = q; - - bfqg = bfq_alloc_root_group(bfqd, q->node); - if (bfqg == NULL) { - kfree(bfqd); - return NULL; - } - - bfqd->root_group = bfqg; - - init_timer(&bfqd->idle_slice_timer); - bfqd->idle_slice_timer.function = bfq_idle_slice_timer; - bfqd->idle_slice_timer.data = (unsigned long)bfqd; - - bfqd->rq_pos_tree = RB_ROOT; - - INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); - - INIT_LIST_HEAD(&bfqd->active_list); - INIT_LIST_HEAD(&bfqd->idle_list); - - bfqd->hw_tag = -1; - - bfqd->bfq_max_budget = bfq_default_max_budget; - - bfqd->bfq_quantum = bfq_quantum; - bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; - bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; - bfqd->bfq_back_max = bfq_back_max; - bfqd->bfq_back_penalty = bfq_back_penalty; - bfqd->bfq_slice_idle = bfq_slice_idle; - bfqd->bfq_class_idle_last_service = 0; - bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; - bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; - bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; - - bfqd->low_latency = true; - - bfqd->bfq_raising_coeff = 20; - bfqd->bfq_raising_rt_max_time = msecs_to_jiffies(300); - bfqd->bfq_raising_max_time = 0; - bfqd->bfq_raising_min_idle_time = msecs_to_jiffies(2000); - bfqd->bfq_raising_min_inter_arr_async = msecs_to_jiffies(500); - bfqd->bfq_raising_max_softrt_rate = 7000; - - /* Initially estimate the device's peak rate as the reference rate */ - if (blk_queue_nonrot(bfqd->queue)) { - bfqd->RT_prod = R_nonrot * T_nonrot; - bfqd->peak_rate = R_nonrot; - } else { - bfqd->RT_prod = R_rot * T_rot; - bfqd->peak_rate = R_rot; - } - - return bfqd; -} - -static void bfq_slab_kill(void) -{ - if (bfq_pool != NULL) - kmem_cache_destroy(bfq_pool); - if (bfq_ioc_pool != NULL) - kmem_cache_destroy(bfq_ioc_pool); -} - -static int __init bfq_slab_setup(void) -{ - bfq_pool = KMEM_CACHE(bfq_queue, 0); - if (bfq_pool == NULL) - goto fail; - - bfq_ioc_pool = kmem_cache_create("bfq_io_context", - sizeof(struct cfq_io_context), - __alignof__(struct cfq_io_context), - 0, NULL); - if (bfq_ioc_pool == NULL) - goto fail; - - return 0; -fail: - bfq_slab_kill(); - return -ENOMEM; -} - -static ssize_t bfq_var_show(unsigned int var, char *page) -{ - return sprintf(page, "%d\n", var); -} - -static ssize_t bfq_var_store(unsigned long *var, const char *page, size_t count) -{ - unsigned long new_val; - int ret = strict_strtoul(page, 10, &new_val); - - if (ret == 0) - *var = new_val; - - return 
count; -} - -static ssize_t bfq_raising_max_time_show(struct elevator_queue *e, char *page) -{ - struct bfq_data *bfqd = e->elevator_data; - return sprintf(page, "%d\n", bfqd->bfq_raising_max_time > 0 ? - jiffies_to_msecs(bfqd->bfq_raising_max_time) : - jiffies_to_msecs(bfq_wrais_duration(bfqd))); -} - -static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) -{ - struct bfq_queue *bfqq; - struct bfq_data *bfqd = e->elevator_data; - ssize_t num_char = 0; - - spin_lock_irq(bfqd->queue->queue_lock); - - num_char += sprintf(page + num_char, "Active:\n"); - list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { - num_char += sprintf(page + num_char, - "pid%d: weight %hu, dur %d/%u\n", - bfqq->pid, - bfqq->entity.weight, - jiffies_to_msecs(jiffies - - bfqq->last_rais_start_finish), - jiffies_to_msecs(bfqq->raising_cur_max_time)); - } - num_char += sprintf(page + num_char, "Idle:\n"); - list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { - num_char += sprintf(page + num_char, - "pid%d: weight %hu, dur %d/%u\n", - bfqq->pid, - bfqq->entity.weight, - jiffies_to_msecs(jiffies - - bfqq->last_rais_start_finish), - jiffies_to_msecs(bfqq->raising_cur_max_time)); - } - - spin_unlock_irq(bfqd->queue->queue_lock); - - return num_char; -} - -#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ -static ssize_t __FUNC(struct elevator_queue *e, char *page) \ -{ \ - struct bfq_data *bfqd = e->elevator_data; \ - unsigned int __data = __VAR; \ - if (__CONV) \ - __data = jiffies_to_msecs(__data); \ - return bfq_var_show(__data, (page)); \ -} -SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0); -SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); -SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); -SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); -SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); -SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); -SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); -SHOW_FUNCTION(bfq_max_budget_async_rq_show, bfqd->bfq_max_budget_async_rq, 0); -SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); -SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); -SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); -SHOW_FUNCTION(bfq_raising_coeff_show, bfqd->bfq_raising_coeff, 0); -SHOW_FUNCTION(bfq_raising_rt_max_time_show, bfqd->bfq_raising_rt_max_time, 1); -SHOW_FUNCTION(bfq_raising_min_idle_time_show, bfqd->bfq_raising_min_idle_time, - 1); -SHOW_FUNCTION(bfq_raising_min_inter_arr_async_show, - bfqd->bfq_raising_min_inter_arr_async, - 1); -SHOW_FUNCTION(bfq_raising_max_softrt_rate_show, - bfqd->bfq_raising_max_softrt_rate, 0); -#undef SHOW_FUNCTION - -#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ -static ssize_t \ -__FUNC(struct elevator_queue *e, const char *page, size_t count) \ -{ \ - struct bfq_data *bfqd = e->elevator_data; \ - unsigned long uninitialized_var(__data); \ - int ret = bfq_var_store(&__data, (page), count); \ - if (__data < (MIN)) \ - __data = (MIN); \ - else if (__data > (MAX)) \ - __data = (MAX); \ - if (__CONV) \ - *(__PTR) = msecs_to_jiffies(__data); \ - else \ - *(__PTR) = __data; \ - return ret; \ -} -STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); -STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, - INT_MAX, 1); -STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, - INT_MAX, 1); -STORE_FUNCTION(bfq_back_seek_max_store, 
&bfqd->bfq_back_max, 0, INT_MAX, 0); -STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, - INT_MAX, 0); -STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); -STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, - 1, INT_MAX, 0); -STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, - INT_MAX, 1); -STORE_FUNCTION(bfq_raising_coeff_store, &bfqd->bfq_raising_coeff, 1, - INT_MAX, 0); -STORE_FUNCTION(bfq_raising_max_time_store, &bfqd->bfq_raising_max_time, 0, - INT_MAX, 1); -STORE_FUNCTION(bfq_raising_rt_max_time_store, &bfqd->bfq_raising_rt_max_time, 0, - INT_MAX, 1); -STORE_FUNCTION(bfq_raising_min_idle_time_store, - &bfqd->bfq_raising_min_idle_time, 0, INT_MAX, 1); -STORE_FUNCTION(bfq_raising_min_inter_arr_async_store, - &bfqd->bfq_raising_min_inter_arr_async, 0, INT_MAX, 1); -STORE_FUNCTION(bfq_raising_max_softrt_rate_store, - &bfqd->bfq_raising_max_softrt_rate, 0, INT_MAX, 0); -#undef STORE_FUNCTION - -/* do nothing for the moment */ -static ssize_t bfq_weights_store(struct elevator_queue *e, - const char *page, size_t count) -{ - return count; -} - -static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) -{ - u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); - - if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) - return bfq_calc_max_budget(bfqd->peak_rate, timeout); - else - return bfq_default_max_budget; -} - -static ssize_t bfq_max_budget_store(struct elevator_queue *e, - const char *page, size_t count) -{ - struct bfq_data *bfqd = e->elevator_data; - unsigned long uninitialized_var(__data); - int ret = bfq_var_store(&__data, (page), count); - - if (__data == 0) - bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); - else { - if (__data > INT_MAX) - __data = INT_MAX; - bfqd->bfq_max_budget = __data; - } - - bfqd->bfq_user_max_budget = __data; - - return ret; -} - -static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, - const char *page, size_t count) -{ - struct bfq_data *bfqd = e->elevator_data; - unsigned long uninitialized_var(__data); - int ret = bfq_var_store(&__data, (page), count); - - if (__data < 1) - __data = 1; - else if (__data > INT_MAX) - __data = INT_MAX; - - bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); - if (bfqd->bfq_user_max_budget == 0) - bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); - - return ret; -} - -static ssize_t bfq_low_latency_store(struct elevator_queue *e, - const char *page, size_t count) -{ - struct bfq_data *bfqd = e->elevator_data; - unsigned long uninitialized_var(__data); - int ret = bfq_var_store(&__data, (page), count); - - if (__data > 1) - __data = 1; - if (__data == 0 && bfqd->low_latency != 0) - bfq_end_raising(bfqd); - bfqd->low_latency = __data; - - return ret; -} - -#define BFQ_ATTR(name) \ - __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) - -static struct elv_fs_entry bfq_attrs[] = { - BFQ_ATTR(quantum), - BFQ_ATTR(fifo_expire_sync), - BFQ_ATTR(fifo_expire_async), - BFQ_ATTR(back_seek_max), - BFQ_ATTR(back_seek_penalty), - BFQ_ATTR(slice_idle), - BFQ_ATTR(max_budget), - BFQ_ATTR(max_budget_async_rq), - BFQ_ATTR(timeout_sync), - BFQ_ATTR(timeout_async), - BFQ_ATTR(low_latency), - BFQ_ATTR(raising_coeff), - BFQ_ATTR(raising_max_time), - BFQ_ATTR(raising_rt_max_time), - BFQ_ATTR(raising_min_idle_time), - BFQ_ATTR(raising_min_inter_arr_async), - BFQ_ATTR(raising_max_softrt_rate), - BFQ_ATTR(weights), - __ATTR_NULL -}; - -static struct elevator_type iosched_bfq 
= { - .ops = { - .elevator_merge_fn = bfq_merge, - .elevator_merged_fn = bfq_merged_request, - .elevator_merge_req_fn = bfq_merged_requests, - .elevator_allow_merge_fn = bfq_allow_merge, - .elevator_dispatch_fn = bfq_dispatch_requests, - .elevator_add_req_fn = bfq_insert_request, - .elevator_activate_req_fn = bfq_activate_request, - .elevator_deactivate_req_fn = bfq_deactivate_request, - .elevator_completed_req_fn = bfq_completed_request, - .elevator_former_req_fn = elv_rb_former_request, - .elevator_latter_req_fn = elv_rb_latter_request, - .elevator_set_req_fn = bfq_set_request, - .elevator_put_req_fn = bfq_put_request, - .elevator_may_queue_fn = bfq_may_queue, - .elevator_init_fn = bfq_init_queue, - .elevator_exit_fn = bfq_exit_queue, - .trim = bfq_free_io_context, - }, - .elevator_attrs = bfq_attrs, - .elevator_name = "bfq", - .elevator_owner = THIS_MODULE, -}; - -static int __init bfq_init(void) -{ - /* - * Can be 0 on HZ < 1000 setups. - */ - //if (bfq_slice_idle == 0) - // bfq_slice_idle = 1; - bfq_slice_idle = 0; - - //if (bfq_timeout_async == 0) - // bfq_timeout_async = 1; - - if (bfq_slab_setup()) - return -ENOMEM; - - elv_register(&iosched_bfq); - - return 0; -} - -static void __exit bfq_exit(void) -{ - DECLARE_COMPLETION_ONSTACK(all_gone); - elv_unregister(&iosched_bfq); - bfq_ioc_gone = &all_gone; - /* bfq_ioc_gone's update must be visible before reading bfq_ioc_count */ - smp_wmb(); - if (elv_ioc_count_read(bfq_ioc_count) != 0) - wait_for_completion(&all_gone); - ida_destroy(&cic_index_ida); - bfq_slab_kill(); -} - -module_init(bfq_init); -module_exit(bfq_exit); - -MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Budget Fair Queueing IO scheduler"); diff --git a/block/bfq-sched.c b/block/bfq-sched.c deleted file mode 100644 index 39779a8b94e..00000000000 --- a/block/bfq-sched.c +++ /dev/null @@ -1,1040 +0,0 @@ -/* - * BFQ: Hierarchical B-WF2Q+ scheduler. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe - * - * Copyright (C) 2008 Fabio Checconi - * Paolo Valente - * - * Copyright (C) 2010 Paolo Valente - */ - -#ifdef CONFIG_CGROUP_BFQIO -#define for_each_entity(entity) \ - for (; entity != NULL; entity = entity->parent) - -#define for_each_entity_safe(entity, parent) \ - for (; entity && ({ parent = entity->parent; 1; }); entity = parent) - -static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, - int extract, - struct bfq_data *bfqd); - -static inline void bfq_update_budget(struct bfq_entity *next_active) -{ - struct bfq_entity *bfqg_entity; - struct bfq_group *bfqg; - struct bfq_sched_data *group_sd; - - BUG_ON(next_active == NULL); - - group_sd = next_active->sched_data; - - bfqg = container_of(group_sd, struct bfq_group, sched_data); - /* - * bfq_group's my_entity field is not NULL only if the group - * is not the root group. We must not touch the root entity - * as it must never become an active entity. - */ - bfqg_entity = bfqg->my_entity; - if (bfqg_entity != NULL) - bfqg_entity->budget = next_active->budget; -} - -static int bfq_update_next_active(struct bfq_sched_data *sd) -{ - struct bfq_entity *next_active; - - if (sd->active_entity != NULL) - /* will update/requeue at the end of service */ - return 0; - - /* - * NOTE: this can be improved in many ways, such as returning - * 1 (and thus propagating upwards the update) only when the - * budget changes, or caching the bfqq that will be scheduled - * next from this subtree. 
By now we worry more about - * correctness than about performance... - */ - next_active = bfq_lookup_next_entity(sd, 0, NULL); - sd->next_active = next_active; - - if (next_active != NULL) - bfq_update_budget(next_active); - - return 1; -} - -static inline void bfq_check_next_active(struct bfq_sched_data *sd, - struct bfq_entity *entity) -{ - BUG_ON(sd->next_active != entity); -} -#else -#define for_each_entity(entity) \ - for (; entity != NULL; entity = NULL) - -#define for_each_entity_safe(entity, parent) \ - for (parent = NULL; entity != NULL; entity = parent) - -static inline int bfq_update_next_active(struct bfq_sched_data *sd) -{ - return 0; -} - -static inline void bfq_check_next_active(struct bfq_sched_data *sd, - struct bfq_entity *entity) -{ -} - -static inline void bfq_update_budget(struct bfq_entity *next_active) -{ -} -#endif - -/* - * Shift for timestamp calculations. This actually limits the maximum - * service allowed in one timestamp delta (small shift values increase it), - * the maximum total weight that can be used for the queues in the system - * (big shift values increase it), and the period of virtual time wraparounds. - */ -#define WFQ_SERVICE_SHIFT 22 - -/** - * bfq_gt - compare two timestamps. - * @a: first ts. - * @b: second ts. - * - * Return @a > @b, dealing with wrapping correctly. - */ -static inline int bfq_gt(u64 a, u64 b) -{ - return (s64)(a - b) > 0; -} - -static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = NULL; - - BUG_ON(entity == NULL); - - if (entity->my_sched_data == NULL) - bfqq = container_of(entity, struct bfq_queue, entity); - - return bfqq; -} - - -/** - * bfq_delta - map service into the virtual time domain. - * @service: amount of service. - * @weight: scale factor (weight of an entity or weight sum). - */ -static inline u64 bfq_delta(unsigned long service, - unsigned long weight) -{ - u64 d = (u64)service << WFQ_SERVICE_SHIFT; - - do_div(d, weight); - return d; -} - -/** - * bfq_calc_finish - assign the finish time to an entity. - * @entity: the entity to act upon. - * @service: the service to be charged to the entity. - */ -static inline void bfq_calc_finish(struct bfq_entity *entity, - unsigned long service) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - BUG_ON(entity->weight == 0); - - entity->finish = entity->start + - bfq_delta(service, entity->weight); - - if (bfqq != NULL) { - bfq_log_bfqq(bfqq->bfqd, bfqq, - "calc_finish: serv %lu, w %d", - service, entity->weight); - bfq_log_bfqq(bfqq->bfqd, bfqq, - "calc_finish: start %llu, finish %llu, delta %llu", - entity->start, entity->finish, - bfq_delta(service, entity->weight)); - } -} - -/** - * bfq_entity_of - get an entity from a node. - * @node: the node field of the entity. - * - * Convert a node pointer to the relative entity. This is used only - * to simplify the logic of some functions and not as the generic - * conversion mechanism because, e.g., in the tree walking functions, - * the check for a %NULL value would be redundant. - */ -static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) -{ - struct bfq_entity *entity = NULL; - - if (node != NULL) - entity = rb_entry(node, struct bfq_entity, rb_node); - - return entity; -} - -/** - * bfq_extract - remove an entity from a tree. - * @root: the tree root. - * @entity: the entity to remove. 
- */ -static inline void bfq_extract(struct rb_root *root, - struct bfq_entity *entity) -{ - BUG_ON(entity->tree != root); - - entity->tree = NULL; - rb_erase(&entity->rb_node, root); -} - -/** - * bfq_idle_extract - extract an entity from the idle tree. - * @st: the service tree of the owning @entity. - * @entity: the entity being removed. - */ -static void bfq_idle_extract(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - struct rb_node *next; - - BUG_ON(entity->tree != &st->idle); - - if (entity == st->first_idle) { - next = rb_next(&entity->rb_node); - st->first_idle = bfq_entity_of(next); - } - - if (entity == st->last_idle) { - next = rb_prev(&entity->rb_node); - st->last_idle = bfq_entity_of(next); - } - - bfq_extract(&st->idle, entity); - - if (bfqq != NULL) - list_del(&bfqq->bfqq_list); -} - -/** - * bfq_insert - generic tree insertion. - * @root: tree root. - * @entity: entity to insert. - * - * This is used for the idle and the active tree, since they are both - * ordered by finish time. - */ -static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) -{ - struct bfq_entity *entry; - struct rb_node **node = &root->rb_node; - struct rb_node *parent = NULL; - - BUG_ON(entity->tree != NULL); - - while (*node != NULL) { - parent = *node; - entry = rb_entry(parent, struct bfq_entity, rb_node); - - if (bfq_gt(entry->finish, entity->finish)) - node = &parent->rb_left; - else - node = &parent->rb_right; - } - - rb_link_node(&entity->rb_node, parent, node); - rb_insert_color(&entity->rb_node, root); - - entity->tree = root; -} - -/** - * bfq_update_min - update the min_start field of a entity. - * @entity: the entity to update. - * @node: one of its children. - * - * This function is called when @entity may store an invalid value for - * min_start due to updates to the active tree. The function assumes - * that the subtree rooted at @node (which may be its left or its right - * child) has a valid min_start value. - */ -static inline void bfq_update_min(struct bfq_entity *entity, - struct rb_node *node) -{ - struct bfq_entity *child; - - if (node != NULL) { - child = rb_entry(node, struct bfq_entity, rb_node); - if (bfq_gt(entity->min_start, child->min_start)) - entity->min_start = child->min_start; - } -} - -/** - * bfq_update_active_node - recalculate min_start. - * @node: the node to update. - * - * @node may have changed position or one of its children may have moved, - * this function updates its min_start value. The left and right subtrees - * are assumed to hold a correct min_start value. - */ -static inline void bfq_update_active_node(struct rb_node *node) -{ - struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); - - entity->min_start = entity->start; - bfq_update_min(entity, node->rb_right); - bfq_update_min(entity, node->rb_left); -} - -/** - * bfq_update_active_tree - update min_start for the whole active tree. - * @node: the starting node. - * - * @node must be the deepest modified node after an update. This function - * updates its min_start using the values held by its children, assuming - * that they did not change, and then updates all the nodes that may have - * changed in the path to the root. The only nodes that may have changed - * are the ones in the path or their siblings. 
- */ -static void bfq_update_active_tree(struct rb_node *node) -{ - struct rb_node *parent; - -up: - bfq_update_active_node(node); - - parent = rb_parent(node); - if (parent == NULL) - return; - - if (node == parent->rb_left && parent->rb_right != NULL) - bfq_update_active_node(parent->rb_right); - else if (parent->rb_left != NULL) - bfq_update_active_node(parent->rb_left); - - node = parent; - goto up; -} - -/** - * bfq_active_insert - insert an entity in the active tree of its group/device. - * @st: the service tree of the entity. - * @entity: the entity being inserted. - * - * The active tree is ordered by finish time, but an extra key is kept - * per each node, containing the minimum value for the start times of - * its children (and the node itself), so it's possible to search for - * the eligible node with the lowest finish time in logarithmic time. - */ -static void bfq_active_insert(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - struct rb_node *node = &entity->rb_node; - - bfq_insert(&st->active, entity); - - if (node->rb_left != NULL) - node = node->rb_left; - else if (node->rb_right != NULL) - node = node->rb_right; - - bfq_update_active_tree(node); - - if (bfqq != NULL) - list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); -} - -/** - * bfq_ioprio_to_weight - calc a weight from an ioprio. - * @ioprio: the ioprio value to convert. - */ -static unsigned short bfq_ioprio_to_weight(int ioprio) -{ - WARN_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); - return IOPRIO_BE_NR - ioprio; -} - -/** - * bfq_weight_to_ioprio - calc an ioprio from a weight. - * @weight: the weight value to convert. - * - * To preserve as mush as possible the old only-ioprio user interface, - * 0 is used as an escape ioprio value for weights (numerically) equal or - * larger than IOPRIO_BE_NR - */ -static unsigned short bfq_weight_to_ioprio(int weight) -{ - WARN_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); - return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight; -} - -static inline void bfq_get_entity(struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - if (bfqq != NULL) { - atomic_inc(&bfqq->ref); - bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", - bfqq, atomic_read(&bfqq->ref)); - } -} - -/** - * bfq_find_deepest - find the deepest node that an extraction can modify. - * @node: the node being removed. - * - * Do the first step of an extraction in an rb tree, looking for the - * node that will replace @node, and returning the deepest node that - * the following modifications to the tree can touch. If @node is the - * last node in the tree return %NULL. - */ -static struct rb_node *bfq_find_deepest(struct rb_node *node) -{ - struct rb_node *deepest; - - if (node->rb_right == NULL && node->rb_left == NULL) - deepest = rb_parent(node); - else if (node->rb_right == NULL) - deepest = node->rb_left; - else if (node->rb_left == NULL) - deepest = node->rb_right; - else { - deepest = rb_next(node); - if (deepest->rb_right != NULL) - deepest = deepest->rb_right; - else if (rb_parent(deepest) != node) - deepest = rb_parent(deepest); - } - - return deepest; -} - -/** - * bfq_active_extract - remove an entity from the active tree. - * @st: the service_tree containing the tree. - * @entity: the entity being removed. 
- */ -static void bfq_active_extract(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - struct rb_node *node; - - node = bfq_find_deepest(&entity->rb_node); - bfq_extract(&st->active, entity); - - if (node != NULL) - bfq_update_active_tree(node); - - if (bfqq != NULL) - list_del(&bfqq->bfqq_list); -} - -/** - * bfq_idle_insert - insert an entity into the idle tree. - * @st: the service tree containing the tree. - * @entity: the entity to insert. - */ -static void bfq_idle_insert(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - struct bfq_entity *first_idle = st->first_idle; - struct bfq_entity *last_idle = st->last_idle; - - if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) - st->first_idle = entity; - if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) - st->last_idle = entity; - - bfq_insert(&st->idle, entity); - - if (bfqq != NULL) - list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); -} - -/** - * bfq_forget_entity - remove an entity from the wfq trees. - * @st: the service tree. - * @entity: the entity being removed. - * - * Update the device status and forget everything about @entity, putting - * the device reference to it, if it is a queue. Entities belonging to - * groups are not refcounted. - */ -static void bfq_forget_entity(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - BUG_ON(!entity->on_st); - - entity->on_st = 0; - st->wsum -= entity->weight; - if (bfqq != NULL) { - bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", - bfqq, atomic_read(&bfqq->ref)); - bfq_put_queue(bfqq); - } -} - -/** - * bfq_put_idle_entity - release the idle tree ref of an entity. - * @st: service tree for the entity. - * @entity: the entity being released. - */ -static void bfq_put_idle_entity(struct bfq_service_tree *st, - struct bfq_entity *entity) -{ - bfq_idle_extract(st, entity); - bfq_forget_entity(st, entity); -} - -/** - * bfq_forget_idle - update the idle tree if necessary. - * @st: the service tree to act upon. - * - * To preserve the global O(log N) complexity we only remove one entry here; - * as the idle tree will not grow indefinitely this can be done safely. - */ -static void bfq_forget_idle(struct bfq_service_tree *st) -{ - struct bfq_entity *first_idle = st->first_idle; - struct bfq_entity *last_idle = st->last_idle; - - if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && - !bfq_gt(last_idle->finish, st->vtime)) { - /* - * Forget the whole idle tree, increasing the vtime past - * the last finish time of idle entities. 
- */ - st->vtime = last_idle->finish; - } - - if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) - bfq_put_idle_entity(st, first_idle); -} - -static struct bfq_service_tree * -__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - struct bfq_entity *entity) -{ - struct bfq_service_tree *new_st = old_st; - - if (entity->ioprio_changed) { - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - - BUG_ON(old_st->wsum < entity->weight); - old_st->wsum -= entity->weight; - - if (entity->new_weight != entity->orig_weight) { - entity->orig_weight = entity->new_weight; - entity->ioprio = - bfq_weight_to_ioprio(entity->orig_weight); - } else if (entity->new_ioprio != entity->ioprio) { - entity->ioprio = entity->new_ioprio; - entity->orig_weight = - bfq_ioprio_to_weight(entity->ioprio); - } else - entity->new_weight = entity->orig_weight = - bfq_ioprio_to_weight(entity->ioprio); - - entity->ioprio_class = entity->new_ioprio_class; - entity->ioprio_changed = 0; - - /* - * NOTE: here we may be changing the weight too early, - * this will cause unfairness. The correct approach - * would have required additional complexity to defer - * weight changes to the proper time instants (i.e., - * when entity->finish <= old_st->vtime). - */ - new_st = bfq_entity_service_tree(entity); - entity->weight = entity->orig_weight * - (bfqq != NULL ? bfqq->raising_coeff : 1); - new_st->wsum += entity->weight; - - if (new_st != old_st) - entity->start = new_st->vtime; - } - - return new_st; -} - -/** - * bfq_bfqq_served - update the scheduler status after selection for service. - * @bfqq: the queue being served. - * @served: bytes to transfer. - * - * NOTE: this can be optimized, as the timestamps of upper level entities - * are synchronized every time a new bfqq is selected for service. By now, - * we keep it to better check consistency. - */ -static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) -{ - struct bfq_entity *entity = &bfqq->entity; - struct bfq_service_tree *st; - - for_each_entity(entity) { - st = bfq_entity_service_tree(entity); - - entity->service += served; - BUG_ON(entity->service > entity->budget); - BUG_ON(st->wsum == 0); - - st->vtime += bfq_delta(served, st->wsum); - bfq_forget_idle(st); - } - bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); -} - -/** - * bfq_bfqq_charge_full_budget - set the service to the entity budget. - * @bfqq: the queue that needs a service update. - * - * When it's not possible to be fair in the service domain, because - * a queue is not consuming its budget fast enough (the meaning of - * fast depends on the timeout parameter), we charge it a full - * budget. In this way we should obtain a sort of time-domain - * fairness among all the seeky/slow queues. - */ -static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) -{ - struct bfq_entity *entity = &bfqq->entity; - - bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); - - bfq_bfqq_served(bfqq, entity->budget - entity->service); -} - -/** - * __bfq_activate_entity - activate an entity. - * @entity: the entity being activated. - * - * Called whenever an entity is activated, i.e., it is not active and one - * of its children receives a new request, or has to be reactivated due to - * budget exhaustion. It uses the current budget of the entity (and the - * service received if @entity is active) of the queue to calculate its - * timestamps. 
- */ -static void __bfq_activate_entity(struct bfq_entity *entity) -{ - struct bfq_sched_data *sd = entity->sched_data; - struct bfq_service_tree *st = bfq_entity_service_tree(entity); - - if (entity == sd->active_entity) { - BUG_ON(entity->tree != NULL); - /* - * If we are requeueing the current entity we have - * to take care of not charging to it service it has - * not received. - */ - bfq_calc_finish(entity, entity->service); - entity->start = entity->finish; - sd->active_entity = NULL; - } else if (entity->tree == &st->active) { - /* - * Requeueing an entity due to a change of some - * next_active entity below it. We reuse the old - * start time. - */ - bfq_active_extract(st, entity); - } else if (entity->tree == &st->idle) { - /* - * Must be on the idle tree, bfq_idle_extract() will - * check for that. - */ - bfq_idle_extract(st, entity); - entity->start = bfq_gt(st->vtime, entity->finish) ? - st->vtime : entity->finish; - } else { - /* - * The finish time of the entity may be invalid, and - * it is in the past for sure, otherwise the queue - * would have been on the idle tree. - */ - entity->start = st->vtime; - st->wsum += entity->weight; - bfq_get_entity(entity); - - BUG_ON(entity->on_st); - entity->on_st = 1; - } - - st = __bfq_entity_update_weight_prio(st, entity); - bfq_calc_finish(entity, entity->budget); - bfq_active_insert(st, entity); -} - -/** - * bfq_activate_entity - activate an entity and its ancestors if necessary. - * @entity: the entity to activate. - * - * Activate @entity and all the entities on the path from it to the root. - */ -static void bfq_activate_entity(struct bfq_entity *entity) -{ - struct bfq_sched_data *sd; - - for_each_entity(entity) { - __bfq_activate_entity(entity); - - sd = entity->sched_data; - if (!bfq_update_next_active(sd)) - /* - * No need to propagate the activation to the - * upper entities, as they will be updated when - * the active entity is rescheduled. - */ - break; - } -} - -/** - * __bfq_deactivate_entity - deactivate an entity from its service tree. - * @entity: the entity to deactivate. - * @requeue: if false, the entity will not be put into the idle tree. - * - * Deactivate an entity, independently from its previous state. If the - * entity was not on a service tree just return, otherwise if it is on - * any scheduler tree, extract it from that tree, and if necessary - * and if the caller did not specify @requeue, put it on the idle tree. - * - * Return %1 if the caller should update the entity hierarchy, i.e., - * if the entity was under service or if it was the next_active for - * its sched_data; return %0 otherwise. 
- */ -static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -{ - struct bfq_sched_data *sd = entity->sched_data; - struct bfq_service_tree *st = bfq_entity_service_tree(entity); - int was_active = entity == sd->active_entity; - int ret = 0; - - if (!entity->on_st) - return 0; - - BUG_ON(was_active && entity->tree != NULL); - - if (was_active) { - bfq_calc_finish(entity, entity->service); - sd->active_entity = NULL; - } else if (entity->tree == &st->active) - bfq_active_extract(st, entity); - else if (entity->tree == &st->idle) - bfq_idle_extract(st, entity); - else if (entity->tree != NULL) - BUG(); - - if (was_active || sd->next_active == entity) - ret = bfq_update_next_active(sd); - - if (!requeue || !bfq_gt(entity->finish, st->vtime)) - bfq_forget_entity(st, entity); - else - bfq_idle_insert(st, entity); - - BUG_ON(sd->active_entity == entity); - BUG_ON(sd->next_active == entity); - - return ret; -} - -/** - * bfq_deactivate_entity - deactivate an entity. - * @entity: the entity to deactivate. - * @requeue: true if the entity can be put on the idle tree - */ -static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -{ - struct bfq_sched_data *sd; - struct bfq_entity *parent; - - for_each_entity_safe(entity, parent) { - sd = entity->sched_data; - - if (!__bfq_deactivate_entity(entity, requeue)) - /* - * The parent entity is still backlogged, and - * we don't need to update it as it is still - * under service. - */ - break; - - if (sd->next_active != NULL) - /* - * The parent entity is still backlogged and - * the budgets on the path towards the root - * need to be updated. - */ - goto update; - - /* - * If we reach there the parent is no more backlogged and - * we want to propagate the dequeue upwards. - */ - requeue = 1; - } - - return; - -update: - entity = parent; - for_each_entity(entity) { - __bfq_activate_entity(entity); - - sd = entity->sched_data; - if (!bfq_update_next_active(sd)) - break; - } -} - -/** - * bfq_update_vtime - update vtime if necessary. - * @st: the service tree to act upon. - * - * If necessary update the service tree vtime to have at least one - * eligible entity, skipping to its start time. Assumes that the - * active tree of the device is not empty. - * - * NOTE: this hierarchical implementation updates vtimes quite often, - * we may end up with reactivated tasks getting timestamps after a - * vtime skip done because we needed a ->first_active entity on some - * intermediate node. - */ -static void bfq_update_vtime(struct bfq_service_tree *st) -{ - struct bfq_entity *entry; - struct rb_node *node = st->active.rb_node; - - entry = rb_entry(node, struct bfq_entity, rb_node); - if (bfq_gt(entry->min_start, st->vtime)) { - st->vtime = entry->min_start; - bfq_forget_idle(st); - } -} - -/** - * bfq_first_active - find the eligible entity with the smallest finish time - * @st: the service tree to select from. - * - * This function searches the first schedulable entity, starting from the - * root of the tree and going on the left every time on this side there is - * a subtree with at least one eligible (start >= vtime) entity. The path - * on the right is followed only if a) the left subtree contains no eligible - * entities and b) no eligible entity has been found yet. 
- */ -static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) -{ - struct bfq_entity *entry, *first = NULL; - struct rb_node *node = st->active.rb_node; - - while (node != NULL) { - entry = rb_entry(node, struct bfq_entity, rb_node); -left: - if (!bfq_gt(entry->start, st->vtime)) - first = entry; - - BUG_ON(bfq_gt(entry->min_start, st->vtime)); - - if (node->rb_left != NULL) { - entry = rb_entry(node->rb_left, - struct bfq_entity, rb_node); - if (!bfq_gt(entry->min_start, st->vtime)) { - node = node->rb_left; - goto left; - } - } - if (first != NULL) - break; - node = node->rb_right; - } - - BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); - return first; -} - -/** - * __bfq_lookup_next_entity - return the first eligible entity in @st. - * @st: the service tree. - * - * Update the virtual time in @st and return the first eligible entity - * it contains. - */ -static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, - bool force) -{ - struct bfq_entity *entity, *new_next_active = NULL; - - if (RB_EMPTY_ROOT(&st->active)) - return NULL; - - bfq_update_vtime(st); - entity = bfq_first_active_entity(st); - BUG_ON(bfq_gt(entity->start, st->vtime)); - - /* - * If the chosen entity does not match with the sched_data's - * next_active and we are forcedly serving the IDLE priority - * class tree, bubble up budget update. - */ - if (unlikely(force && entity != entity->sched_data->next_active)) { - new_next_active = entity; - for_each_entity(new_next_active) - bfq_update_budget(new_next_active); - } - - return entity; -} - -/** - * bfq_lookup_next_entity - return the first eligible entity in @sd. - * @sd: the sched_data. - * @extract: if true the returned entity will be also extracted from @sd. - * - * NOTE: since we cache the next_active entity at each level of the - * hierarchy, the complexity of the lookup can be decreased with - * absolutely no effort just returning the cached next_active value; - * we prefer to do full lookups to test the consistency of * the data - * structures. - */ -static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, - int extract, - struct bfq_data *bfqd) -{ - struct bfq_service_tree *st = sd->service_tree; - struct bfq_entity *entity; - int i=0; - - BUG_ON(sd->active_entity != NULL); - - if (bfqd != NULL && - jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { - entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, true); - if (entity != NULL) { - i = BFQ_IOPRIO_CLASSES - 1; - bfqd->bfq_class_idle_last_service = jiffies; - sd->next_active = entity; - } - } - for (; i < BFQ_IOPRIO_CLASSES; i++) { - entity = __bfq_lookup_next_entity(st + i, false); - if (entity != NULL) { - if (extract) { - bfq_check_next_active(sd, entity); - bfq_active_extract(st + i, entity); - sd->active_entity = entity; - sd->next_active = NULL; - } - break; - } - } - - return entity; -} - -/* - * Get next queue for service. 
- */ -static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) -{ - struct bfq_entity *entity = NULL; - struct bfq_sched_data *sd; - struct bfq_queue *bfqq; - - BUG_ON(bfqd->active_queue != NULL); - - if (bfqd->busy_queues == 0) - return NULL; - - sd = &bfqd->root_group->sched_data; - for (; sd != NULL; sd = entity->my_sched_data) { - entity = bfq_lookup_next_entity(sd, 1, bfqd); - BUG_ON(entity == NULL); - entity->service = 0; - } - - bfqq = bfq_entity_to_bfqq(entity); - BUG_ON(bfqq == NULL); - - return bfqq; -} - -static void __bfq_bfqd_reset_active(struct bfq_data *bfqd) -{ - if (bfqd->active_cic != NULL) { - put_io_context(bfqd->active_cic->ioc); - bfqd->active_cic = NULL; - } - - bfqd->active_queue = NULL; - del_timer(&bfqd->idle_slice_timer); -} - -static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, - int requeue) -{ - struct bfq_entity *entity = &bfqq->entity; - - if (bfqq == bfqd->active_queue) - __bfq_bfqd_reset_active(bfqd); - - bfq_deactivate_entity(entity, requeue); -} - -static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) -{ - struct bfq_entity *entity = &bfqq->entity; - - bfq_activate_entity(entity); -} - -/* - * Called when the bfqq no longer has requests pending, remove it from - * the service tree. - */ -static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, - int requeue) -{ - BUG_ON(!bfq_bfqq_busy(bfqq)); - BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); - - bfq_log_bfqq(bfqd, bfqq, "del from busy"); - - bfq_clear_bfqq_busy(bfqq); - - BUG_ON(bfqd->busy_queues == 0); - bfqd->busy_queues--; - - bfq_deactivate_bfqq(bfqd, bfqq, requeue); -} - -/* - * Called when an inactive queue receives a new request. - */ -static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) -{ - BUG_ON(bfq_bfqq_busy(bfqq)); - BUG_ON(bfqq == bfqd->active_queue); - - bfq_log_bfqq(bfqd, bfqq, "add to busy"); - - bfq_activate_bfqq(bfqd, bfqq); - - bfq_mark_bfqq_busy(bfqq); - bfqd->busy_queues++; -} diff --git a/block/bfq.h b/block/bfq.h deleted file mode 100644 index 8d507533998..00000000000 --- a/block/bfq.h +++ /dev/null @@ -1,606 +0,0 @@ -/* - * BFQ-v6r2 for 3.1.0: data structures and common functions prototypes. - * - * Based on ideas and code from CFQ: - * Copyright (C) 2003 Jens Axboe - * - * Copyright (C) 2008 Fabio Checconi - * Paolo Valente - * - * Copyright (C) 2010 Paolo Valente - */ - -#ifndef _BFQ_H -#define _BFQ_H - -#include -#include -#include -#include - -#define BFQ_IOPRIO_CLASSES 3 -#define BFQ_CL_IDLE_TIMEOUT HZ/5 - -#define BFQ_MIN_WEIGHT 1 -#define BFQ_MAX_WEIGHT 1000 - -#define BFQ_DEFAULT_GRP_WEIGHT 10 -#define BFQ_DEFAULT_GRP_IOPRIO 0 -#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE - -struct bfq_entity; - -/** - * struct bfq_service_tree - per ioprio_class service tree. - * @active: tree for active entities (i.e., those backlogged). - * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). - * @first_idle: idle entity with minimum F_i. - * @last_idle: idle entity with maximum F_i. - * @vtime: scheduler virtual time. - * @wsum: scheduler weight sum; active and idle entities contribute to it. - * - * Each service tree represents a B-WF2Q+ scheduler on its own. Each - * ioprio_class has its own independent scheduler, and so its own - * bfq_service_tree. All the fields are protected by the queue lock - * of the containing bfqd. 
- */ -struct bfq_service_tree { - struct rb_root active; - struct rb_root idle; - - struct bfq_entity *first_idle; - struct bfq_entity *last_idle; - - u64 vtime; - unsigned long wsum; -}; - -/** - * struct bfq_sched_data - multi-class scheduler. - * @active_entity: entity under service. - * @next_active: head-of-the-line entity in the scheduler. - * @service_tree: array of service trees, one per ioprio_class. - * - * bfq_sched_data is the basic scheduler queue. It supports three - * ioprio_classes, and can be used either as a toplevel queue or as - * an intermediate queue on a hierarchical setup. - * @next_active points to the active entity of the sched_data service - * trees that will be scheduled next. - * - * The supported ioprio_classes are the same as in CFQ, in descending - * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. - * Requests from higher priority queues are served before all the - * requests from lower priority queues; among requests of the same - * queue requests are served according to B-WF2Q+. - * All the fields are protected by the queue lock of the containing bfqd. - */ -struct bfq_sched_data { - struct bfq_entity *active_entity; - struct bfq_entity *next_active; - struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; -}; - -/** - * struct bfq_entity - schedulable entity. - * @rb_node: service_tree member. - * @on_st: flag, true if the entity is on a tree (either the active or - * the idle one of its service_tree). - * @finish: B-WF2Q+ finish timestamp (aka F_i). - * @start: B-WF2Q+ start timestamp (aka S_i). - * @tree: tree the entity is enqueued into; %NULL if not on a tree. - * @min_start: minimum start time of the (active) subtree rooted at - * this entity; used for O(log N) lookups into active trees. - * @service: service received during the last round of service. - * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. - * @weight: weight of the queue - * @parent: parent entity, for hierarchical scheduling. - * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the - * associated scheduler queue, %NULL on leaf nodes. - * @sched_data: the scheduler queue this entity belongs to. - * @ioprio: the ioprio in use. - * @new_weight: when a weight change is requested, the new weight value. - * @orig_weight: original weight, used to implement weight boosting - * @new_ioprio: when an ioprio change is requested, the new ioprio value. - * @ioprio_class: the ioprio_class in use. - * @new_ioprio_class: when an ioprio_class change is requested, the new - * ioprio_class value. - * @ioprio_changed: flag, true when the user requested a weight, ioprio or - * ioprio_class change. - * - * A bfq_entity is used to represent either a bfq_queue (leaf node in the - * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each - * entity belongs to the sched_data of the parent group in the cgroup - * hierarchy. Non-leaf entities have also their own sched_data, stored - * in @my_sched_data. - * - * Each entity stores independently its priority values; this would - * allow different weights on different devices, but this - * functionality is not exported to userspace by now. Priorities and - * weights are updated lazily, first storing the new values into the - * new_* fields, then setting the @ioprio_changed flag. As soon as - * there is a transition in the entity state that allows the priority - * update to take place the effective and the requested priority - * values are synchronized. 
- * - * Unless cgroups are used, the weight value is calculated from the - * ioprio to export the same interface as CFQ. When dealing with - * ``well-behaved'' queues (i.e., queues that do not spend too much - * time to consume their budget and have true sequential behavior, and - * when there are no external factors breaking anticipation) the - * relative weights at each level of the cgroups hierarchy should be - * guaranteed. All the fields are protected by the queue lock of the - * containing bfqd. - */ -struct bfq_entity { - struct rb_node rb_node; - - int on_st; - - u64 finish; - u64 start; - - struct rb_root *tree; - - u64 min_start; - - unsigned long service, budget; - unsigned short weight, new_weight; - unsigned short orig_weight; - - struct bfq_entity *parent; - - struct bfq_sched_data *my_sched_data; - struct bfq_sched_data *sched_data; - - unsigned short ioprio, new_ioprio; - unsigned short ioprio_class, new_ioprio_class; - - int ioprio_changed; -}; - -struct bfq_group; - -/** - * struct bfq_queue - leaf schedulable entity. - * @ref: reference counter. - * @bfqd: parent bfq_data. - * @new_bfqq: shared bfq_queue if queue is cooperating with - * one or more other queues. - * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). - * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). - * @sort_list: sorted list of pending requests. - * @next_rq: if fifo isn't expired, next request to serve. - * @queued: nr of requests queued in @sort_list. - * @allocated: currently allocated requests. - * @meta_pending: pending metadata requests. - * @fifo: fifo list of requests in sort_list. - * @entity: entity representing this queue in the scheduler. - * @max_budget: maximum budget allowed from the feedback mechanism. - * @budget_timeout: budget expiration (in jiffies). - * @dispatched: number of requests on the dispatch list or inside driver. - * @org_ioprio: saved ioprio during boosted periods. - * @flags: status flags. - * @bfqq_list: node for active/idle bfqq list inside our bfqd. - * @seek_samples: number of seeks sampled - * @seek_total: sum of the distances of the seeks sampled - * @seek_mean: mean seek distance - * @last_request_pos: position of the last request enqueued - * @pid: pid of the process owning the queue, used for logging purposes. - * @last_rais_start_time: last (idle -> weight-raised) transition attempt - * @raising_cur_max_time: current max raising time for this queue - * @cic: pointer to the cfq_io_context owning the bfq_queue, set to %NULL if the - * queue is shared - * - * A bfq_queue is a leaf request queue; it can be associated to an io_context - * or more (if it is an async one). @cgroup holds a reference to the - * cgroup, to be sure that it does not disappear while a bfqq still - * references it (mostly to avoid races between request issuing and task - * migration followed by cgroup distruction). - * All the fields are protected by the queue lock of the containing bfqd. 
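The timestamp relation quoted in the bfq_entity field descriptions above, F_i = S_i + budget/weight, is the core of the B-WF2Q+ bookkeeping. A minimal sketch of that computation with the same field names (illustrative only; the fixed-point shift below is a placeholder, the removed code does the scaling in its own helper):

/*
 * Sketch only: finish timestamp from start timestamp, budget and weight.
 * The shift keeps precision when dividing by the weight; the value used
 * here is an assumption, not the one from bfq-sched.c.
 */
#define EXAMPLE_WFQ_SERVICE_SHIFT       22

static inline u64 example_calc_finish(const struct bfq_entity *entity)
{
        u64 delta = ((u64)entity->budget << EXAMPLE_WFQ_SERVICE_SHIFT) /
                    entity->weight;

        return entity->start + delta;   /* F_i = S_i + budget / weight */
}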
- */ -struct bfq_queue { - atomic_t ref; - struct bfq_data *bfqd; - - /* fields for cooperating queues handling */ - struct bfq_queue *new_bfqq; - struct rb_node pos_node; - struct rb_root *pos_root; - - struct rb_root sort_list; - struct request *next_rq; - int queued[2]; - int allocated[2]; - int meta_pending; - struct list_head fifo; - - struct bfq_entity entity; - - unsigned long max_budget; - unsigned long budget_timeout; - - int dispatched; - - unsigned short org_ioprio; - - unsigned int flags; - - struct list_head bfqq_list; - - unsigned int seek_samples; - u64 seek_total; - sector_t seek_mean; - sector_t last_request_pos; - - pid_t pid; - - /* weight-raising fields */ - unsigned int raising_cur_max_time; - u64 last_rais_start_finish, soft_rt_next_start; - unsigned int raising_coeff; - - struct cfq_io_context *cic; -}; - -/** - * struct bfq_data - per device data structure. - * @queue: request queue for the managed device. - * @root_group: root bfq_group for the device. - * @rq_pos_tree: rbtree sorted by next_request position, - * used when determining if two or more queues - * have interleaving requests (see bfq_close_cooperator). - * @eqm_lock: spinlock used to protect all data structures pertaining - * the Early Queue Merge (EQM) mechanism. - * @busy_queues: number of bfq_queues containing requests (including the - * queue under service, even if it is idling). - * @queued: number of queued requests. - * @rq_in_driver: number of requests dispatched and waiting for completion. - * @sync_flight: number of sync requests in the driver. - * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples - * completed requests . - * @hw_tag_samples: nr of samples used to calculate hw_tag. - * @hw_tag: flag set to one if the driver is showing a queueing behavior. - * @budgets_assigned: number of budgets assigned. - * @idle_slice_timer: timer set when idling for the next sequential request - * from the queue under service. - * @unplug_work: delayed work to restart dispatching on the request queue. - * @active_queue: bfq_queue under service. - * @active_cic: cfq_io_context (cic) associated with the @active_queue. - * @last_position: on-disk position of the last served request. - * @last_budget_start: beginning of the last budget. - * @last_idling_start: beginning of the last idle slice. - * @peak_rate: peak transfer rate observed for a budget. - * @peak_rate_samples: number of samples used to calculate @peak_rate. - * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling. - * @cic_index: use small consequent indexes as radix tree keys to reduce depth - * @cic_list: list of all the cics active on the bfq_data device. - * @group_list: list of all the bfq_groups active on the device. - * @active_list: list of all the bfq_queues active on the device. - * @idle_list: list of all the bfq_queues idle on the device. - * @bfq_quantum: max number of requests dispatched per dispatch round. - * @bfq_fifo_expire: timeout for async/sync requests; when it expires - * requests are served in fifo order. - * @bfq_back_penalty: weight of backward seeks wrt forward ones. - * @bfq_back_max: maximum allowed backward seek. - * @bfq_slice_idle: maximum idling time. - * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning). - * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to - * async queues. 
- * @bfq_timeout: timeout for bfq_queues to consume their budget; used to - * to prevent seeky queues to impose long latencies to well - * behaved ones (this also implies that seeky queues cannot - * receive guarantees in the service domain; after a timeout - * they are charged for the whole allocated budget, to try - * to preserve a behavior reasonably fair among them, but - * without service-domain guarantees). - * @bfq_raising_coeff: Maximum factor by which the weight of a boosted - * queue is multiplied - * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies) - * @bfq_raising_rt_max_time: maximum duration for soft real-time processes - * @bfq_raising_min_idle_time: minimum idle period after which weight-raising - * may be reactivated for a queue (in jiffies) - * @bfq_raising_min_inter_arr_async: minimum period between request arrivals - * after which weight-raising may be - * reactivated for an already busy queue - * (in jiffies) - * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue, - * sectors per seconds - * @RT_prod: cached value of the product R*T used for computing the maximum - * duration of the weight raising automatically - * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions - * - * All the fields are protected by the @queue lock. - */ -struct bfq_data { - struct request_queue *queue; - - struct bfq_group *root_group; - - struct rb_root rq_pos_tree; - spinlock_t eqm_lock; - - int busy_queues; - int queued; - int rq_in_driver; - int sync_flight; - - int max_rq_in_driver; - int hw_tag_samples; - int hw_tag; - - int budgets_assigned; - - struct timer_list idle_slice_timer; - struct work_struct unplug_work; - - struct bfq_queue *active_queue; - struct cfq_io_context *active_cic; - - sector_t last_position; - - ktime_t last_budget_start; - ktime_t last_idling_start; - int peak_rate_samples; - u64 peak_rate; - unsigned long bfq_max_budget; - - unsigned int cic_index; - struct list_head cic_list; - struct hlist_head group_list; - struct list_head active_list; - struct list_head idle_list; - - unsigned int bfq_quantum; - unsigned int bfq_fifo_expire[2]; - unsigned int bfq_back_penalty; - unsigned int bfq_back_max; - unsigned int bfq_slice_idle; - u64 bfq_class_idle_last_service; - - unsigned int bfq_user_max_budget; - unsigned int bfq_max_budget_async_rq; - unsigned int bfq_timeout[2]; - - bool low_latency; - - /* parameters of the low_latency heuristics */ - unsigned int bfq_raising_coeff; - unsigned int bfq_raising_max_time; - unsigned int bfq_raising_rt_max_time; - unsigned int bfq_raising_min_idle_time; - unsigned int bfq_raising_min_inter_arr_async; - unsigned int bfq_raising_max_softrt_rate; - u64 RT_prod; - - struct bfq_queue oom_bfqq; -}; - -enum bfqq_state_flags { - BFQ_BFQQ_FLAG_busy = 0, /* has requests or is under service */ - BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ - BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ - BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ - BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ - BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ - BFQ_BFQQ_FLAG_sync, /* synchronous queue */ - BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ - BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ - BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ - BFQ_BFQQ_FLAG_some_coop_idle, /* some cooperator is inactive */ - BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ -}; - -#define BFQ_BFQQ_FNS(name) \ -static inline void 
bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ -{ \ - (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \ -} \ -static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ -{ \ - (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ -} \ -static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ -{ \ - return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ -} - -BFQ_BFQQ_FNS(busy); -BFQ_BFQQ_FNS(wait_request); -BFQ_BFQQ_FNS(must_alloc); -BFQ_BFQQ_FNS(fifo_expire); -BFQ_BFQQ_FNS(idle_window); -BFQ_BFQQ_FNS(prio_changed); -BFQ_BFQQ_FNS(sync); -BFQ_BFQQ_FNS(budget_new); -BFQ_BFQQ_FNS(coop); -BFQ_BFQQ_FNS(split_coop); -BFQ_BFQQ_FNS(some_coop_idle); -BFQ_BFQQ_FNS(just_split); -#undef BFQ_BFQQ_FNS - -/* Logging facilities. */ -#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ - blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) - -#define bfq_log(bfqd, fmt, args...) \ - blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) - -/* Expiration reasons. */ -enum bfqq_expiration { - BFQ_BFQQ_TOO_IDLE = 0, /* queue has been idling for too long */ - BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ - BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ - BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ -}; - -#ifdef CONFIG_CGROUP_BFQIO -/** - * struct bfq_group - per (device, cgroup) data structure. - * @entity: schedulable entity to insert into the parent group sched_data. - * @sched_data: own sched_data, to contain child entities (they may be - * both bfq_queues and bfq_groups). - * @group_node: node to be inserted into the bfqio_cgroup->group_data - * list of the containing cgroup's bfqio_cgroup. - * @bfqd_node: node to be inserted into the @bfqd->group_list list - * of the groups active on the same device; used for cleanup. - * @bfqd: the bfq_data for the device this group acts upon. - * @async_bfqq: array of async queues for all the tasks belonging to - * the group, one queue per ioprio value per ioprio_class, - * except for the idle class that has only one queue. - * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). - * @my_entity: pointer to @entity, %NULL for the toplevel group; used - * to avoid too many special cases during group creation/migration. - * - * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup - * there is a set of bfq_groups, each one collecting the lower-level - * entities belonging to the group that are acting on the same device. - * - * Locking works as follows: - * o @group_node is protected by the bfqio_cgroup lock, and is accessed - * via RCU from its readers. - * o @bfqd is protected by the queue lock, RCU is used to access it - * from the readers. - * o All the other fields are protected by the @bfqd queue lock. - */ -struct bfq_group { - struct bfq_entity entity; - struct bfq_sched_data sched_data; - - struct hlist_node group_node; - struct hlist_node bfqd_node; - - void *bfqd; - - struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; - struct bfq_queue *async_idle_bfqq; - - struct bfq_entity *my_entity; -}; - -/** - * struct bfqio_cgroup - bfq cgroup data structure. - * @css: subsystem state for bfq in the containing cgroup. - * @weight: cgroup weight. - * @ioprio: cgroup ioprio. - * @ioprio_class: cgroup ioprio_class. - * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. - * @group_data: list containing the bfq_group belonging to this cgroup. 
- * - * @group_data is accessed using RCU, with @lock protecting the updates, - * @ioprio and @ioprio_class are protected by @lock. - */ -struct bfqio_cgroup { - struct cgroup_subsys_state css; - - unsigned short weight, ioprio, ioprio_class; - - spinlock_t lock; - struct hlist_head group_data; -}; -#else -struct bfq_group { - struct bfq_sched_data sched_data; - - struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; - struct bfq_queue *async_idle_bfqq; -}; -#endif - -static inline struct bfq_service_tree * -bfq_entity_service_tree(struct bfq_entity *entity) -{ - struct bfq_sched_data *sched_data = entity->sched_data; - unsigned int idx = entity->ioprio_class - 1; - - BUG_ON(idx >= BFQ_IOPRIO_CLASSES); - BUG_ON(sched_data == NULL); - - return sched_data->service_tree + idx; -} - -static inline struct bfq_queue *cic_to_bfqq(struct cfq_io_context *cic, - int is_sync) -{ - return cic->cfqq[!!is_sync]; -} - -static inline void cic_set_bfqq(struct cfq_io_context *cic, - struct bfq_queue *bfqq, int is_sync) -{ - cic->cfqq[!!is_sync] = bfqq; -} - -static inline void call_for_each_cic(struct io_context *ioc, - void (*func)(struct io_context *, - struct cfq_io_context *)) -{ - struct cfq_io_context *cic; - struct hlist_node *n; - - rcu_read_lock(); - hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) - func(ioc, cic); - rcu_read_unlock(); -} - -#define CIC_DEAD_KEY 1ul -#define CIC_DEAD_INDEX_SHIFT 1 - -static inline void *bfqd_dead_key(struct bfq_data *bfqd) -{ - return (void *)(bfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); -} - -/** - * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. - * @ptr: a pointer to a bfqd. - * @flags: storage for the flags to be saved. - * - * This function allows cic->key and bfqg->bfqd to be protected by the - * queue lock of the bfqd they reference; the pointer is dereferenced - * under RCU, so the storage for bfqd is assured to be safe as long - * as the RCU read side critical section does not end. After the - * bfqd->queue->queue_lock is taken the pointer is rechecked, to be - * sure that no other writer accessed it. If we raced with a writer, - * the function returns NULL, with the queue unlocked, otherwise it - * returns the dereferenced pointer, with the queue locked. 
- */ -static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, - unsigned long *flags) -{ - struct bfq_data *bfqd; - - rcu_read_lock(); - bfqd = rcu_dereference(*(struct bfq_data **)ptr); - - if (bfqd != NULL && !((unsigned long) bfqd & CIC_DEAD_KEY)) { - spin_lock_irqsave(bfqd->queue->queue_lock, *flags); - if (*ptr == bfqd) - goto out; - spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); - } - - bfqd = NULL; -out: - rcu_read_unlock(); - return bfqd; -} - -static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, - unsigned long *flags) -{ - spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); -} - -static void bfq_changed_ioprio(struct io_context *ioc, - struct cfq_io_context *cic); -static void bfq_put_queue(struct bfq_queue *bfqq); -static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); -static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, - struct bfq_group *bfqg, int is_sync, - struct io_context *ioc, gfp_t gfp_mask); -static void bfq_end_raising_async_queues(struct bfq_data *bfqd, - struct bfq_group *bfqg); -static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); -static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); -#endif From 1d4b4bd751aa86c62a6ceb1b596d6106172fd02c Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 18 Jun 2014 01:26:36 -0400 Subject: [PATCH 639/678] Revert "[PATCH 2/4] block: cgroups, kconfig, build bits for BFQ-v6r2-3.1" This reverts commit 57e9eeb1d03b904755b86f6c240539a6a99839c7. --- include/linux/cgroup_subsys.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index f2ca8cf3a88..b7fd4c8c70c 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -69,10 +69,4 @@ SUBSYS(perf) SUBSYS(timer_slack) #endif -/* */ - -#ifdef CONFIG_CGROUP_BFQIO -SUBSYS(bfqio) -#endif - -/* */ +/* */ \ No newline at end of file From fbfa3ab82d4e890b5b8de9ffe42ef749c9e88731 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 18 Jun 2014 01:26:50 -0400 Subject: [PATCH 640/678] Revert "[PATCH 1/4] block: prepare I/O context code for BFQ-v6r2 for 3.1" This reverts commit 6bb77599c2f022a0a610261f93e9596e3bd9ae3d. --- block/Kconfig.iosched | 26 -------------------------- block/blk-ioc.c | 30 +++++++++++++----------------- block/cfq-iosched.c | 10 +++------- fs/ioprio.c | 9 ++------- include/linux/iocontext.h | 21 +++------------------ 5 files changed, 21 insertions(+), 75 deletions(-) diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 06ec27e59a0..8201a45cd26 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -71,28 +71,6 @@ config IOSCHED_VR Requests are chosen according to SSTF with a penalty of rev_penalty for switching head direction. -config IOSCHED_BFQ - tristate "BFQ I/O scheduler" - depends on EXPERIMENTAL - default n - ---help--- - The BFQ I/O scheduler tries to distribute bandwidth among - all processes according to their weights. - It aims at distributing the bandwidth as desired, independently of - the disk parameters and with any workload. It also tries to - guarantee low latency to interactive and soft real-time - applications. If compiled built-in (saying Y here), BFQ can - be configured to support hierarchical scheduling. - -config CGROUP_BFQIO - bool "BFQ hierarchical scheduling support" - depends on CGROUPS && IOSCHED_BFQ=y - default n - ---help--- - Enable hierarchical scheduling in BFQ, using the cgroups - filesystem interface. 
The name of the subsystem will be - bfqio. - choice prompt "Default I/O scheduler" default DEFAULT_CFQ @@ -116,9 +94,6 @@ choice config DEFAULT_CFQ bool "CFQ" if IOSCHED_CFQ=y - config DEFAULT_BFQ - bool "BFQ" if IOSCHED_BFQ=y - config DEFAULT_NOOP bool "No-op" @@ -135,7 +110,6 @@ config DEFAULT_IOSCHED default "deadline" if DEFAULT_DEADLINE default "row" if DEFAULT_ROW default "cfq" if DEFAULT_CFQ - default "bfq" if DEFAULT_BFQ default "noop" if DEFAULT_NOOP default "sio" if DEFAULT_SIO default "vr" if DEFAULT_VR diff --git a/block/blk-ioc.c b/block/blk-ioc.c index d0d16d4a79a..6f9bbd97865 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include /* for max_pfn/max_low_pfn */ #include @@ -17,12 +16,13 @@ */ static struct kmem_cache *iocontext_cachep; -static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list) +static void cfq_dtor(struct io_context *ioc) { - if (!hlist_empty(list)) { + if (!hlist_empty(&ioc->cic_list)) { struct cfq_io_context *cic; - cic = hlist_entry(list->first, struct cfq_io_context, cic_list); + cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, + cic_list); cic->dtor(ioc); } } @@ -40,9 +40,7 @@ int put_io_context(struct io_context *ioc) if (atomic_long_dec_and_test(&ioc->refcount)) { rcu_read_lock(); - - hlist_sched_dtor(ioc, &ioc->cic_list); - hlist_sched_dtor(ioc, &ioc->bfq_cic_list); + cfq_dtor(ioc); rcu_read_unlock(); kmem_cache_free(iocontext_cachep, ioc); @@ -52,14 +50,15 @@ int put_io_context(struct io_context *ioc) } EXPORT_SYMBOL(put_io_context); -static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list) +static void cfq_exit(struct io_context *ioc) { rcu_read_lock(); - if (!hlist_empty(list)) { + if (!hlist_empty(&ioc->cic_list)) { struct cfq_io_context *cic; - cic = hlist_entry(list->first, struct cfq_io_context, cic_list); + cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, + cic_list); cic->exit(ioc); } rcu_read_unlock(); @@ -75,10 +74,9 @@ void exit_io_context(struct task_struct *task) task->io_context = NULL; task_unlock(task); - if (atomic_dec_and_test(&ioc->nr_tasks)) { - hlist_sched_exit(ioc, &ioc->cic_list); - hlist_sched_exit(ioc, &ioc->bfq_cic_list); - } + if (atomic_dec_and_test(&ioc->nr_tasks)) + cfq_exit(ioc); + put_io_context(ioc); } @@ -91,14 +89,12 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) atomic_long_set(&ioc->refcount, 1); atomic_set(&ioc->nr_tasks, 1); spin_lock_init(&ioc->lock); - bitmap_zero(ioc->ioprio_changed, IOC_IOPRIO_CHANGED_BITS); + ioc->ioprio_changed = 0; ioc->ioprio = 0; ioc->last_waited = 0; /* doesn't matter... 
*/ ioc->nr_batch_requests = 0; /* because this is 0 */ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ioc->cic_list); - INIT_RADIX_TREE(&ioc->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH); - INIT_HLIST_HEAD(&ioc->bfq_cic_list); ioc->ioc_data = NULL; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) ioc->cgroup_changed = 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 0f60ba0ad87..97c3d462732 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2934,6 +2934,7 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) static void cfq_ioc_set_ioprio(struct io_context *ioc) { call_for_each_cic(ioc, changed_ioprio); + ioc->ioprio_changed = 0; } static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, @@ -3225,13 +3226,8 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) goto err_free; out: - /* - * test_and_clear_bit() implies a memory barrier, paired with - * the wmb() in fs/ioprio.c, so the value seen for ioprio is the - * new one. - */ - if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED, - ioc->ioprio_changed))) + smp_read_barrier_depends(); + if (unlikely(ioc->ioprio_changed)) cfq_ioc_set_ioprio(ioc); #ifdef CONFIG_CFQ_GROUP_IOSCHED diff --git a/fs/ioprio.c b/fs/ioprio.c index 95a6c2b04e0..7da2a06508e 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -30,7 +30,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) { - int err, i; + int err; struct io_context *ioc; const struct cred *cred = current_cred(), *tcred; @@ -60,17 +60,12 @@ int set_task_ioprio(struct task_struct *task, int ioprio) err = -ENOMEM; break; } - /* let other ioc users see the new values */ - smp_wmb(); task->io_context = ioc; } while (1); if (!err) { ioc->ioprio = ioprio; - /* make sure schedulers see the new ioprio value */ - wmb(); - for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++) - set_bit(i, ioc->ioprio_changed); + ioc->ioprio_changed = 1; } task_unlock(task); diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index fbdaa5aef61..5037a0ad231 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -1,10 +1,10 @@ #ifndef IOCONTEXT_H #define IOCONTEXT_H -#include #include #include +struct cfq_queue; struct cfq_ttime { unsigned long last_end_request; @@ -16,15 +16,12 @@ struct cfq_ttime { struct cfq_io_context { void *key; - void *cfqq[2]; + struct cfq_queue *cfqq[2]; struct io_context *ioc; struct cfq_ttime ttime; - unsigned int raising_time_left; - unsigned int saved_idle_window; - struct list_head queue_list; struct hlist_node cic_list; @@ -34,16 +31,6 @@ struct cfq_io_context { struct rcu_head rcu_head; }; -/* - * Indexes into the ioprio_changed bitmap. A bit set indicates that - * the corresponding I/O scheduler needs to see a ioprio update. - */ -enum { - IOC_CFQ_IOPRIO_CHANGED, - IOC_BFQ_IOPRIO_CHANGED, - IOC_IOPRIO_CHANGED_BITS -}; - /* * I/O subsystem state of the associated processes. It is refcounted * and kmalloc'ed. These could be shared between processes. 
@@ -56,7 +43,7 @@ struct io_context { spinlock_t lock; unsigned short ioprio; - DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS); + unsigned short ioprio_changed; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) unsigned short cgroup_changed; @@ -70,8 +57,6 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; - struct radix_tree_root bfq_radix_root; - struct hlist_head bfq_cic_list; void __rcu *ioc_data; }; From 0b3d4ac98d9657b3927313a591205861186c17d9 Mon Sep 17 00:00:00 2001 From: Metallice Date: Wed, 18 Jun 2014 19:43:44 -0400 Subject: [PATCH 641/678] BFQ v7r2 --- block/Kconfig.iosched | 33 +++++++++++++++++++++++++++++++++ block/Makefile | 1 + block/blk-ioc.c | 30 +++++++++++++++++------------- block/cfq-iosched.c | 10 +++++++--- fs/ioprio.c | 9 +++++++-- include/linux/cgroup_subsys.h | 8 +++++++- include/linux/iocontext.h | 21 ++++++++++++++++++--- 7 files changed, 90 insertions(+), 22 deletions(-) diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 8201a45cd26..8b27e90013a 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -71,6 +71,28 @@ config IOSCHED_VR Requests are chosen according to SSTF with a penalty of rev_penalty for switching head direction. +config IOSCHED_BFQ + tristate "BFQ I/O scheduler" + depends on EXPERIMENTAL + default n + ---help--- + The BFQ I/O scheduler tries to distribute bandwidth among + all processes according to their weights. + It aims at distributing the bandwidth as desired, independently of + the disk parameters and with any workload. It also tries to + guarantee low latency to interactive and soft real-time + applications. If compiled built-in (saying Y here), BFQ can + be configured to support hierarchical scheduling. + +config CGROUP_BFQIO + bool "BFQ hierarchical scheduling support" + depends on CGROUPS && IOSCHED_BFQ=y + default n + ---help--- + Enable hierarchical scheduling in BFQ, using the cgroups + filesystem interface. The name of the subsystem will be + bfqio. + choice prompt "Default I/O scheduler" default DEFAULT_CFQ @@ -94,6 +116,16 @@ choice config DEFAULT_CFQ bool "CFQ" if IOSCHED_CFQ=y + config DEFAULT_BFQ + bool "BFQ" if IOSCHED_BFQ=y + help + Selects BFQ as the default I/O scheduler which will be + used by default for all block devices. + The BFQ I/O scheduler aims at distributing the bandwidth + as desired, independently of the disk parameters and with + any workload. It also tries to guarantee low latency to + interactive and soft real-time applications. 
+ config DEFAULT_NOOP bool "No-op" @@ -110,6 +142,7 @@ config DEFAULT_IOSCHED default "deadline" if DEFAULT_DEADLINE default "row" if DEFAULT_ROW default "cfq" if DEFAULT_CFQ + default "bfq" if DEFAULT_BFQ default "noop" if DEFAULT_NOOP default "sio" if DEFAULT_SIO default "vr" if DEFAULT_VR diff --git a/block/Makefile b/block/Makefile index eb332a2d98c..760d8f3ff2e 100644 --- a/block/Makefile +++ b/block/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_ROW) += row-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o +obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 6f9bbd97865..d0d16d4a79a 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include /* for max_pfn/max_low_pfn */ #include @@ -16,13 +17,12 @@ */ static struct kmem_cache *iocontext_cachep; -static void cfq_dtor(struct io_context *ioc) +static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list) { - if (!hlist_empty(&ioc->cic_list)) { + if (!hlist_empty(list)) { struct cfq_io_context *cic; - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); + cic = hlist_entry(list->first, struct cfq_io_context, cic_list); cic->dtor(ioc); } } @@ -40,7 +40,9 @@ int put_io_context(struct io_context *ioc) if (atomic_long_dec_and_test(&ioc->refcount)) { rcu_read_lock(); - cfq_dtor(ioc); + + hlist_sched_dtor(ioc, &ioc->cic_list); + hlist_sched_dtor(ioc, &ioc->bfq_cic_list); rcu_read_unlock(); kmem_cache_free(iocontext_cachep, ioc); @@ -50,15 +52,14 @@ int put_io_context(struct io_context *ioc) } EXPORT_SYMBOL(put_io_context); -static void cfq_exit(struct io_context *ioc) +static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list) { rcu_read_lock(); - if (!hlist_empty(&ioc->cic_list)) { + if (!hlist_empty(list)) { struct cfq_io_context *cic; - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); + cic = hlist_entry(list->first, struct cfq_io_context, cic_list); cic->exit(ioc); } rcu_read_unlock(); @@ -74,9 +75,10 @@ void exit_io_context(struct task_struct *task) task->io_context = NULL; task_unlock(task); - if (atomic_dec_and_test(&ioc->nr_tasks)) - cfq_exit(ioc); - + if (atomic_dec_and_test(&ioc->nr_tasks)) { + hlist_sched_exit(ioc, &ioc->cic_list); + hlist_sched_exit(ioc, &ioc->bfq_cic_list); + } put_io_context(ioc); } @@ -89,12 +91,14 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) atomic_long_set(&ioc->refcount, 1); atomic_set(&ioc->nr_tasks, 1); spin_lock_init(&ioc->lock); - ioc->ioprio_changed = 0; + bitmap_zero(ioc->ioprio_changed, IOC_IOPRIO_CHANGED_BITS); ioc->ioprio = 0; ioc->last_waited = 0; /* doesn't matter... 
*/ ioc->nr_batch_requests = 0; /* because this is 0 */ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ioc->cic_list); + INIT_RADIX_TREE(&ioc->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH); + INIT_HLIST_HEAD(&ioc->bfq_cic_list); ioc->ioc_data = NULL; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) ioc->cgroup_changed = 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 97c3d462732..0f60ba0ad87 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2934,7 +2934,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) static void cfq_ioc_set_ioprio(struct io_context *ioc) { call_for_each_cic(ioc, changed_ioprio); - ioc->ioprio_changed = 0; } static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, @@ -3226,8 +3225,13 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) goto err_free; out: - smp_read_barrier_depends(); - if (unlikely(ioc->ioprio_changed)) + /* + * test_and_clear_bit() implies a memory barrier, paired with + * the wmb() in fs/ioprio.c, so the value seen for ioprio is the + * new one. + */ + if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED, + ioc->ioprio_changed))) cfq_ioc_set_ioprio(ioc); #ifdef CONFIG_CFQ_GROUP_IOSCHED diff --git a/fs/ioprio.c b/fs/ioprio.c index 7da2a06508e..95a6c2b04e0 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -30,7 +30,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) { - int err; + int err, i; struct io_context *ioc; const struct cred *cred = current_cred(), *tcred; @@ -60,12 +60,17 @@ int set_task_ioprio(struct task_struct *task, int ioprio) err = -ENOMEM; break; } + /* let other ioc users see the new values */ + smp_wmb(); task->io_context = ioc; } while (1); if (!err) { ioc->ioprio = ioprio; - ioc->ioprio_changed = 1; + /* make sure schedulers see the new ioprio value */ + wmb(); + for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++) + set_bit(i, ioc->ioprio_changed); } task_unlock(task); diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index b7fd4c8c70c..f2ca8cf3a88 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -69,4 +69,10 @@ SUBSYS(perf) SUBSYS(timer_slack) #endif -/* */ \ No newline at end of file +/* */ + +#ifdef CONFIG_CGROUP_BFQIO +SUBSYS(bfqio) +#endif + +/* */ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 5037a0ad231..65b7a04013b 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -1,10 +1,10 @@ #ifndef IOCONTEXT_H #define IOCONTEXT_H +#include #include #include -struct cfq_queue; struct cfq_ttime { unsigned long last_end_request; @@ -16,12 +16,15 @@ struct cfq_ttime { struct cfq_io_context { void *key; - struct cfq_queue *cfqq[2]; + void *cfqq[2]; struct io_context *ioc; struct cfq_ttime ttime; + unsigned int wr_time_left; + unsigned int saved_idle_window; + struct list_head queue_list; struct hlist_node cic_list; @@ -31,6 +34,16 @@ struct cfq_io_context { struct rcu_head rcu_head; }; +/* + * Indexes into the ioprio_changed bitmap. A bit set indicates that + * the corresponding I/O scheduler needs to see a ioprio update. + */ +enum { + IOC_CFQ_IOPRIO_CHANGED, + IOC_BFQ_IOPRIO_CHANGED, + IOC_IOPRIO_CHANGED_BITS +}; + /* * I/O subsystem state of the associated processes. It is refcounted * and kmalloc'ed. These could be shared between processes. 
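The ioprio_changed handling added by this patch replaces a single flag with one bit per scheduler: set_task_ioprio() publishes the new ioprio, issues wmb(), then sets every IOC_*_IOPRIO_CHANGED bit, while each scheduler consumes only its own bit with test_and_clear_bit(), whose implied barrier pairs with that wmb(). A minimal consumer-side sketch using the BFQ bit (everything except the IOC_* index is an assumed, hypothetical name):

/*
 * Sketch only: how one scheduler consumes its private bit.  The implied
 * full barrier of test_and_clear_bit() guarantees the new ioc->ioprio
 * value is visible before the update is applied.
 */
static void example_apply_new_ioprio(struct io_context *ioc)
{
        /* hypothetical: re-read ioc->ioprio and refresh per-queue state */
}

static void example_check_ioprio(struct io_context *ioc)
{
        if (unlikely(test_and_clear_bit(IOC_BFQ_IOPRIO_CHANGED,
                                        ioc->ioprio_changed)))
                example_apply_new_ioprio(ioc);
}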
@@ -43,7 +56,7 @@ struct io_context { spinlock_t lock; unsigned short ioprio; - unsigned short ioprio_changed; + DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS); #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) unsigned short cgroup_changed; @@ -57,6 +70,8 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; + struct radix_tree_root bfq_radix_root; + struct hlist_head bfq_cic_list; void __rcu *ioc_data; }; From bab72ec8358e1ebe93ce60ed8c2765baaf236154 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 30 Jun 2014 20:52:54 -0400 Subject: [PATCH 642/678] Revert "BFQ v7r2" This reverts commit 0b3d4ac98d9657b3927313a591205861186c17d9. --- block/Kconfig.iosched | 33 --------------------------------- block/Makefile | 1 - block/blk-ioc.c | 30 +++++++++++++----------------- block/cfq-iosched.c | 10 +++------- fs/ioprio.c | 9 ++------- include/linux/cgroup_subsys.h | 8 +------- include/linux/iocontext.h | 21 +++------------------ 7 files changed, 22 insertions(+), 90 deletions(-) diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 8b27e90013a..8201a45cd26 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -71,28 +71,6 @@ config IOSCHED_VR Requests are chosen according to SSTF with a penalty of rev_penalty for switching head direction. -config IOSCHED_BFQ - tristate "BFQ I/O scheduler" - depends on EXPERIMENTAL - default n - ---help--- - The BFQ I/O scheduler tries to distribute bandwidth among - all processes according to their weights. - It aims at distributing the bandwidth as desired, independently of - the disk parameters and with any workload. It also tries to - guarantee low latency to interactive and soft real-time - applications. If compiled built-in (saying Y here), BFQ can - be configured to support hierarchical scheduling. - -config CGROUP_BFQIO - bool "BFQ hierarchical scheduling support" - depends on CGROUPS && IOSCHED_BFQ=y - default n - ---help--- - Enable hierarchical scheduling in BFQ, using the cgroups - filesystem interface. The name of the subsystem will be - bfqio. - choice prompt "Default I/O scheduler" default DEFAULT_CFQ @@ -116,16 +94,6 @@ choice config DEFAULT_CFQ bool "CFQ" if IOSCHED_CFQ=y - config DEFAULT_BFQ - bool "BFQ" if IOSCHED_BFQ=y - help - Selects BFQ as the default I/O scheduler which will be - used by default for all block devices. - The BFQ I/O scheduler aims at distributing the bandwidth - as desired, independently of the disk parameters and with - any workload. It also tries to guarantee low latency to - interactive and soft real-time applications. 
- config DEFAULT_NOOP bool "No-op" @@ -142,7 +110,6 @@ config DEFAULT_IOSCHED default "deadline" if DEFAULT_DEADLINE default "row" if DEFAULT_ROW default "cfq" if DEFAULT_CFQ - default "bfq" if DEFAULT_BFQ default "noop" if DEFAULT_NOOP default "sio" if DEFAULT_SIO default "vr" if DEFAULT_VR diff --git a/block/Makefile b/block/Makefile index 760d8f3ff2e..eb332a2d98c 100644 --- a/block/Makefile +++ b/block/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_ROW) += row-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o -obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o diff --git a/block/blk-ioc.c b/block/blk-ioc.c index d0d16d4a79a..6f9bbd97865 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include /* for max_pfn/max_low_pfn */ #include @@ -17,12 +16,13 @@ */ static struct kmem_cache *iocontext_cachep; -static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list) +static void cfq_dtor(struct io_context *ioc) { - if (!hlist_empty(list)) { + if (!hlist_empty(&ioc->cic_list)) { struct cfq_io_context *cic; - cic = hlist_entry(list->first, struct cfq_io_context, cic_list); + cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, + cic_list); cic->dtor(ioc); } } @@ -40,9 +40,7 @@ int put_io_context(struct io_context *ioc) if (atomic_long_dec_and_test(&ioc->refcount)) { rcu_read_lock(); - - hlist_sched_dtor(ioc, &ioc->cic_list); - hlist_sched_dtor(ioc, &ioc->bfq_cic_list); + cfq_dtor(ioc); rcu_read_unlock(); kmem_cache_free(iocontext_cachep, ioc); @@ -52,14 +50,15 @@ int put_io_context(struct io_context *ioc) } EXPORT_SYMBOL(put_io_context); -static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list) +static void cfq_exit(struct io_context *ioc) { rcu_read_lock(); - if (!hlist_empty(list)) { + if (!hlist_empty(&ioc->cic_list)) { struct cfq_io_context *cic; - cic = hlist_entry(list->first, struct cfq_io_context, cic_list); + cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, + cic_list); cic->exit(ioc); } rcu_read_unlock(); @@ -75,10 +74,9 @@ void exit_io_context(struct task_struct *task) task->io_context = NULL; task_unlock(task); - if (atomic_dec_and_test(&ioc->nr_tasks)) { - hlist_sched_exit(ioc, &ioc->cic_list); - hlist_sched_exit(ioc, &ioc->bfq_cic_list); - } + if (atomic_dec_and_test(&ioc->nr_tasks)) + cfq_exit(ioc); + put_io_context(ioc); } @@ -91,14 +89,12 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) atomic_long_set(&ioc->refcount, 1); atomic_set(&ioc->nr_tasks, 1); spin_lock_init(&ioc->lock); - bitmap_zero(ioc->ioprio_changed, IOC_IOPRIO_CHANGED_BITS); + ioc->ioprio_changed = 0; ioc->ioprio = 0; ioc->last_waited = 0; /* doesn't matter... 
*/ ioc->nr_batch_requests = 0; /* because this is 0 */ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ioc->cic_list); - INIT_RADIX_TREE(&ioc->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH); - INIT_HLIST_HEAD(&ioc->bfq_cic_list); ioc->ioc_data = NULL; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) ioc->cgroup_changed = 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 0f60ba0ad87..97c3d462732 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2934,6 +2934,7 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) static void cfq_ioc_set_ioprio(struct io_context *ioc) { call_for_each_cic(ioc, changed_ioprio); + ioc->ioprio_changed = 0; } static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, @@ -3225,13 +3226,8 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) goto err_free; out: - /* - * test_and_clear_bit() implies a memory barrier, paired with - * the wmb() in fs/ioprio.c, so the value seen for ioprio is the - * new one. - */ - if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED, - ioc->ioprio_changed))) + smp_read_barrier_depends(); + if (unlikely(ioc->ioprio_changed)) cfq_ioc_set_ioprio(ioc); #ifdef CONFIG_CFQ_GROUP_IOSCHED diff --git a/fs/ioprio.c b/fs/ioprio.c index 95a6c2b04e0..7da2a06508e 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -30,7 +30,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) { - int err, i; + int err; struct io_context *ioc; const struct cred *cred = current_cred(), *tcred; @@ -60,17 +60,12 @@ int set_task_ioprio(struct task_struct *task, int ioprio) err = -ENOMEM; break; } - /* let other ioc users see the new values */ - smp_wmb(); task->io_context = ioc; } while (1); if (!err) { ioc->ioprio = ioprio; - /* make sure schedulers see the new ioprio value */ - wmb(); - for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++) - set_bit(i, ioc->ioprio_changed); + ioc->ioprio_changed = 1; } task_unlock(task); diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index f2ca8cf3a88..b7fd4c8c70c 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -69,10 +69,4 @@ SUBSYS(perf) SUBSYS(timer_slack) #endif -/* */ - -#ifdef CONFIG_CGROUP_BFQIO -SUBSYS(bfqio) -#endif - -/* */ +/* */ \ No newline at end of file diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 65b7a04013b..5037a0ad231 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -1,10 +1,10 @@ #ifndef IOCONTEXT_H #define IOCONTEXT_H -#include #include #include +struct cfq_queue; struct cfq_ttime { unsigned long last_end_request; @@ -16,15 +16,12 @@ struct cfq_ttime { struct cfq_io_context { void *key; - void *cfqq[2]; + struct cfq_queue *cfqq[2]; struct io_context *ioc; struct cfq_ttime ttime; - unsigned int wr_time_left; - unsigned int saved_idle_window; - struct list_head queue_list; struct hlist_node cic_list; @@ -34,16 +31,6 @@ struct cfq_io_context { struct rcu_head rcu_head; }; -/* - * Indexes into the ioprio_changed bitmap. A bit set indicates that - * the corresponding I/O scheduler needs to see a ioprio update. - */ -enum { - IOC_CFQ_IOPRIO_CHANGED, - IOC_BFQ_IOPRIO_CHANGED, - IOC_IOPRIO_CHANGED_BITS -}; - /* * I/O subsystem state of the associated processes. It is refcounted * and kmalloc'ed. These could be shared between processes. 
@@ -56,7 +43,7 @@ struct io_context { spinlock_t lock; unsigned short ioprio; - DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS); + unsigned short ioprio_changed; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) unsigned short cgroup_changed; @@ -70,8 +57,6 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; - struct radix_tree_root bfq_radix_root; - struct hlist_head bfq_cic_list; void __rcu *ioc_data; }; From 3e757e093115a0b599e4eba91ebfd91fec777904 Mon Sep 17 00:00:00 2001 From: Metallice Date: Mon, 30 Jun 2014 21:15:58 -0400 Subject: [PATCH 643/678] BFQ v7r4 --- arch/arm/configs/metallice_grouper_defconfig | 2 +- block/Kconfig.iosched | 33 + block/Makefile | 3 +- block/bfq-cgroup.c | 900 ++++ block/bfq-ioc.c | 410 ++ block/bfq-iosched.c | 3956 ++++++++++++++++++ block/bfq-sched.c | 1177 ++++++ block/bfq.h | 742 ++++ block/blk-ioc.c | 30 +- block/cfq-iosched.c | 10 +- fs/ioprio.c | 9 +- include/linux/cgroup_subsys.h | 8 +- include/linux/iocontext.h | 25 +- 13 files changed, 7281 insertions(+), 24 deletions(-) create mode 100644 block/bfq-cgroup.c create mode 100644 block/bfq-ioc.c create mode 100644 block/bfq-iosched.c create mode 100644 block/bfq-sched.c create mode 100644 block/bfq.h diff --git a/arch/arm/configs/metallice_grouper_defconfig b/arch/arm/configs/metallice_grouper_defconfig index 234cef6be01..11b7b7d4587 100644 --- a/arch/arm/configs/metallice_grouper_defconfig +++ b/arch/arm/configs/metallice_grouper_defconfig @@ -38,7 +38,7 @@ CONFIG_IRQ_WORK=y CONFIG_EXPERIMENTAL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_CROSS_COMPILE="" -CONFIG_LOCALVERSION="-MKernel-a68" +CONFIG_LOCALVERSION="-MKernel-a69" CONFIG_LOCALVERSION_AUTO=y CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_LZMA=y diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 8201a45cd26..8b27e90013a 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -71,6 +71,28 @@ config IOSCHED_VR Requests are chosen according to SSTF with a penalty of rev_penalty for switching head direction. +config IOSCHED_BFQ + tristate "BFQ I/O scheduler" + depends on EXPERIMENTAL + default n + ---help--- + The BFQ I/O scheduler tries to distribute bandwidth among + all processes according to their weights. + It aims at distributing the bandwidth as desired, independently of + the disk parameters and with any workload. It also tries to + guarantee low latency to interactive and soft real-time + applications. If compiled built-in (saying Y here), BFQ can + be configured to support hierarchical scheduling. + +config CGROUP_BFQIO + bool "BFQ hierarchical scheduling support" + depends on CGROUPS && IOSCHED_BFQ=y + default n + ---help--- + Enable hierarchical scheduling in BFQ, using the cgroups + filesystem interface. The name of the subsystem will be + bfqio. + choice prompt "Default I/O scheduler" default DEFAULT_CFQ @@ -94,6 +116,16 @@ choice config DEFAULT_CFQ bool "CFQ" if IOSCHED_CFQ=y + config DEFAULT_BFQ + bool "BFQ" if IOSCHED_BFQ=y + help + Selects BFQ as the default I/O scheduler which will be + used by default for all block devices. + The BFQ I/O scheduler aims at distributing the bandwidth + as desired, independently of the disk parameters and with + any workload. It also tries to guarantee low latency to + interactive and soft real-time applications. 
+ config DEFAULT_NOOP bool "No-op" @@ -110,6 +142,7 @@ config DEFAULT_IOSCHED default "deadline" if DEFAULT_DEADLINE default "row" if DEFAULT_ROW default "cfq" if DEFAULT_CFQ + default "bfq" if DEFAULT_BFQ default "noop" if DEFAULT_NOOP default "sio" if DEFAULT_SIO default "vr" if DEFAULT_VR diff --git a/block/Makefile b/block/Makefile index eb332a2d98c..8613fe380f0 100644 --- a/block/Makefile +++ b/block/Makefile @@ -15,7 +15,8 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_ROW) += row-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o -obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o +obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o +obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c new file mode 100644 index 00000000000..349fa7facd0 --- /dev/null +++ b/block/bfq-cgroup.c @@ -0,0 +1,900 @@ +/* + * BFQ: CGROUPS support. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + * + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ + * file. + */ + +#ifdef CONFIG_CGROUP_BFQIO +static struct bfqio_cgroup bfqio_root_cgroup = { + .weight = BFQ_DEFAULT_GRP_WEIGHT, + .ioprio = BFQ_DEFAULT_GRP_IOPRIO, + .ioprio_class = BFQ_DEFAULT_GRP_CLASS, +}; + +static inline void bfq_init_entity(struct bfq_entity *entity, + struct bfq_group *bfqg) +{ + entity->weight = entity->new_weight; + entity->orig_weight = entity->new_weight; + entity->ioprio = entity->new_ioprio; + entity->ioprio_class = entity->new_ioprio_class; + entity->parent = bfqg->my_entity; + entity->sched_data = &bfqg->sched_data; +} + +static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup) +{ + return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id), + struct bfqio_cgroup, css); +} + +/* + * Search the bfq_group for bfqd into the hash table (by now only a list) + * of bgrp. Must be called under rcu_read_lock(). + */ +static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, + struct bfq_data *bfqd) +{ + struct bfq_group *bfqg; + struct hlist_node *n; + void *key; + + hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) { + key = rcu_dereference(bfqg->bfqd); + if (key == bfqd) + return bfqg; + } + + return NULL; +} + +static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, + struct bfq_group *bfqg) +{ + struct bfq_entity *entity = &bfqg->entity; + + /* + * If the weight of the entity has never been set via the sysfs + * interface, then bgrp->weight == 0. In this case we initialize + * the weight from the current ioprio value. Otherwise, the group + * weight, if set, has priority over the ioprio value. 
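The initialization just below leans on two conversion helpers declared in bfq.h, outside this hunk: bfq_ioprio_to_weight() and bfq_weight_to_ioprio(). The important property is that they are inverses and that a lower ioprio (higher priority) yields a larger weight. A minimal sketch of such a mapping (the exact formula is not visible in this patch, so the linear one below is an assumption):

/*
 * Sketch only: a linear ioprio <-> weight mapping.  ioprio 0 maps to the
 * largest weight, IOPRIO_BE_NR - 1 to the smallest; the real helpers in
 * bfq.h may scale these values differently.
 */
static inline unsigned short example_ioprio_to_weight(int ioprio)
{
        return IOPRIO_BE_NR - ioprio;
}

static inline unsigned short example_weight_to_ioprio(int weight)
{
        return weight > IOPRIO_BE_NR ? 0 : IOPRIO_BE_NR - weight;
}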
+ */ + if (bgrp->weight == 0) { + entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio); + entity->new_ioprio = bgrp->ioprio; + } else { + entity->new_weight = bgrp->weight; + entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight); + } + entity->orig_weight = entity->weight = entity->new_weight; + entity->ioprio = entity->new_ioprio; + entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; + entity->my_sched_data = &bfqg->sched_data; + bfqg->active_entities = 0; +} + +static inline void bfq_group_set_parent(struct bfq_group *bfqg, + struct bfq_group *parent) +{ + struct bfq_entity *entity; + + BUG_ON(parent == NULL); + BUG_ON(bfqg == NULL); + + entity = &bfqg->entity; + entity->parent = parent->my_entity; + entity->sched_data = &parent->sched_data; +} + +/** + * bfq_group_chain_alloc - allocate a chain of groups. + * @bfqd: queue descriptor. + * @cgroup: the leaf cgroup this chain starts from. + * + * Allocate a chain of groups starting from the one belonging to + * @cgroup up to the root cgroup. Stop if a cgroup on the chain + * to the root has already an allocated group on @bfqd. + */ +static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp; + struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; + + for (; cgroup != NULL; cgroup = cgroup->parent) { + bgrp = cgroup_to_bfqio(cgroup); + + bfqg = bfqio_lookup_group(bgrp, bfqd); + if (bfqg != NULL) { + /* + * All the cgroups in the path from there to the + * root must have a bfq_group for bfqd, so we don't + * need any more allocations. + */ + break; + } + + bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); + if (bfqg == NULL) + goto cleanup; + + bfq_group_init_entity(bgrp, bfqg); + bfqg->my_entity = &bfqg->entity; + + if (leaf == NULL) { + leaf = bfqg; + prev = leaf; + } else { + bfq_group_set_parent(prev, bfqg); + /* + * Build a list of allocated nodes using the bfqd + * filed, that is still unused and will be + * initialized only after the node will be + * connected. + */ + prev->bfqd = bfqg; + prev = bfqg; + } + } + + return leaf; + +cleanup: + while (leaf != NULL) { + prev = leaf; + leaf = leaf->bfqd; + kfree(prev); + } + + return NULL; +} + +/** + * bfq_group_chain_link - link an allocated group chain to a cgroup + * hierarchy. + * @bfqd: the queue descriptor. + * @cgroup: the leaf cgroup to start from. + * @leaf: the leaf group (to be associated to @cgroup). + * + * Try to link a chain of groups to a cgroup hierarchy, connecting the + * nodes bottom-up, so we can be sure that when we find a cgroup in the + * hierarchy that already as a group associated to @bfqd all the nodes + * in the path to the root cgroup have one too. + * + * On locking: the queue lock protects the hierarchy (there is a hierarchy + * per device) while the bfqio_cgroup lock protects the list of groups + * belonging to the same cgroup. 
+ */ +static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup, + struct bfq_group *leaf) +{ + struct bfqio_cgroup *bgrp; + struct bfq_group *bfqg, *next, *prev = NULL; + unsigned long flags; + + assert_spin_locked(bfqd->queue->queue_lock); + + for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) { + bgrp = cgroup_to_bfqio(cgroup); + next = leaf->bfqd; + + bfqg = bfqio_lookup_group(bgrp, bfqd); + BUG_ON(bfqg != NULL); + + spin_lock_irqsave(&bgrp->lock, flags); + + rcu_assign_pointer(leaf->bfqd, bfqd); + hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); + hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); + + spin_unlock_irqrestore(&bgrp->lock, flags); + + prev = leaf; + leaf = next; + } + + BUG_ON(cgroup == NULL && leaf != NULL); + if (cgroup != NULL && prev != NULL) { + bgrp = cgroup_to_bfqio(cgroup); + bfqg = bfqio_lookup_group(bgrp, bfqd); + bfq_group_set_parent(prev, bfqg); + } +} + +/** + * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. + * @bfqd: queue descriptor. + * @cgroup: cgroup being searched for. + * + * Return a group associated to @bfqd in @cgroup, allocating one if + * necessary. When a group is returned all the cgroups in the path + * to the root have a group associated to @bfqd. + * + * If the allocation fails, return the root group: this breaks guarantees + * but is a safe fallback. If this loss becomes a problem it can be + * mitigated using the equivalent weight (given by the product of the + * weights of the groups in the path from @group to the root) in the + * root scheduler. + * + * We allocate all the missing nodes in the path from the leaf cgroup + * to the root and we connect the nodes only after all the allocations + * have been successful. + */ +static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); + struct bfq_group *bfqg; + + bfqg = bfqio_lookup_group(bgrp, bfqd); + if (bfqg != NULL) + return bfqg; + + bfqg = bfq_group_chain_alloc(bfqd, cgroup); + if (bfqg != NULL) + bfq_group_chain_link(bfqd, cgroup, bfqg); + else + bfqg = bfqd->root_group; + + return bfqg; +} + +/** + * bfq_bfqq_move - migrate @bfqq to @bfqg. + * @bfqd: queue descriptor. + * @bfqq: the queue to move. + * @entity: @bfqq's entity. + * @bfqg: the group to move to. + * + * Move @bfqq to @bfqg, deactivating it from its old group and reactivating + * it on the new one. Avoid putting the entity on the old group idle tree. + * + * Must be called under the queue lock; the cgroup owning @bfqg must + * not disappear (by now this just means that we are called under + * rcu_read_lock()). + */ +static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, + struct bfq_entity *entity, struct bfq_group *bfqg) +{ + int busy, resume; + + busy = bfq_bfqq_busy(bfqq); + resume = !RB_EMPTY_ROOT(&bfqq->sort_list); + + BUG_ON(resume && !entity->on_st); + BUG_ON(busy && !resume && entity->on_st && + bfqq != bfqd->in_service_queue); + + if (busy) { + BUG_ON(atomic_read(&bfqq->ref) < 2); + + if (!resume) + bfq_del_bfqq_busy(bfqd, bfqq, 0); + else + bfq_deactivate_bfqq(bfqd, bfqq, 0); + } else if (entity->on_st) + bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); + + /* + * Here we use a reference to bfqg. We don't need a refcounter + * as the cgroup reference will not be dropped, so that its + * destroy() callback will not be invoked. 
+ */ + entity->parent = bfqg->my_entity; + entity->sched_data = &bfqg->sched_data; + + if (busy && resume) + bfq_activate_bfqq(bfqd, bfqq); + + if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); +} + +/** + * __bfq_cic_change_cgroup - move @cic to @cgroup. + * @bfqd: the queue descriptor. + * @cic: the cic to move. + * @cgroup: the cgroup to move to. + * + * Move cic to cgroup, assuming that bfqd->queue is locked; the caller + * has to make sure that the reference to cgroup is valid across the call. + * + * NOTE: an alternative approach might have been to store the current + * cgroup in bfqq and getting a reference to it, reducing the lookup + * time here, at the price of slightly more complex code. + */ +static struct bfq_group *__bfq_cic_change_cgroup(struct bfq_data *bfqd, + struct cfq_io_context *cic, + struct cgroup *cgroup) +{ + struct bfq_queue *async_bfqq; + struct bfq_queue *sync_bfqq; + struct bfq_entity *entity; + struct bfq_group *bfqg; + + spin_lock(&bfqd->eqm_lock); + + async_bfqq = cic_to_bfqq(cic, 0); + sync_bfqq = cic_to_bfqq(cic, 1); + + bfqg = bfq_find_alloc_group(bfqd, cgroup); + if (async_bfqq != NULL) { + entity = &async_bfqq->entity; + + if (entity->sched_data != &bfqg->sched_data) { + cic_set_bfqq(cic, NULL, 0); + bfq_log_bfqq(bfqd, async_bfqq, + "cic_change_group: %p %d", + async_bfqq, atomic_read(&async_bfqq->ref)); + bfq_put_queue(async_bfqq); + } + } + + if (sync_bfqq != NULL) { + entity = &sync_bfqq->entity; + if (entity->sched_data != &bfqg->sched_data) + bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); + } + + spin_unlock(&bfqd->eqm_lock); + + return bfqg; +} + +/** + * bfq_cic_change_cgroup - move @cic to @cgroup. + * @cic: the cic being migrated. + * @cgroup: the destination cgroup. + * + * When the task owning @cic is moved to @cgroup, @cic is immediately + * moved into its new parent group. + */ +static void bfq_cic_change_cgroup(struct cfq_io_context *cic, + struct cgroup *cgroup) +{ + struct bfq_data *bfqd; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (bfqd != NULL && + !strncmp(bfqd->queue->elevator->elevator_type->elevator_name, + "bfq", ELV_NAME_MAX)) { + __bfq_cic_change_cgroup(bfqd, cic, cgroup); + bfq_put_bfqd_unlock(bfqd, &flags); + } +} + +/** + * bfq_cic_update_cgroup - update the cgroup of @cic. + * @cic: the @cic to update. + * + * Make sure that @cic is enqueued in the cgroup of the current task. + * We need this in addition to moving cics during the cgroup attach + * phase because the task owning @cic could be at its first disk + * access or we may end up in the root cgroup as the result of a + * memory allocation failure and here we try to move to the right + * group. + * + * Must be called under the queue lock. It is safe to use the returned + * value even after the rcu_read_unlock() as the migration/destruction + * paths act under the queue lock too. IOW it is impossible to race with + * group migration/destruction and end up with an invalid group as: + * a) here cgroup has not yet been destroyed, nor its destroy callback + * has started execution, as current holds a reference to it, + * b) if it is destroyed after rcu_read_unlock() [after current is + * migrated to a different cgroup] its attach() callback will have + * taken care of remove all the references to the old cgroup data. 
+ */ +static struct bfq_group *bfq_cic_update_cgroup(struct cfq_io_context *cic) +{ + struct bfq_data *bfqd = cic->key; + struct bfq_group *bfqg; + struct cgroup *cgroup; + + BUG_ON(bfqd == NULL); + + rcu_read_lock(); + cgroup = task_cgroup(current, bfqio_subsys_id); + bfqg = __bfq_cic_change_cgroup(bfqd, cic, cgroup); + rcu_read_unlock(); + + return bfqg; +} + +/** + * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. + * @st: the service tree being flushed. + */ +static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) +{ + struct bfq_entity *entity = st->first_idle; + + for (; entity != NULL; entity = st->first_idle) + __bfq_deactivate_entity(entity, 0); +} + +/** + * bfq_reparent_leaf_entity - move leaf entity to the root_group. + * @bfqd: the device data structure with the root group. + * @entity: the entity to move. + */ +static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(bfqq == NULL); + bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); + return; +} + +/** + * bfq_reparent_active_entities - move to the root group all active + * entities. + * @bfqd: the device data structure with the root group. + * @bfqg: the group to move from. + * @st: the service tree with the entities. + * + * Needs queue_lock to be taken and reference to be valid over the call. + */ +static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, + struct bfq_group *bfqg, + struct bfq_service_tree *st) +{ + struct rb_root *active = &st->active; + struct bfq_entity *entity = NULL; + + if (!RB_EMPTY_ROOT(&st->active)) + entity = bfq_entity_of(rb_first(active)); + + for (; entity != NULL; entity = bfq_entity_of(rb_first(active))) + bfq_reparent_leaf_entity(bfqd, entity); + + if (bfqg->sched_data.in_service_entity != NULL) + bfq_reparent_leaf_entity(bfqd, + bfqg->sched_data.in_service_entity); + + return; +} + +/** + * bfq_destroy_group - destroy @bfqg. + * @bgrp: the bfqio_cgroup containing @bfqg. + * @bfqg: the group being destroyed. + * + * Destroy @bfqg, making sure that it is not referenced from its parent. + */ +static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) +{ + struct bfq_data *bfqd; + struct bfq_service_tree *st; + struct bfq_entity *entity = bfqg->my_entity; + unsigned long uninitialized_var(flags); + int i; + + hlist_del(&bfqg->group_node); + + /* + * Empty all service_trees belonging to this group before + * deactivating the group itself. + */ + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { + st = bfqg->sched_data.service_tree + i; + + /* + * The idle tree may still contain bfq_queues belonging + * to exited task because they never migrated to a different + * cgroup from the one being destroyed now. No one else + * can access them so it's safe to act without any lock. + */ + bfq_flush_idle_tree(st); + + /* + * It may happen that some queues are still active + * (busy) upon group destruction (if the corresponding + * processes have been forced to terminate). We move + * all the leaf entities corresponding to these queues + * to the root_group. + * Also, it may happen that the group has an entity + * in service, which is disconnected from the active + * tree: it must be moved, too. + * There is no need to put the sync queues, as the + * scheduler has taken no reference. 
+ */ + bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); + if (bfqd != NULL) { + bfq_reparent_active_entities(bfqd, bfqg, st); + bfq_put_bfqd_unlock(bfqd, &flags); + } + BUG_ON(!RB_EMPTY_ROOT(&st->active)); + BUG_ON(!RB_EMPTY_ROOT(&st->idle)); + } + BUG_ON(bfqg->sched_data.next_in_service != NULL); + BUG_ON(bfqg->sched_data.in_service_entity != NULL); + + /* + * We may race with device destruction, take extra care when + * dereferencing bfqg->bfqd. + */ + bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); + if (bfqd != NULL) { + hlist_del(&bfqg->bfqd_node); + __bfq_deactivate_entity(entity, 0); + bfq_put_async_queues(bfqd, bfqg); + bfq_put_bfqd_unlock(bfqd, &flags); + } + BUG_ON(entity->tree != NULL); + + /* + * No need to defer the kfree() to the end of the RCU grace + * period: we are called from the destroy() callback of our + * cgroup, so we can be sure that no one is a) still using + * this cgroup or b) doing lookups in it. + */ + kfree(bfqg); +} + +static void bfq_end_wr_async(struct bfq_data *bfqd) +{ + struct hlist_node *pos, *n; + struct bfq_group *bfqg; + + hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) + bfq_end_wr_async_queues(bfqd, bfqg); + bfq_end_wr_async_queues(bfqd, bfqd->root_group); +} + +/** + * bfq_disconnect_groups - disconnect @bfqd from all its groups. + * @bfqd: the device descriptor being exited. + * + * When the device exits we just make sure that no lookup can return + * the now unused group structures. They will be deallocated on cgroup + * destruction. + */ +static void bfq_disconnect_groups(struct bfq_data *bfqd) +{ + struct hlist_node *pos, *n; + struct bfq_group *bfqg; + + bfq_log(bfqd, "disconnect_groups beginning"); + hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) { + hlist_del(&bfqg->bfqd_node); + + __bfq_deactivate_entity(bfqg->my_entity, 0); + + /* + * Don't remove from the group hash, just set an + * invalid key. No lookups can race with the + * assignment as bfqd is being destroyed; this + * implies also that new elements cannot be added + * to the list. + */ + rcu_assign_pointer(bfqg->bfqd, NULL); + + bfq_log(bfqd, "disconnect_groups: put async for group %p", + bfqg); + bfq_put_async_queues(bfqd, bfqg); + } +} + +static inline void bfq_free_root_group(struct bfq_data *bfqd) +{ + struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; + struct bfq_group *bfqg = bfqd->root_group; + + bfq_put_async_queues(bfqd, bfqg); + + spin_lock_irq(&bgrp->lock); + hlist_del_rcu(&bfqg->group_node); + spin_unlock_irq(&bgrp->lock); + + /* + * No need to synchronize_rcu() here: since the device is gone + * there cannot be any read-side access to its root_group. 
+ */ + kfree(bfqg); +} + +static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) +{ + struct bfq_group *bfqg; + struct bfqio_cgroup *bgrp; + int i; + + bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node); + if (bfqg == NULL) + return NULL; + + bfqg->entity.parent = NULL; + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) + bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; + + bgrp = &bfqio_root_cgroup; + spin_lock_irq(&bgrp->lock); + rcu_assign_pointer(bfqg->bfqd, bfqd); + hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); + spin_unlock_irq(&bgrp->lock); + + return bfqg; +} + +#define SHOW_FUNCTION(__VAR) \ +static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \ + struct cftype *cftype) \ +{ \ + struct bfqio_cgroup *bgrp; \ + u64 ret; \ + \ + if (!cgroup_lock_live_group(cgroup)) \ + return -ENODEV; \ + \ + bgrp = cgroup_to_bfqio(cgroup); \ + spin_lock_irq(&bgrp->lock); \ + ret = bgrp->__VAR; \ + spin_unlock_irq(&bgrp->lock); \ + \ + cgroup_unlock(); \ + \ + return ret; \ +} + +SHOW_FUNCTION(weight); +SHOW_FUNCTION(ioprio); +SHOW_FUNCTION(ioprio_class); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ +static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \ + struct cftype *cftype, \ + u64 val) \ +{ \ + struct bfqio_cgroup *bgrp; \ + struct bfq_group *bfqg; \ + struct hlist_node *n; \ + \ + if (val < (__MIN) || val > (__MAX)) \ + return -EINVAL; \ + \ + if (!cgroup_lock_live_group(cgroup)) \ + return -ENODEV; \ + \ + bgrp = cgroup_to_bfqio(cgroup); \ + \ + spin_lock_irq(&bgrp->lock); \ + bgrp->__VAR = (unsigned short)val; \ + hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \ + /* \ + * Setting the ioprio_changed flag of the entity \ + * to 1 with new_##__VAR == ##__VAR would re-set \ + * the value of the weight to its ioprio mapping. \ + * Set the flag only if necessary. \ + */ \ + if ((unsigned short)val != bfqg->entity.new_##__VAR) { \ + bfqg->entity.new_##__VAR = (unsigned short)val; \ + /* \ + * Make sure that the above new value has been \ + * stored in bfqg->entity.new_##__VAR before \ + * setting the ioprio_changed flag. In fact, \ + * this flag may be read asynchronously (in \ + * critical sections protected by a different \ + * lock than that held here), and finding this \ + * flag set may cause the execution of the code \ + * for updating parameters whose value may \ + * depend also on bfqg->entity.new_##__VAR (in \ + * __bfq_entity_update_weight_prio). \ + * This barrier makes sure that the new value \ + * of bfqg->entity.new_##__VAR is correctly \ + * seen in that code. 
\ + */ \ + smp_wmb(); \ + bfqg->entity.ioprio_changed = 1; \ + } \ + } \ + spin_unlock_irq(&bgrp->lock); \ + \ + cgroup_unlock(); \ + \ + return 0; \ +} + +STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); +STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); +STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); +#undef STORE_FUNCTION + +static struct cftype bfqio_files[] = { + { + .name = "weight", + .read_u64 = bfqio_cgroup_weight_read, + .write_u64 = bfqio_cgroup_weight_write, + }, + { + .name = "ioprio", + .read_u64 = bfqio_cgroup_ioprio_read, + .write_u64 = bfqio_cgroup_ioprio_write, + }, + { + .name = "ioprio_class", + .read_u64 = bfqio_cgroup_ioprio_class_read, + .write_u64 = bfqio_cgroup_ioprio_class_write, + }, +}; + +static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + return cgroup_add_files(cgroup, subsys, bfqio_files, + ARRAY_SIZE(bfqio_files)); +} + +static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys *subsys, + struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp; + + if (cgroup->parent != NULL) { + bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); + if (bgrp == NULL) + return ERR_PTR(-ENOMEM); + } else + bgrp = &bfqio_root_cgroup; + + spin_lock_init(&bgrp->lock); + INIT_HLIST_HEAD(&bgrp->group_data); + bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; + bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; + + return &bgrp->css; +} + +/* + * We cannot support shared io contexts, as we have no means to support + * two tasks with the same ioc in two different groups without major rework + * of the main cic/bfqq data structures. By now we allow a task to change + * its cgroup only if it's the only owner of its ioc; the drawback of this + * behavior is that a group containing a task that forked using CLONE_IO + * will not be destroyed until the tasks sharing the ioc die. + */ +static int bfqio_can_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, + struct task_struct *tsk) +{ + struct io_context *ioc; + int ret = 0; + + /* task_lock() is needed to avoid races with exit_io_context() */ + task_lock(tsk); + ioc = tsk->io_context; + if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) + /* + * ioc == NULL means that the task is either too + * young or exiting: if it has still no ioc the + * ioc can't be shared, if the task is exiting the + * attach will fail anyway, no matter what we + * return here. 
+ */ + ret = -EINVAL; + task_unlock(tsk); + + return ret; +} + +static void bfqio_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, + struct cgroup *prev, struct task_struct *tsk) +{ + struct io_context *ioc; + struct cfq_io_context *cic; + struct hlist_node *n; + + task_lock(tsk); + ioc = tsk->io_context; + if (ioc != NULL) { + BUG_ON(atomic_long_read(&ioc->refcount) == 0); + atomic_long_inc(&ioc->refcount); + } + task_unlock(tsk); + + if (ioc == NULL) + return; + + rcu_read_lock(); + hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) + bfq_cic_change_cgroup(cic, cgroup); + rcu_read_unlock(); + + put_io_context(ioc); +} + +static void bfqio_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); + struct hlist_node *n, *tmp; + struct bfq_group *bfqg; + + /* + * Since we are destroying the cgroup, there are no more tasks + * referencing it, and all the RCU grace periods that may have + * referenced it are ended (as the destruction of the parent + * cgroup is RCU-safe); bgrp->group_data will not be accessed by + * anything else and we don't need any synchronization. + */ + hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node) + bfq_destroy_group(bgrp, bfqg); + + BUG_ON(!hlist_empty(&bgrp->group_data)); + + kfree(bgrp); +} + +struct cgroup_subsys bfqio_subsys = { + .name = "bfqio", + .create = bfqio_create, + .can_attach = bfqio_can_attach, + .attach = bfqio_attach, + .destroy = bfqio_destroy, + .populate = bfqio_populate, + .subsys_id = bfqio_subsys_id, +}; +#else +static inline void bfq_init_entity(struct bfq_entity *entity, + struct bfq_group *bfqg) +{ + entity->weight = entity->new_weight; + entity->orig_weight = entity->new_weight; + entity->ioprio = entity->new_ioprio; + entity->ioprio_class = entity->new_ioprio_class; + entity->sched_data = &bfqg->sched_data; +} + +static inline struct bfq_group * +bfq_cic_update_cgroup(struct cfq_io_context *cic) +{ + struct bfq_data *bfqd = cic->key; + return bfqd->root_group; +} + +static inline void bfq_bfqq_move(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct bfq_entity *entity, + struct bfq_group *bfqg) +{ +} + +static void bfq_end_wr_async(struct bfq_data *bfqd) +{ + bfq_end_wr_async_queues(bfqd, bfqd->root_group); +} + +static inline void bfq_disconnect_groups(struct bfq_data *bfqd) +{ + bfq_put_async_queues(bfqd, bfqd->root_group); +} + +static inline void bfq_free_root_group(struct bfq_data *bfqd) +{ + kfree(bfqd->root_group); +} + +static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) +{ + struct bfq_group *bfqg; + int i; + + bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); + if (bfqg == NULL) + return NULL; + + for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) + bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; + + return bfqg; +} +#endif diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c new file mode 100644 index 00000000000..2dd6699cfa6 --- /dev/null +++ b/block/bfq-ioc.c @@ -0,0 +1,410 @@ +/* + * BFQ: I/O context handling. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + */ + +/** + * bfq_cic_free_rcu - deferred cic freeing. + * @head: RCU head of the cic to free. + * + * Free the cic containing @head and, if it was the last one and + * the module is exiting wake up anyone waiting for its deallocation + * (see bfq_exit()). 
+ */ +static void bfq_cic_free_rcu(struct rcu_head *head) +{ + struct cfq_io_context *cic; + + cic = container_of(head, struct cfq_io_context, rcu_head); + + kmem_cache_free(bfq_ioc_pool, cic); + elv_ioc_count_dec(bfq_ioc_count); + + if (bfq_ioc_gone != NULL) { + spin_lock(&bfq_ioc_gone_lock); + if (bfq_ioc_gone != NULL && + !elv_ioc_count_read(bfq_ioc_count)) { + complete(bfq_ioc_gone); + bfq_ioc_gone = NULL; + } + spin_unlock(&bfq_ioc_gone_lock); + } +} + +static void bfq_cic_free(struct cfq_io_context *cic) +{ + call_rcu(&cic->rcu_head, bfq_cic_free_rcu); +} + +/** + * cic_free_func - disconnect a cic ready to be freed. + * @ioc: the io_context @cic belongs to. + * @cic: the cic to be freed. + * + * Remove @cic from the @ioc radix tree hash and from its cic list, + * deferring the deallocation of @cic to the end of the current RCU + * grace period. This assumes that __bfq_exit_single_io_context() + * has already been called for @cic. + */ +static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) +{ + unsigned long flags; + unsigned long dead_key = (unsigned long) cic->key; + + BUG_ON(!(dead_key & CIC_DEAD_KEY)); + + spin_lock_irqsave(&ioc->lock, flags); + radix_tree_delete(&ioc->bfq_radix_root, + dead_key >> CIC_DEAD_INDEX_SHIFT); + hlist_del_init_rcu(&cic->cic_list); + spin_unlock_irqrestore(&ioc->lock, flags); + + bfq_cic_free(cic); +} + +static void bfq_free_io_context(struct io_context *ioc) +{ + /* + * ioc->refcount is zero here, or we are called from elv_unregister(), + * so no more cic's are allowed to be linked into this ioc. So it + * should be ok to iterate over the known list, we will see all cic's + * since no new ones are added. + */ + call_for_each_cic(ioc, cic_free_func); +} + +/** + * __bfq_exit_single_io_context - deassociate @cic from any running task. + * @bfqd: bfq_data on which @cic is valid. + * @cic: the cic being exited. + * + * Whenever no more tasks are using @cic or @bfqd is deallocated we + * need to invalidate its entry in the radix tree hash table and to + * release the queues it refers to. + * + * Called under the queue lock. + */ +static void __bfq_exit_single_io_context(struct bfq_data *bfqd, + struct cfq_io_context *cic) +{ + struct io_context *ioc = cic->ioc; + + list_del_init(&cic->queue_list); + + /* + * Make sure dead mark is seen for dead queues + */ + smp_wmb(); + rcu_assign_pointer(cic->key, bfqd_dead_key(bfqd)); + + /* + * No write-side locking as no task is using @ioc (they're exited + * or bfqd is being deallocated. + */ + rcu_read_lock(); + if (rcu_dereference(ioc->ioc_data) == cic) { + rcu_read_unlock(); + spin_lock(&ioc->lock); + rcu_assign_pointer(ioc->ioc_data, NULL); + spin_unlock(&ioc->lock); + } else + rcu_read_unlock(); + + if (cic->cfqq[BLK_RW_ASYNC] != NULL) { + bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_ASYNC]); + cic->cfqq[BLK_RW_ASYNC] = NULL; + } + + spin_lock(&bfqd->eqm_lock); + if (cic->cfqq[BLK_RW_SYNC] != NULL) { + /* + * If the bic is using a shared queue, put the reference + * taken on the io_context when the bic started using a + * shared bfq_queue. + */ + if (bfq_bfqq_coop(cic->cfqq[BLK_RW_SYNC])) + put_io_context(ioc); + bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_SYNC]); + cic->cfqq[BLK_RW_SYNC] = NULL; + } + spin_unlock(&bfqd->eqm_lock); +} + +/** + * bfq_exit_single_io_context - deassociate @cic from @ioc (unlocked version). + * @ioc: the io_context @cic belongs to. + * @cic: the cic being exited. + * + * Take the queue lock and call __bfq_exit_single_io_context() to do the + * rest of the work. 
We take care of possible races with bfq_exit_queue() + * using bfq_get_bfqd_locked() (and abusing a little bit the RCU mechanism). + */ +static void bfq_exit_single_io_context(struct io_context *ioc, + struct cfq_io_context *cic) +{ + struct bfq_data *bfqd; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (bfqd != NULL) { + __bfq_exit_single_io_context(bfqd, cic); + bfq_put_bfqd_unlock(bfqd, &flags); + } +} + +/** + * bfq_exit_io_context - deassociate @ioc from all cics it owns. + * @ioc: the @ioc being exited. + * + * No more processes are using @ioc we need to clean up and put the + * internal structures we have that belongs to that process. Loop + * through all its cics, locking their queues and exiting them. + */ +static void bfq_exit_io_context(struct io_context *ioc) +{ + call_for_each_cic(ioc, bfq_exit_single_io_context); +} + +static struct cfq_io_context *bfq_alloc_io_context(struct bfq_data *bfqd, + gfp_t gfp_mask) +{ + struct cfq_io_context *cic; + + cic = kmem_cache_alloc_node(bfq_ioc_pool, gfp_mask | __GFP_ZERO, + bfqd->queue->node); + if (cic != NULL) { + cic->ttime.last_end_request = jiffies; + /* + * A newly created cic indicates that the process has just + * started doing I/O, and is probably mapping into memory its + * executable and libraries: it definitely needs weight raising. + * There is however the possibility that the process performs, + * for a while, I/O close to some other process. EQM intercepts + * this behavior and may merge the queue corresponding to the + * process with some other queue, BEFORE the weight of the queue + * is raised. Merged queues are not weight-raised (they are assumed + * to belong to processes that benefit only from high throughput). + * If the merge is basically the consequence of an accident, then + * the queue will be split soon and will get back its old weight. + * It is then important to write down somewhere that this queue + * does need weight raising, even if it did not make it to get its + * weight raised before being merged. To this purpose, we overload + * the field raising_time_left and assign 1 to it, to mark the queue + * as needing weight raising. + */ + cic->wr_time_left = 1; + INIT_LIST_HEAD(&cic->queue_list); + INIT_HLIST_NODE(&cic->cic_list); + cic->dtor = bfq_free_io_context; + cic->exit = bfq_exit_io_context; + elv_ioc_count_inc(bfq_ioc_count); + } + + return cic; +} + +/** + * bfq_drop_dead_cic - free an exited cic. + * @bfqd: bfq data for the device in use. + * @ioc: io_context owning @cic. + * @cic: the @cic to free. + * + * We drop cfq io contexts lazily, so we may find a dead one. + */ +static void bfq_drop_dead_cic(struct bfq_data *bfqd, struct io_context *ioc, + struct cfq_io_context *cic) +{ + unsigned long flags; + + WARN_ON(!list_empty(&cic->queue_list)); + BUG_ON(cic->key != bfqd_dead_key(bfqd)); + + spin_lock_irqsave(&ioc->lock, flags); + + BUG_ON(ioc->ioc_data == cic); + + /* + * With shared I/O contexts two lookups may race and drop the + * same cic more than one time: RCU guarantees that the storage + * will not be freed too early, here we make sure that we do + * not try to remove the cic from the hashing structures multiple + * times. + */ + if (!hlist_unhashed(&cic->cic_list)) { + radix_tree_delete(&ioc->bfq_radix_root, bfqd->cic_index); + hlist_del_init_rcu(&cic->cic_list); + bfq_cic_free(cic); + } + + spin_unlock_irqrestore(&ioc->lock, flags); +} + +/** + * bfq_cic_lookup - search into @ioc a cic associated to @bfqd. + * @bfqd: the lookup key. 
+ * @ioc: the io_context of the process doing I/O. + * + * If @ioc already has a cic associated to @bfqd return it, return %NULL + * otherwise. + */ +static struct cfq_io_context *bfq_cic_lookup(struct bfq_data *bfqd, + struct io_context *ioc) +{ + struct cfq_io_context *cic; + unsigned long flags; + void *k; + + if (unlikely(ioc == NULL)) + return NULL; + + rcu_read_lock(); + + /* We maintain a last-hit cache, to avoid browsing over the tree. */ + cic = rcu_dereference(ioc->ioc_data); + if (cic != NULL) { + k = rcu_dereference(cic->key); + if (k == bfqd) + goto out; + } + + do { + cic = radix_tree_lookup(&ioc->bfq_radix_root, + bfqd->cic_index); + if (cic == NULL) + goto out; + + k = rcu_dereference(cic->key); + if (unlikely(k != bfqd)) { + rcu_read_unlock(); + bfq_drop_dead_cic(bfqd, ioc, cic); + rcu_read_lock(); + continue; + } + + spin_lock_irqsave(&ioc->lock, flags); + rcu_assign_pointer(ioc->ioc_data, cic); + spin_unlock_irqrestore(&ioc->lock, flags); + break; + } while (1); + +out: + rcu_read_unlock(); + + return cic; +} + +/** + * bfq_cic_link - add @cic to @ioc. + * @bfqd: bfq_data @cic refers to. + * @ioc: io_context @cic belongs to. + * @cic: the cic to link. + * @gfp_mask: the mask to use for radix tree preallocations. + * + * Add @cic to @ioc, using @bfqd as the search key. This enables us to + * lookup the process specific cfq io context when entered from the block + * layer. Also adds @cic to a per-bfqd list, used when this queue is + * removed. + */ +static int bfq_cic_link(struct bfq_data *bfqd, struct io_context *ioc, + struct cfq_io_context *cic, gfp_t gfp_mask) +{ + unsigned long flags; + int ret; + + ret = radix_tree_preload(gfp_mask); + if (ret == 0) { + cic->ioc = ioc; + + /* No write-side locking, cic is not published yet. */ + rcu_assign_pointer(cic->key, bfqd); + + spin_lock_irqsave(&ioc->lock, flags); + ret = radix_tree_insert(&ioc->bfq_radix_root, + bfqd->cic_index, cic); + if (ret == 0) + hlist_add_head_rcu(&cic->cic_list, &ioc->bfq_cic_list); + spin_unlock_irqrestore(&ioc->lock, flags); + + radix_tree_preload_end(); + + if (ret == 0) { + spin_lock_irqsave(bfqd->queue->queue_lock, flags); + list_add(&cic->queue_list, &bfqd->cic_list); + spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); + } + } + + if (ret != 0) + printk(KERN_ERR "bfq: cic link failed!\n"); + + return ret; +} + +/** + * bfq_ioc_set_ioprio - signal a priority change to the cics belonging to @ioc. + * @ioc: the io_context changing its priority. + */ +static inline void bfq_ioc_set_ioprio(struct io_context *ioc) +{ + call_for_each_cic(ioc, bfq_changed_ioprio); +} + +/** + * bfq_get_io_context - return the @cic associated to @bfqd in @ioc. + * @bfqd: the search key. + * @gfp_mask: the mask to use for cic allocation. + * + * Setup general io context and cfq io context. There can be several cfq + * io contexts per general io context, if this process is doing io to more + * than one device managed by cfq. + */ +static struct cfq_io_context *bfq_get_io_context(struct bfq_data *bfqd, + gfp_t gfp_mask) +{ + struct io_context *ioc = NULL; + struct cfq_io_context *cic; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + ioc = get_io_context(gfp_mask, bfqd->queue->node); + if (ioc == NULL) + return NULL; + + /* Lookup for an existing cic. */ + cic = bfq_cic_lookup(bfqd, ioc); + if (cic != NULL) + goto out; + + /* Alloc one if needed. */ + cic = bfq_alloc_io_context(bfqd, gfp_mask); + if (cic == NULL) + goto err; + + /* Link it into the ioc's radix tree and cic list. 
*/ + if (bfq_cic_link(bfqd, ioc, cic, gfp_mask) != 0) + goto err_free; + +out: + /* + * test_and_clear_bit() implies a memory barrier, paired with + * the wmb() in fs/ioprio.c, so the value seen for ioprio is the + * new one. + */ + if (unlikely(test_and_clear_bit(IOC_BFQ_IOPRIO_CHANGED, + ioc->ioprio_changed))) + bfq_ioc_set_ioprio(ioc); + + return cic; +err_free: + bfq_cic_free(cic); +err: + put_io_context(ioc); + return NULL; +} diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c new file mode 100644 index 00000000000..ee5a4aeaf43 --- /dev/null +++ b/block/bfq-iosched.c @@ -0,0 +1,3956 @@ +/* + * Budget Fair Queueing (BFQ) disk scheduler. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + * + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ + * file. + * + * BFQ is a proportional-share storage-I/O scheduling algorithm based on + * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets, + * measured in number of sectors, to processes instead of time slices. The + * device is not granted to the in-service process for a given time slice, + * but until it has exhausted its assigned budget. This change from the time + * to the service domain allows BFQ to distribute the device throughput + * among processes as desired, without any distortion due to ZBR, workload + * fluctuations or other factors. BFQ uses an ad hoc internal scheduler, + * called B-WF2Q+, to schedule processes according to their budgets. More + * precisely, BFQ schedules queues associated to processes. Thanks to the + * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to + * I/O-bound processes issuing sequential requests (to boost the + * throughput), and yet guarantee a low latency to interactive and soft + * real-time applications. + * + * BFQ is described in [1], where also a reference to the initial, more + * theoretical paper on BFQ can be found. The interested reader can find + * in the latter paper full details on the main algorithm, as well as + * formulas of the guarantees and formal proofs of all the properties. + * With respect to the version of BFQ presented in these papers, this + * implementation adds a few more heuristics, such as the one that + * guarantees a low latency to soft real-time applications, and a + * hierarchical extension based on H-WF2Q+. + * + * B-WF2Q+ is based on WF2Q+, that is described in [2], together with + * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) + * complexity derives from the one introduced with EEVDF in [3]. + * + * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness + * with the BFQ Disk I/O Scheduler'', + * Proceedings of the 5th Annual International Systems and Storage + * Conference (SYSTOR '12), June 2012. + * + * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf + * + * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing + * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, + * Oct 1997. + * + * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz + * + * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline + * First: A Flexible and Accurate Mechanism for Proportional Share + * Resource Allocation,'' technical report. 
+ * + * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "bfq.h" + +/* Max number of dispatches in one round of service. */ +static const int bfq_quantum = 4; + +/* Expiration time of sync (0) and async (1) requests, in jiffies. */ +static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; + +/* Maximum backwards seek, in KiB. */ +static const int bfq_back_max = 16 * 1024; + +/* Penalty of a backwards seek, in number of sectors. */ +static const int bfq_back_penalty = 2; + +/* Idling period duration, in jiffies. */ +static int bfq_slice_idle = HZ / 125; + +/* Default maximum budget values, in sectors and number of requests. */ +static const int bfq_default_max_budget = 16 * 1024; +static const int bfq_max_budget_async_rq = 4; + +/* + * Async to sync throughput distribution is controlled as follows: + * when an async request is served, the entity is charged the number + * of sectors of the request, multiplied by the factor below + */ +static const int bfq_async_charge_factor = 10; + +/* Default timeout values, in jiffies, approximating CFQ defaults. */ +static const int bfq_timeout_sync = HZ / 8; +static int bfq_timeout_async = HZ / 25; + +struct kmem_cache *bfq_pool; +struct kmem_cache *bfq_ioc_pool; + +static DEFINE_PER_CPU(unsigned long, bfq_ioc_count); +static struct completion *bfq_ioc_gone; +static DEFINE_SPINLOCK(bfq_ioc_gone_lock); + +static DEFINE_SPINLOCK(cic_index_lock); +static DEFINE_IDA(cic_index_ida); + +/* Below this threshold (in ms), we consider thinktime immediate. */ +#define BFQ_MIN_TT 2 + +/* hw_tag detection: parallel requests threshold and min samples needed. */ +#define BFQ_HW_QUEUE_THRESHOLD 4 +#define BFQ_HW_QUEUE_SAMPLES 32 + +#define BFQQ_SEEK_THR (sector_t)(8 * 1024) +#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) + +/* Min samples used for peak rate estimation (for autotuning). */ +#define BFQ_PEAK_RATE_SAMPLES 32 + +/* Shift used for peak rate fixed precision calculations. */ +#define BFQ_RATE_SHIFT 16 + +/* + * By default, BFQ computes the duration of the weight raising for + * interactive applications automatically, using the following formula: + * duration = (R / r) * T, where r is the peak rate of the device, and + * R and T are two reference parameters. + * In particular, R is the peak rate of the reference device (see below), + * and T is a reference time: given the systems that are likely to be + * installed on the reference device according to its speed class, T is + * about the maximum time needed, under BFQ and while reading two files in + * parallel, to load typical large applications on these systems. + * In practice, the slower/faster the device at hand is, the more/less it + * takes to load applications with respect to the reference device. + * Accordingly, the longer/shorter BFQ grants weight raising to interactive + * applications. + * + * BFQ uses four different reference pairs (R, T), depending on: + * . whether the device is rotational or non-rotational; + * . whether the device is slow, such as old or portable HDDs, as well as + * SD cards, or fast, such as newer HDDs and SSDs. + * + * The device's speed class is dynamically (re)detected in + * bfq_update_peak_rate() every time the estimated peak rate is updated. 
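 *
 * [Editorial aside, not part of this patch: a quick worked example of the
 * duration rule above. If the estimated peak rate r of the device at hand
 * is half the reference rate R, then duration = (R / r) * T = 2T, i.e.
 * interactive weight raising lasts twice as long as on the reference
 * device; a device twice as fast as the reference gets T / 2. A minimal
 * sketch of the computation, using hypothetical parameter names (the
 * actual code below precomputes R * T into bfqd->RT_prod and then divides
 * by the estimated peak rate):
 *
 *	static u64 example_wr_duration(u64 ref_rate, u64 ref_time,
 *				       u64 peak_rate)
 *	{
 *		return div64_u64(ref_rate * ref_time, peak_rate);
 *	}
 * ]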
+ * + * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0] + * are the reference values for a slow/fast rotational device, whereas + * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for + * a slow/fast non-rotational device. Finally, device_speed_thresh are the + * thresholds used to switch between speed classes. + * Both the reference peak rates and the thresholds are measured in + * sectors/usec, left-shifted by BFQ_RATE_SHIFT. + */ +static int R_slow[2] = {1536, 10752}; +static int R_fast[2] = {17415, 34791}; +/* + * To improve readability, a conversion function is used to initialize the + * following arrays, which entails that they can be initialized only in a + * function. + */ +static int T_slow[2]; +static int T_fast[2]; +static int device_speed_thresh[2]; + +#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ + { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) + +#define RQ_CIC(rq) \ + ((struct cfq_io_context *) (rq)->elevator_private[0]) +#define RQ_BFQQ(rq) ((rq)->elevator_private[1]) + +static inline void bfq_schedule_dispatch(struct bfq_data *bfqd); + +#include "bfq-ioc.c" +#include "bfq-sched.c" +#include "bfq-cgroup.c" + +#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ + IOPRIO_CLASS_IDLE) +#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ + IOPRIO_CLASS_RT) + +#define bfq_sample_valid(samples) ((samples) > 80) + +/* + * We regard a request as SYNC, if either it's a read or has the SYNC bit + * set (in which case it could also be a direct WRITE). + */ +static inline int bfq_bio_sync(struct bio *bio) +{ + if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) + return 1; + + return 0; +} + +/* + * Scheduler run of queue, if there are requests pending and no one in the + * driver that will restart queueing. + */ +static inline void bfq_schedule_dispatch(struct bfq_data *bfqd) +{ + if (bfqd->queued != 0) { + bfq_log(bfqd, "schedule dispatch"); + kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work); + } +} + +/* + * Lifted from AS - choose which of rq1 and rq2 that is best served now. + * We choose the request that is closesr to the head right now. Distance + * behind the head is penalized and only allowed to a certain extent. + */ +static struct request *bfq_choose_req(struct bfq_data *bfqd, + struct request *rq1, + struct request *rq2, + sector_t last) +{ + sector_t s1, s2, d1 = 0, d2 = 0; + unsigned long back_max; +#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ +#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ + unsigned wrap = 0; /* bit mask: requests behind the disk head? */ + + if (rq1 == NULL || rq1 == rq2) + return rq2; + if (rq2 == NULL) + return rq1; + + if (rq_is_sync(rq1) && !rq_is_sync(rq2)) + return rq1; + else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) + return rq2; + if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) + return rq1; + else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) + return rq2; + + s1 = blk_rq_pos(rq1); + s2 = blk_rq_pos(rq2); + + /* + * By definition, 1KiB is 2 sectors. + */ + back_max = bfqd->bfq_back_max * 2; + + /* + * Strict one way elevator _except_ in the case where we allow + * short backward seeks which are biased as twice the cost of a + * similar forward seek. 
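 *
 * [Editorial aside, not part of this patch: a worked example of the rule
 * implemented just below, using the default parameters. bfq_back_max is
 * 16384 KiB, so back_max is 32768 sectors, and bfq_back_penalty is 2.
 * With the head at last = 100000:
 *   - a request at s1 = 100512 lies ahead, so d1 = 512;
 *   - a request at s2 = 99488 lies 512 sectors behind, within back_max,
 *     so d2 = (100000 - 99488) * 2 = 1024;
 * the forward request therefore wins even though both requests are the
 * same distance from the head.]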
+ */ + if (s1 >= last) + d1 = s1 - last; + else if (s1 + back_max >= last) + d1 = (last - s1) * bfqd->bfq_back_penalty; + else + wrap |= BFQ_RQ1_WRAP; + + if (s2 >= last) + d2 = s2 - last; + else if (s2 + back_max >= last) + d2 = (last - s2) * bfqd->bfq_back_penalty; + else + wrap |= BFQ_RQ2_WRAP; + + /* Found required data */ + + /* + * By doing switch() on the bit mask "wrap" we avoid having to + * check two variables for all permutations: --> faster! + */ + switch (wrap) { + case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ + if (d1 < d2) + return rq1; + else if (d2 < d1) + return rq2; + else { + if (s1 >= s2) + return rq1; + else + return rq2; + } + + case BFQ_RQ2_WRAP: + return rq1; + case BFQ_RQ1_WRAP: + return rq2; + case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ + default: + /* + * Since both rqs are wrapped, + * start with the one that's further behind head + * (--> only *one* back seek required), + * since back seek takes more time than forward. + */ + if (s1 <= s2) + return rq1; + else + return rq2; + } +} + +static struct bfq_queue * +bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, + sector_t sector, struct rb_node **ret_parent, + struct rb_node ***rb_link) +{ + struct rb_node **p, *parent; + struct bfq_queue *bfqq = NULL; + + parent = NULL; + p = &root->rb_node; + while (*p) { + struct rb_node **n; + + parent = *p; + bfqq = rb_entry(parent, struct bfq_queue, pos_node); + + /* + * Sort strictly based on sector. Smallest to the left, + * largest to the right. + */ + if (sector > blk_rq_pos(bfqq->next_rq)) + n = &(*p)->rb_right; + else if (sector < blk_rq_pos(bfqq->next_rq)) + n = &(*p)->rb_left; + else + break; + p = n; + bfqq = NULL; + } + + *ret_parent = parent; + if (rb_link) + *rb_link = p; + + bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", + (long long unsigned)sector, + bfqq != NULL ? bfqq->pid : 0); + + return bfqq; +} + +static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct rb_node **p, *parent; + struct bfq_queue *__bfqq; + + if (bfqq->pos_root != NULL) { + rb_erase(&bfqq->pos_node, bfqq->pos_root); + bfqq->pos_root = NULL; + } + + if (bfq_class_idle(bfqq)) + return; + if (!bfqq->next_rq) + return; + + bfqq->pos_root = &bfqd->rq_pos_tree; + __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, + blk_rq_pos(bfqq->next_rq), &parent, &p); + if (__bfqq == NULL) { + rb_link_node(&bfqq->pos_node, parent, p); + rb_insert_color(&bfqq->pos_node, bfqq->pos_root); + } else + bfqq->pos_root = NULL; +} + +/* + * Tell whether there are active queues or groups with differentiated weights. + */ +static inline bool bfq_differentiated_weights(struct bfq_data *bfqd) +{ + BUG_ON(!bfqd->hw_tag); + /* + * For weights to differ, at least one of the trees must contain + * at least two nodes. + */ + return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) && + (bfqd->queue_weights_tree.rb_node->rb_left || + bfqd->queue_weights_tree.rb_node->rb_right) +#ifdef CONFIG_CGROUP_BFQIO + ) || + (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) && + (bfqd->group_weights_tree.rb_node->rb_left || + bfqd->group_weights_tree.rb_node->rb_right) +#endif + ); +} + +/* + * If the weight-counter tree passed as input contains no counter for + * the weight of the input entity, then add that counter; otherwise just + * increment the existing counter. + * + * Note that weight-counter trees contain few nodes in mostly symmetric + * scenarios. 
For example, if all queues have the same weight, then the + * weight-counter tree for the queues may contain at most one node. + * This holds even if low_latency is on, because weight-raised queues + * are not inserted in the tree. + * In most scenarios, the rate at which nodes are created/destroyed + * should be low too. + */ +static void bfq_weights_tree_add(struct bfq_data *bfqd, + struct bfq_entity *entity, + struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* + * Do not insert if: + * - the device does not support queueing; + * - the entity is already associated with a counter, which happens if: + * 1) the entity is associated with a queue, 2) a request arrival + * has caused the queue to become both non-weight-raised, and hence + * change its weight, and backlogged; in this respect, each + * of the two events causes an invocation of this function, + * 3) this is the invocation of this function caused by the second + * event. This second invocation is actually useless, and we handle + * this fact by exiting immediately. More efficient or clearer + * solutions might possibly be adopted. + */ + if (!bfqd->hw_tag || entity->weight_counter) + return; + + while (*new) { + struct bfq_weight_counter *__counter = container_of(*new, + struct bfq_weight_counter, + weights_node); + parent = *new; + + if (entity->weight == __counter->weight) { + entity->weight_counter = __counter; + goto inc_counter; + } + if (entity->weight < __counter->weight) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), + GFP_ATOMIC); + entity->weight_counter->weight = entity->weight; + rb_link_node(&entity->weight_counter->weights_node, parent, new); + rb_insert_color(&entity->weight_counter->weights_node, root); + +inc_counter: + entity->weight_counter->num_active++; +} + +/* + * Decrement the weight counter associated with the entity, and, if the + * counter reaches 0, remove the counter from the tree. + * See the comments to the function bfq_weights_tree_add() for considerations + * about overhead. + */ +static void bfq_weights_tree_remove(struct bfq_data *bfqd, + struct bfq_entity *entity, + struct rb_root *root) +{ + /* + * Check whether the entity is actually associated with a counter. + * In fact, the device may not be considered NCQ-capable for a while, + * which implies that no insertion in the weight trees is performed, + * after which the device may start to be deemed NCQ-capable, and hence + * this function may start to be invoked. This may cause the function + * to be invoked for entities that are not associated with any counter. 
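 *
 * [Editorial aside, not part of this patch: a small concrete case for the
 * weight-counter trees discussed above. On an NCQ-capable device, if three
 * busy, non-weight-raised queues all have weight 100, bfq_weights_tree_add()
 * creates a single node with weight = 100 and num_active = 3; that lone
 * node has no children, so bfq_differentiated_weights() reports a symmetric
 * scenario. As soon as a queue with a different weight, say 200, becomes
 * busy, a second node is added and the weights become differentiated.]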
+ */ + if (!entity->weight_counter) + return; + + BUG_ON(RB_EMPTY_ROOT(root)); + BUG_ON(entity->weight_counter->weight != entity->weight); + + BUG_ON(!entity->weight_counter->num_active); + entity->weight_counter->num_active--; + if (entity->weight_counter->num_active > 0) + goto reset_entity_pointer; + + rb_erase(&entity->weight_counter->weights_node, root); + kfree(entity->weight_counter); + +reset_entity_pointer: + entity->weight_counter = NULL; +} + +static struct request *bfq_find_next_rq(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct request *last) +{ + struct rb_node *rbnext = rb_next(&last->rb_node); + struct rb_node *rbprev = rb_prev(&last->rb_node); + struct request *next = NULL, *prev = NULL; + + BUG_ON(RB_EMPTY_NODE(&last->rb_node)); + + if (rbprev != NULL) + prev = rb_entry_rq(rbprev); + + if (rbnext != NULL) + next = rb_entry_rq(rbnext); + else { + rbnext = rb_first(&bfqq->sort_list); + if (rbnext && rbnext != &last->rb_node) + next = rb_entry_rq(rbnext); + } + + return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); +} + +/* see the definition of bfq_async_charge_factor for details */ +static inline unsigned long bfq_serv_to_charge(struct request *rq, + struct bfq_queue *bfqq) +{ + return blk_rq_sectors(rq) * + (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) * + bfq_async_charge_factor)); +} + +/** + * bfq_updated_next_req - update the queue after a new next_rq selection. + * @bfqd: the device data the queue belongs to. + * @bfqq: the queue to update. + * + * If the first request of a queue changes we make sure that the queue + * has enough budget to serve at least its first request (if the + * request has grown). We do this because if the queue has not enough + * budget for its first request, it has to go through two dispatch + * rounds to actually get it dispatched. + */ +static void bfq_updated_next_req(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + struct request *next_rq = bfqq->next_rq; + unsigned long new_budget; + + if (next_rq == NULL) + return; + + if (bfqq == bfqd->in_service_queue) + /* + * In order not to break guarantees, budgets cannot be + * changed after an entity has been selected. + */ + return; + + BUG_ON(entity->tree != &st->active); + BUG_ON(entity == entity->sched_data->in_service_entity); + + new_budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + if (entity->budget != new_budget) { + entity->budget = new_budget; + bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", + new_budget); + bfq_activate_bfqq(bfqd, bfqq); + } +} + +static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd) +{ + u64 dur; + + if (bfqd->bfq_wr_max_time > 0) + return bfqd->bfq_wr_max_time; + + dur = bfqd->RT_prod; + do_div(dur, bfqd->peak_rate); + + return dur; +} + +static inline unsigned +bfq_bfqq_cooperations(struct bfq_queue *bfqq) +{ + return bfqq->cic ? bfqq->cic->cooperations : 0; +} + +static inline void +bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct cfq_io_context *cic) +{ + if (cic->saved_idle_window) + bfq_mark_bfqq_idle_window(bfqq); + else + bfq_clear_bfqq_idle_window(bfqq); + if (cic->saved_IO_bound) + bfq_mark_bfqq_IO_bound(bfqq); + else + bfq_clear_bfqq_IO_bound(bfqq); + if (cic->wr_time_left && bfqq->bfqd->low_latency && + cic->cooperations < bfqq->bfqd->bfq_coop_thresh) { + /* + * Start a weight raising period with the duration given by + * the raising_time_left snapshot. 
+ */ + if (bfq_bfqq_busy(bfqq)) + bfqq->bfqd->wr_busy_queues++; + bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff; + bfqq->wr_cur_max_time = cic->wr_time_left; + bfqq->last_wr_start_finish = jiffies; + bfqq->entity.ioprio_changed = 1; + } + /* + * Clear raising_time_left to prevent bfq_bfqq_save_state() from + * getting confused about the queue's need of a weight-raising + * period. + */ + cic->wr_time_left = 0; +} + +/* + * Must be called with the queue_lock held. + */ +static int bfqq_process_refs(struct bfq_queue *bfqq) +{ + int process_refs, io_refs; + + io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; + process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; + BUG_ON(process_refs < 0); + return process_refs; +} + +static void bfq_add_request(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_entity *entity = &bfqq->entity; + struct bfq_data *bfqd = bfqq->bfqd; + struct request *next_rq, *prev; + unsigned long old_wr_coeff = bfqq->wr_coeff; + int idle_for_long_time = 0; + + bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); + bfqq->queued[rq_is_sync(rq)]++; + bfqd->queued++; + + elv_rb_add(&bfqq->sort_list, rq); + + spin_lock(&bfqd->eqm_lock); + + /* + * Check if this request is a better next-serve candidate. + */ + prev = bfqq->next_rq; + next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); + BUG_ON(next_rq == NULL); + bfqq->next_rq = next_rq; + + /* + * Adjust priority tree position, if next_rq changes. + */ + if (prev != bfqq->next_rq) + bfq_rq_pos_tree_add(bfqd, bfqq); + + spin_unlock(&bfqd->eqm_lock); + + if (!bfq_bfqq_busy(bfqq)) { + int soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && + bfq_bfqq_cooperations(bfqq) < bfqd->bfq_coop_thresh && + time_is_before_jiffies(bfqq->soft_rt_next_start); + idle_for_long_time = bfq_bfqq_cooperations(bfqq) < + bfqd->bfq_coop_thresh && + time_is_before_jiffies( + bfqq->budget_timeout + + bfqd->bfq_wr_min_idle_time); + entity->budget = max_t(unsigned long, bfqq->max_budget, + bfq_serv_to_charge(next_rq, bfqq)); + + if (!bfq_bfqq_IO_bound(bfqq)) { + if (time_before(jiffies, + RQ_CIC(rq)->ttime.last_end_request + + bfqd->bfq_slice_idle)) { + bfqq->requests_within_timer++; + if (bfqq->requests_within_timer >= + bfqd->bfq_requests_within_timer) + bfq_mark_bfqq_IO_bound(bfqq); + } else + bfqq->requests_within_timer = 0; + } + + if (!bfqd->low_latency) + goto add_bfqq_busy; + + if (bfq_bfqq_just_split(bfqq)) + goto set_ioprio_changed; + + /* + * If the queue: + * - is not being boosted, + * - has been idle for enough time, + * - is not a sync queue or is linked to a cfq_io_context (it is + * shared "for its nature" or it is not shared and its + * requests have not been redirected to a shared queue) + * start a weight-raising period. 
+ */ + if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt) && + (!bfq_bfqq_sync(bfqq) || bfqq->cic != NULL)) { + bfqq->wr_coeff = bfqd->bfq_wr_coeff; + if (idle_for_long_time) + bfqq->wr_cur_max_time = + bfq_wr_duration(bfqd); + else + bfqq->wr_cur_max_time = + bfqd->bfq_wr_rt_max_time; + bfq_log_bfqq(bfqd, bfqq, + "wrais starting at %lu, rais_max_time %u", + jiffies, + jiffies_to_msecs(bfqq-> + wr_cur_max_time)); + } else if (old_wr_coeff > 1) { + if (idle_for_long_time) + bfqq->wr_cur_max_time = + bfq_wr_duration(bfqd); + else if (bfq_bfqq_cooperations(bfqq) >= + bfqd->bfq_coop_thresh || + (bfqq->wr_cur_max_time == + bfqd->bfq_wr_rt_max_time && + !soft_rt)) { + bfqq->wr_coeff = 1; + bfq_log_bfqq(bfqd, bfqq, + "wrais ending at %lu, rais_max_time %u", + jiffies, + jiffies_to_msecs(bfqq-> + wr_cur_max_time)); + } else if (time_before( + bfqq->last_wr_start_finish + + bfqq->wr_cur_max_time, + jiffies + + bfqd->bfq_wr_rt_max_time) && + soft_rt) { + /* + * + * The remaining weight-raising time is lower + * than bfqd->bfq_raising_rt_max_time, which + * means that the application is enjoying + * weight raising either because deemed soft- + * rt in the near past, or because deemed + * interactive a long ago. In both cases, + * resetting now the current remaining weight- + * raising time for the application to the + * weight-raising duration for soft rt + * applications would not cause any latency + * increase for the application (as the new + * duration would be higher than the remaining + * time). + * + * In addition, the application is now meeting + * the requirements for being deemed soft rt. + * In the end we can correctly and safely + * (re)charge the weight-raising duration for + * the application with the weight-raising + * duration for soft rt applications. + * + * In particular, doing this recharge now, i.e., + * before the weight-raising period for the + * application finishes, reduces the probability + * of the following negative scenario: + * 1) the weight of a soft rt application is + * raised at startup (as for any newly + * created application), + * 2) since the application is not interactive, + * at a certain time weight-raising is + * stopped for the application, + * 3) at that time the application happens to + * still have pending requests, and hence + * is destined to not have a chance to be + * deemed soft rt before these requests are + * completed (see the comments to the + * function bfq_bfqq_softrt_next_start() + * for details on soft rt detection), + * 4) these pending requests experience a high + * latency because the application is not + * weight-raised while they are pending. 
+ */ + bfqq->last_wr_start_finish = jiffies; + bfqq->wr_cur_max_time = + bfqd->bfq_wr_rt_max_time; + } + } +set_ioprio_changed: + if (old_wr_coeff != bfqq->wr_coeff) + entity->ioprio_changed = 1; +add_bfqq_busy: + bfqq->last_idle_bklogged = jiffies; + bfqq->service_from_backlogged = 0; + bfq_clear_bfqq_softrt_update(bfqq); + bfq_add_bfqq_busy(bfqd, bfqq); + } else { + if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) && + time_is_before_jiffies( + bfqq->last_wr_start_finish + + bfqd->bfq_wr_min_inter_arr_async)) { + bfqq->wr_coeff = bfqd->bfq_wr_coeff; + bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); + + bfqd->wr_busy_queues++; + entity->ioprio_changed = 1; + bfq_log_bfqq(bfqd, bfqq, + "non-idle wrais starting at %lu, rais_max_time %u", + jiffies, + jiffies_to_msecs(bfqq->wr_cur_max_time)); + } + if (prev != bfqq->next_rq) + bfq_updated_next_req(bfqd, bfqq); + } + + if (bfqd->low_latency && + (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || + idle_for_long_time)) + bfqq->last_wr_start_finish = jiffies; +} + +static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, + struct bio *bio) +{ + struct task_struct *tsk = current; + struct cfq_io_context *cic; + struct bfq_queue *bfqq; + + cic = bfq_cic_lookup(bfqd, tsk->io_context); + if (cic == NULL) + return NULL; + + spin_lock(&bfqd->eqm_lock); + bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); + spin_unlock(&bfqd->eqm_lock); + if (bfqq != NULL) { + sector_t sector = bio->bi_sector + bio_sectors(bio); + + return elv_rb_find(&bfqq->sort_list, sector); + } + + return NULL; +} + +static void bfq_activate_request(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + + bfqd->rq_in_driver++; + bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); + bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", + (long long unsigned)bfqd->last_position); +} + +static inline void bfq_deactivate_request(struct request_queue *q, + struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + + BUG_ON(bfqd->rq_in_driver == 0); + bfqd->rq_in_driver--; +} + +static void bfq_remove_request(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_data *bfqd = bfqq->bfqd; + const int sync = rq_is_sync(rq); + + spin_lock(&bfqq->bfqd->eqm_lock); + if (bfqq->next_rq == rq) { + bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); + bfq_updated_next_req(bfqd, bfqq); + } + + list_del_init(&rq->queuelist); + BUG_ON(bfqq->queued[sync] == 0); + bfqq->queued[sync]--; + bfqd->queued--; + elv_rb_del(&bfqq->sort_list, rq); + + if (RB_EMPTY_ROOT(&bfqq->sort_list)) { + if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) + bfq_del_bfqq_busy(bfqd, bfqq, 1); + /* + * Remove queue from request-position tree as it is empty. 
+ */ + if (bfqq->pos_root != NULL) { + rb_erase(&bfqq->pos_node, bfqq->pos_root); + bfqq->pos_root = NULL; + } + } + spin_unlock(&bfqq->bfqd->eqm_lock); + + if (rq->cmd_flags & REQ_META) { + BUG_ON(bfqq->meta_pending == 0); + bfqq->meta_pending--; + } +} + +static int bfq_merge(struct request_queue *q, struct request **req, + struct bio *bio) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct request *__rq; + + __rq = bfq_find_rq_fmerge(bfqd, bio); + if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } + + return ELEVATOR_NO_MERGE; +} + +static void bfq_merged_request(struct request_queue *q, struct request *req, + int type) +{ + if (type == ELEVATOR_FRONT_MERGE && + rb_prev(&req->rb_node) && + blk_rq_pos(req) < + blk_rq_pos(container_of(rb_prev(&req->rb_node), + struct request, rb_node))) { + struct bfq_queue *bfqq = RQ_BFQQ(req); + struct bfq_data *bfqd = bfqq->bfqd; + struct request *prev, *next_rq; + + /* Reposition request in its sort_list */ + elv_rb_del(&bfqq->sort_list, req); + elv_rb_add(&bfqq->sort_list, req); + /* Choose next request to be served for bfqq */ + prev = bfqq->next_rq; + next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req, + bfqd->last_position); + BUG_ON(next_rq == NULL); + bfqq->next_rq = next_rq; + /* + * If next_rq changes, update both the queue's budget to + * fit the new request and the queue's position in its + * rq_pos_tree. + */ + if (prev != bfqq->next_rq) { + bfq_updated_next_req(bfqd, bfqq); + bfq_rq_pos_tree_add(bfqd, bfqq); + } + } +} + +static void bfq_merged_requests(struct request_queue *q, struct request *rq, + struct request *next) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + /* + * Reposition in fifo if next is older than rq. + */ + if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && + time_before(rq_fifo_time(next), rq_fifo_time(rq))) { + list_move(&rq->queuelist, &next->queuelist); + rq_set_fifo_time(rq, rq_fifo_time(next)); + } + + /* + * eqm_lock needed to avoid that other critical sections not holding + * the queue_lock read an inconsistent value from bfqq->next_rq while + * traversing the rq_pos_trees + */ + if (bfqq->next_rq == next) { + spin_lock(&bfqq->bfqd->eqm_lock); + bfqq->next_rq = rq; + spin_unlock(&bfqq->bfqd->eqm_lock); + } + + bfq_remove_request(next); +} + +/* Must be called with bfqq != NULL */ +static inline void bfq_bfqq_end_wr(struct bfq_queue *bfqq) +{ + BUG_ON(bfqq == NULL); + if (bfq_bfqq_busy(bfqq)) + bfqq->bfqd->wr_busy_queues--; + bfqq->wr_coeff = 1; + bfqq->wr_cur_max_time = 0; + /* Trigger a weight change on the next activation of the queue */ + bfqq->entity.ioprio_changed = 1; +} + +static void bfq_end_wr_async_queues(struct bfq_data *bfqd, + struct bfq_group *bfqg) +{ + int i, j; + + for (i = 0; i < 2; i++) + for (j = 0; j < IOPRIO_BE_NR; j++) + if (bfqg->async_bfqq[i][j] != NULL) + bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]); + if (bfqg->async_idle_bfqq != NULL) + bfq_bfqq_end_wr(bfqg->async_idle_bfqq); +} + +static void bfq_end_wr(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq; + + spin_lock_irq(bfqd->queue->queue_lock); + + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) + bfq_bfqq_end_wr(bfqq); + list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) + bfq_bfqq_end_wr(bfqq); + bfq_end_wr_async(bfqd); + + spin_unlock_irq(bfqd->queue->queue_lock); +} + +static inline sector_t bfq_io_struct_pos(void *io_struct, bool request) +{ + if (request) + return blk_rq_pos(io_struct); + else + return ((struct bio *)io_struct)->bi_sector; 
+} + +static inline sector_t bfq_dist_from(sector_t pos1, + sector_t pos2) +{ + if (pos1 >= pos2) + return pos1 - pos2; + else + return pos2 - pos1; +} + +static inline int bfq_rq_close_to_sector(void *io_struct, bool request, + sector_t sector) +{ + return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <= + BFQQ_SEEK_THR; +} + +static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector) +{ + struct rb_root *root = &bfqd->rq_pos_tree; + struct rb_node *parent, *node; + struct bfq_queue *__bfqq; + + if (RB_EMPTY_ROOT(root)) + return NULL; + + /* + * First, if we find a request starting at the end of the last + * request, choose it. + */ + __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); + if (__bfqq != NULL) + return __bfqq; + + /* + * If the exact sector wasn't found, the parent of the NULL leaf + * will contain the closest sector (rq_pos_tree sorted by + * next_request position). + */ + __bfqq = rb_entry(parent, struct bfq_queue, pos_node); + if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) + return __bfqq; + + if (blk_rq_pos(__bfqq->next_rq) < sector) + node = rb_next(&__bfqq->pos_node); + else + node = rb_prev(&__bfqq->pos_node); + if (node == NULL) + return NULL; + + __bfqq = rb_entry(node, struct bfq_queue, pos_node); + if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) + return __bfqq; + + return NULL; +} + +/* + * bfqd - obvious + * cur_bfqq - passed in so that we don't decide that the current queue + * is closely cooperating with itself + * sector - used as a reference point to search for a close queue + */ +static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, + struct bfq_queue *cur_bfqq, + sector_t sector) +{ + struct bfq_queue *bfqq; + + if (bfq_class_idle(cur_bfqq)) + return NULL; + if (!bfq_bfqq_sync(cur_bfqq)) + return NULL; + if (BFQQ_SEEKY(cur_bfqq)) + return NULL; + + /* If device has only one backlogged bfq_queue, don't search. */ + if (bfqd->busy_queues == 1) + return NULL; + + /* + * We should notice if some of the queues are cooperating, e.g. + * working closely on the same area of the disk. In that case, + * we can group them together and don't waste time idling. + */ + bfqq = bfqq_close(bfqd, sector); + if (bfqq == NULL || bfqq == cur_bfqq) + return NULL; + + /* + * Do not merge queues from different bfq_groups. + */ + if (bfqq->entity.parent != cur_bfqq->entity.parent) + return NULL; + + /* + * It only makes sense to merge sync queues. + */ + if (!bfq_bfqq_sync(bfqq)) + return NULL; + if (BFQQ_SEEKY(bfqq)) + return NULL; + + /* + * Do not merge queues of different priority classes. + */ + if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) + return NULL; + + return bfqq; +} + +static struct bfq_queue * +bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +{ + int process_refs, new_process_refs; + struct bfq_queue *__bfqq; + + /* + * If there are no process references on the new_bfqq, then it is + * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain + * may have dropped their last reference (not just their last process + * reference). + */ + if (!bfqq_process_refs(new_bfqq)) + return NULL; + + /* Avoid a circular list and skip interim queue merges. */ + while ((__bfqq = new_bfqq->new_bfqq)) { + if (__bfqq == bfqq) + return NULL; + new_bfqq = __bfqq; + } + + process_refs = bfqq_process_refs(bfqq); + new_process_refs = bfqq_process_refs(new_bfqq); + /* + * If the process for the bfqq has gone away, there is no + * sense in merging the queues. 
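+ * (Process references are the references held on behalf of tasks, as
+ * opposed to, e.g., those held by still-allocated requests; when they
+ * reach zero, no task is left to issue further I/O through the queue.)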
+ */ + if (process_refs == 0 || new_process_refs == 0) + return NULL; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", + new_bfqq->pid); + + /* + * Merging is just a redirection: the requests of the process owning + * one of the two queues are redirected to the other queue. The latter + * queue, in its turn, is set as shared if this is the first time that + * the requests of some process are redirected to it. + * + * We redirect bfqq to new_bfqq and not the opposite, because we + * are in the context of the process owning bfqq, hence we have the + * io_cq of this process. So we can immediately configure this io_cq + * to redirect the requests of the process to new_bfqq. + * + * NOTE, even if new_bfqq coincides with the in-service queue, the + * io_cq of new_bfqq is not available, because, if the in-service queue + * is shared, bfqd->in_service_cic may not point to the io_cq of the + * in-service queue. + * Redirecting the requests of the process owning bfqq to the currently + * in-service queue is in any case the best option, as we feed the + * in-service queue with new requests close to the last request served + * and, by doing so, hopefully increase the throughput. + */ + bfqq->new_bfqq = new_bfqq; + atomic_add(process_refs, &new_bfqq->ref); + return new_bfqq; +} + +/* + * Attempt to schedule a merge of bfqq with the currently in-service queue + * or with a close queue among the scheduled queues. + * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue + * structure otherwise. + */ +static struct bfq_queue * +bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + void *io_struct, bool request) +{ + struct bfq_queue *in_service_bfqq, *new_bfqq; + + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + + if (!io_struct) + return NULL; + + in_service_bfqq = bfqd->in_service_queue; + + if (in_service_bfqq == NULL || in_service_bfqq == bfqq || + !bfqd->in_service_cic) + goto check_scheduled; + + if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq)) + goto check_scheduled; + + if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq)) + goto check_scheduled; + + if (in_service_bfqq->entity.parent != bfqq->entity.parent) + goto check_scheduled; + + if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && + bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) + if ((new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq))) + return new_bfqq; /* Merge with in-service queue */ + + /* + * Check whether there is a cooperator among currently scheduled + * queues. The only thing we need is that the bio/request is not + * NULL, as we need it to establish whether a cooperator exists. + */ +check_scheduled: + new_bfqq = bfq_close_cooperator(bfqd, bfqq, + bfq_io_struct_pos(io_struct, request)); + if (new_bfqq) + return bfq_setup_merge(bfqq, new_bfqq); + + return NULL; +} + +static inline void +bfq_bfqq_save_state(struct bfq_queue *bfqq) +{ + /* + * If bfqq->cic == NULL, the queue is already shared or its requests + * have already been redirected to a shared queue; both idle window + * and weight raising state have already been saved. Do nothing. + */ + if (bfqq->cic == NULL) + return; + if (bfqq->cic->wr_time_left) + /* + * This is the queue of a just-started process, and would + * deserve weight raising: we set raising_time_left to the full + * weight-raising duration to trigger weight-raising when + * and if the queue is split and the first request of the + * queue is enqueued. 
+ */ + bfqq->cic->wr_time_left = bfq_wr_duration(bfqq->bfqd); + else if (bfqq->wr_coeff > 1) { + unsigned long wr_duration = + jiffies - bfqq->last_wr_start_finish; + /* + * It may happen that a queue's weight raising period lasts + * longer than its raising_cur_max_time, as weight raising is + * handled only when a request is enqueued or dispatched (it + * does not use any timer). If the weight raising period is + * about to end, don't save it. + */ + if (bfqq->wr_cur_max_time <= wr_duration) + bfqq->cic->wr_time_left = 0; + else + bfqq->cic->wr_time_left = + bfqq->wr_cur_max_time - wr_duration; + /* + * The bfq_queue is becoming shared or the requests of the + * process owning the queue are being redirected to a shared + * queue. Stop the weight raising period of the queue, as in + * both cases it should not be owned by an interactive or + * soft real-time application. + */ + bfq_bfqq_end_wr(bfqq); + } else + bfqq->cic->wr_time_left = 0; + bfqq->cic->saved_idle_window = bfq_bfqq_idle_window(bfqq); + bfqq->cic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); + bfqq->cic->cooperations++; + bfqq->cic->failed_cooperations = 0; +} + +static inline void +bfq_get_cic_reference(struct bfq_queue *bfqq) +{ + /* + * If bfqq->cic has a non-NULL value, the cic to which it belongs + * is about to begin using a shared bfq_queue. + */ + if (bfqq->cic) + atomic_long_inc(&bfqq->cic->ioc->refcount); +} + +static void +bfq_merge_bfqqs(struct bfq_data *bfqd, struct cfq_io_context *cic, + struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +{ + bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", + (long unsigned)new_bfqq->pid); + /* Save weight raising and idle window of the merged queues */ + bfq_bfqq_save_state(bfqq); + bfq_bfqq_save_state(new_bfqq); + if (bfq_bfqq_IO_bound(bfqq)) + bfq_mark_bfqq_IO_bound(new_bfqq); + bfq_clear_bfqq_IO_bound(bfqq); + /* + * Grab a reference to the cic, to prevent it from being destroyed + * before being possibly touched by a bfq_split_bfqq(). + */ + bfq_get_cic_reference(bfqq); + bfq_get_cic_reference(new_bfqq); + /* + * Merge queues (that is, let cic redirect its requests to new_bfqq) + */ + cic_set_bfqq(cic, new_bfqq, 1); + bfq_mark_bfqq_coop(new_bfqq); + /* + * new_bfqq now belongs to at least two cics (it is a shared queue): set + * new_bfqq->cic to NULL. bfqq either: + * - does not belong to any cic any more, and hence bfqq->cic must + * be set to NULL, or + * - is a queue whose owning cics have already been redirected to a + * different queue, hence the queue is destined to not belong to any + * cic soon and bfqq->cic is already NULL (therefore the next + * assignment causes no harm). + */ + new_bfqq->cic = NULL; + bfqq->cic = NULL; + bfq_put_queue(bfqq); +} + +static inline void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq) +{ + struct cfq_io_context *cic = bfqq->cic; + struct bfq_data *bfqd = bfqq->bfqd; + + if (cic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) { + cic->failed_cooperations++; + if (cic->failed_cooperations >= bfqd->bfq_failed_cooperations) + cic->cooperations = 0; + } +} + +static int bfq_allow_merge(struct request_queue *q, struct request *rq, + struct bio *bio) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct cfq_io_context *cic; + struct bfq_queue *bfqq, *new_bfqq; + unsigned long flags; + + /* Disallow merge of a sync bio into an async request. */ + if (bfq_bio_sync(bio) && !rq_is_sync(rq)) + return 0; + + /* + * Lookup the bfqq that this bio will be queued with. Allow + * merge only if rq is queued there. 
+ */ + cic = bfq_cic_lookup(bfqd, current->io_context); + if (cic == NULL) + return 0; + + /* + * The allow_merge_fn scheduler hook may be called with or without + * the queue_lock being held. Access to the rq_pos_tree data + * structures and to cic->bfqq[] is protected by the eqm_lock. + */ + spin_lock_irqsave(&bfqd->eqm_lock, flags); + bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); + /* + * We take advantage of this function to perform an early merge + * of the queues of possible cooperating processes. + */ + if (bfqq != NULL) { + new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false); + if (new_bfqq != NULL) { + bfq_merge_bfqqs(bfqd, cic, bfqq, new_bfqq); + /* + * If we get here, the bio will be queued in the + * shared queue, i.e., new_bfqq, so use new_bfqq + * to decide whether bio and rq can be merged. + */ + bfqq = new_bfqq; + } else + bfq_bfqq_increase_failed_cooperations(bfqq); + } + spin_unlock_irqrestore(&bfqd->eqm_lock, flags); + + return bfqq == RQ_BFQQ(rq); +} + +static void __bfq_set_in_service_queue(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + if (bfqq != NULL) { + bfq_mark_bfqq_must_alloc(bfqq); + bfq_mark_bfqq_budget_new(bfqq); + bfq_clear_bfqq_fifo_expire(bfqq); + + bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; + + bfq_log_bfqq(bfqd, bfqq, + "set_in_service_queue, cur-budget = %lu", + bfqq->entity.budget); + } + + bfqd->in_service_queue = bfqq; +} + +/* + * Get and set a new queue for service. + */ +static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq = bfq_get_next_queue(bfqd); + + __bfq_set_in_service_queue(bfqd, bfqq); + return bfqq; +} + +/* + * If enough samples have been computed, return the current max budget + * stored in bfqd, which is dynamically updated according to the + * estimated disk peak rate; otherwise return the default max budget + */ +static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) +{ + if (bfqd->budgets_assigned < 194) + return bfq_default_max_budget; + else + return bfqd->bfq_max_budget; +} + +/* + * Return min budget, which is a fraction of the current or default + * max budget (trying with 1/32) + */ +static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) +{ + if (bfqd->budgets_assigned < 194) + return bfq_default_max_budget / 32; + else + return bfqd->bfq_max_budget / 32; +} + +static void bfq_arm_slice_timer(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq = bfqd->in_service_queue; + struct cfq_io_context *cic; + unsigned long sl; + + BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + + /* Processes have exited, don't wait. */ + cic = bfqd->in_service_cic; + if (cic == NULL || atomic_read(&cic->ioc->nr_tasks) == 0) + return; + + bfq_mark_bfqq_wait_request(bfqq); + + /* + * We don't want to idle for seeks, but we do want to allow + * fair distribution of slice time for a process doing back-to-back + * seeks. So allow a little bit of time for him to submit a new rq. + * + * To prevent processes with (partly) seeky workloads from + * being too ill-treated, grant them a small fraction of the + * assigned budget before reducing the waiting time to + * BFQ_MIN_TT. This happened to help reduce latency. + */ + sl = bfqd->bfq_slice_idle; + /* + * Unless the queue is being weight-raised, grant only minimum idle + * time if the queue either has been seeky for long enough or has + * already proved to be constantly seeky. 
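+ * For example, if bfqd->bfq_slice_idle is 8 ms, a seeky
+ * non-weight-raised queue is granted only min(8 ms, BFQ_MIN_TT ms)
+ * below, whereas a weight-raised queue waits 3 * 8 = 24 ms.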
+ */ + if (bfq_sample_valid(bfqq->seek_samples) && + ((BFQQ_SEEKY(bfqq) && bfqq->entity.service > + bfq_max_budget(bfqq->bfqd) / 8) || + bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1) + sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); + else if (bfqq->wr_coeff > 1) + sl = sl * 3; + bfqd->last_idling_start = ktime_get(); + mod_timer(&bfqd->idle_slice_timer, jiffies + sl); + bfq_log(bfqd, "arm idle: %u/%u ms", + jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); +} + +/* + * Set the maximum time for the in-service queue to consume its + * budget. This prevents seeky processes from lowering the disk + * throughput (always guaranteed with a time slice scheme as in CFQ). + */ +static void bfq_set_budget_timeout(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq = bfqd->in_service_queue; + unsigned int timeout_coeff; + if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time) + timeout_coeff = 1; + else + timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; + + bfqd->last_budget_start = ktime_get(); + + bfq_clear_bfqq_budget_new(bfqq); + bfqq->budget_timeout = jiffies + + bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; + + bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", + jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * + timeout_coeff)); +} + +/* + * Move request from internal lists to the request queue dispatch list. + */ +static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + /* + * For consistency, the next instruction should have been executed + * after removing the request from the queue and dispatching it. + * We execute instead this instruction before bfq_remove_request() + * (and hence introduce a temporary inconsistency), for efficiency. + * In fact, in a forced_dispatch, this prevents two counters related + * to bfqq->dispatched to risk to be uselessly decremented if bfqq + * is not in service, and then to be incremented again after + * incrementing bfqq->dispatched. + */ + bfqq->dispatched++; + bfq_remove_request(rq); + elv_dispatch_sort(q, rq); + + if (bfq_bfqq_sync(bfqq)) + bfqd->sync_flight++; +} + +/* + * Return expired entry, or NULL to just start from scratch in rbtree. + */ +static struct request *bfq_check_fifo(struct bfq_queue *bfqq) +{ + struct request *rq = NULL; + + if (bfq_bfqq_fifo_expire(bfqq)) + return NULL; + + bfq_mark_bfqq_fifo_expire(bfqq); + + if (list_empty(&bfqq->fifo)) + return NULL; + + rq = rq_entry_fifo(bfqq->fifo.next); + + if (time_before(jiffies, rq_fifo_time(rq))) + return NULL; + + return rq; +} + +static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + return entity->budget - entity->service; +} + +/* Must be called with eqm_lock held */ +static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + BUG_ON(bfqq != bfqd->in_service_queue); + + __bfq_bfqd_reset_in_service(bfqd); + + /* + * If this bfqq is shared between multiple processes, check + * to make sure that those processes are still issuing I/Os + * within the mean seek distance. If not, it may be time to + * break the queues apart again. + */ + if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) + bfq_mark_bfqq_split_coop(bfqq); + + if (RB_EMPTY_ROOT(&bfqq->sort_list)) { + /* + * Overloading budget_timeout field to store the time + * at which the queue remains with no backlog; used by + * the weight-raising mechanism. 
+ */ + bfqq->budget_timeout = jiffies; + bfq_del_bfqq_busy(bfqd, bfqq, 1); + } + else { + bfq_activate_bfqq(bfqd, bfqq); + /* + * Resort priority tree of potential close cooperators. + */ + bfq_rq_pos_tree_add(bfqd, bfqq); + } +} + +/** + * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. + * @bfqd: device data. + * @bfqq: queue to update. + * @reason: reason for expiration. + * + * Handle the feedback on @bfqq budget. See the body for detailed + * comments. + */ +static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + enum bfqq_expiration reason) +{ + struct request *next_rq; + unsigned long budget, min_budget; + + budget = bfqq->max_budget; + min_budget = bfq_min_budget(bfqd); + + BUG_ON(bfqq != bfqd->in_service_queue); + + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", + bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", + budget, bfq_min_budget(bfqd)); + bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", + bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); + + if (bfq_bfqq_sync(bfqq)) { + switch (reason) { + /* + * Caveat: in all the following cases we trade latency + * for throughput. + */ + case BFQ_BFQQ_TOO_IDLE: + /* + * This is the only case where we may reduce + * the budget: if there is no request of the + * process still waiting for completion, then + * we assume (tentatively) that the timer has + * expired because the batch of requests of + * the process could have been served with a + * smaller budget. Hence, betting that + * process will behave in the same way when it + * becomes backlogged again, we reduce its + * next budget. As long as we guess right, + * this budget cut reduces the latency + * experienced by the process. + * + * However, if there are still outstanding + * requests, then the process may have not yet + * issued its next request just because it is + * still waiting for the completion of some of + * the still outstanding ones. So in this + * subcase we do not reduce its budget, on the + * contrary we increase it to possibly boost + * the throughput, as discussed in the + * comments to the BUDGET_TIMEOUT case. + */ + if (bfqq->dispatched > 0) /* still outstanding reqs */ + budget = min(budget * 2, bfqd->bfq_max_budget); + else { + if (budget > 5 * min_budget) + budget -= 4 * min_budget; + else + budget = min_budget; + } + break; + case BFQ_BFQQ_BUDGET_TIMEOUT: + /* + * We double the budget here because: 1) it + * gives the chance to boost the throughput if + * this is not a seeky process (which may have + * bumped into this timeout because of, e.g., + * ZBR), 2) together with charge_full_budget + * it helps give seeky processes higher + * timestamps, and hence be served less + * frequently. + */ + budget = min(budget * 2, bfqd->bfq_max_budget); + break; + case BFQ_BFQQ_BUDGET_EXHAUSTED: + /* + * The process still has backlog, and did not + * let either the budget timeout or the disk + * idling timeout expire. Hence it is not + * seeky, has a short thinktime and may be + * happy with a higher budget too. So + * definitely increase the budget of this good + * candidate to boost the disk throughput. + */ + budget = min(budget * 4, bfqd->bfq_max_budget); + break; + case BFQ_BFQQ_NO_MORE_REQUESTS: + /* + * Leave the budget unchanged. 
+ */
+ default:
+ return;
+ }
+ } else /* async queue */
+ /* async queues always get the maximum possible budget
+ * (their ability to dispatch is limited by
+ * @bfqd->bfq_max_budget_async_rq).
+ */
+ budget = bfqd->bfq_max_budget;
+
+ bfqq->max_budget = budget;
+
+ if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 &&
+ bfqq->max_budget > bfqd->bfq_max_budget)
+ bfqq->max_budget = bfqd->bfq_max_budget;
+
+ /*
+ * Make sure that we have enough budget for the next request.
+ * Since the finish time of the bfqq must be kept in sync with
+ * the budget, be sure to call __bfq_bfqq_expire() after the
+ * update.
+ */
+ next_rq = bfqq->next_rq;
+ if (next_rq != NULL)
+ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
+ bfq_serv_to_charge(next_rq, bfqq));
+ else
+ bfqq->entity.budget = bfqq->max_budget;
+
+ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu",
+ next_rq != NULL ? blk_rq_sectors(next_rq) : 0,
+ bfqq->entity.budget);
+}
+
+static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout)
+{
+ unsigned long max_budget;
+
+ /*
+ * The max_budget calculated when autotuning is equal to the
+ * number of sectors transferred in timeout_sync at the
+ * estimated peak rate.
+ */
+ max_budget = (unsigned long)(peak_rate * 1000 *
+ timeout >> BFQ_RATE_SHIFT);
+
+ return max_budget;
+}
+
+/*
+ * In addition to updating the peak rate, checks whether the process
+ * is "slow", and returns 1 if so. This slow flag is used, in addition
+ * to the budget timeout, to reduce the amount of service provided to
+ * seeky processes, and hence reduce their chances of lowering the
+ * throughput. See the code for more details.
+ */
+static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ int compensate, enum bfqq_expiration reason)
+{
+ u64 bw, usecs, expected, timeout;
+ ktime_t delta;
+ int update = 0;
+
+ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq))
+ return 0;
+
+ if (compensate)
+ delta = bfqd->last_idling_start;
+ else
+ delta = ktime_get();
+ delta = ktime_sub(delta, bfqd->last_budget_start);
+ usecs = ktime_to_us(delta);
+
+ /* Don't trust short/unrealistic values. */
+ if (usecs < 100 || usecs >= LONG_MAX)
+ return 0;
+
+ /*
+ * Calculate the bandwidth for the last slice. We use a 64 bit
+ * value to store the peak rate, in sectors per usec in fixed
+ * point math. We do so to have enough precision in the estimate
+ * and to avoid overflows.
+ */
+ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
+ do_div(bw, (unsigned long)usecs);
+
+ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
+
+ /*
+ * Use only long (> 20ms) intervals to filter out spikes for
+ * the peak rate estimation.
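+ * The accepted samples are then smoothed by the low-pass filter
+ * below: e.g., an old estimate equivalent to 100 MB/s combined with
+ * a new 60 MB/s sample yields 7/8 * 100 + 1/8 * 60 = 95 MB/s.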
+ */ + if (usecs > 20000) { + if (bw > bfqd->peak_rate || + (!BFQQ_SEEKY(bfqq) && + reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { + bfq_log(bfqd, "measured bw =%llu", bw); + /* + * To smooth oscillations use a low-pass filter with + * alpha=7/8, i.e., + * new_rate = (7/8) * old_rate + (1/8) * bw + */ + do_div(bw, 8); + if (bw == 0) + return 0; + bfqd->peak_rate *= 7; + do_div(bfqd->peak_rate, 8); + bfqd->peak_rate += bw; + update = 1; + bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); + } + + update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; + + if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) + bfqd->peak_rate_samples++; + + if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && + update) { + int dev_type = blk_queue_nonrot(bfqd->queue); + if (bfqd->bfq_user_max_budget == 0) { + bfqd->bfq_max_budget = + bfq_calc_max_budget(bfqd->peak_rate, + timeout); + bfq_log(bfqd, "new max_budget=%lu", + bfqd->bfq_max_budget); + } + if (bfqd->device_speed == BFQ_BFQD_FAST && + bfqd->peak_rate < device_speed_thresh[dev_type]) { + bfqd->device_speed = BFQ_BFQD_SLOW; + bfqd->RT_prod = R_slow[dev_type] * + T_slow[dev_type]; + } else if (bfqd->device_speed == BFQ_BFQD_SLOW && + bfqd->peak_rate > device_speed_thresh[dev_type]) { + bfqd->device_speed = BFQ_BFQD_FAST; + bfqd->RT_prod = R_fast[dev_type] * + T_fast[dev_type]; + } + } + } + + /* + * If the process has been served for a too short time + * interval to let its possible sequential accesses prevail on + * the initial seek time needed to move the disk head on the + * first sector it requested, then give the process a chance + * and for the moment return false. + */ + if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) + return 0; + + /* + * A process is considered ``slow'' (i.e., seeky, so that we + * cannot treat it fairly in the service domain, as it would + * slow down too much the other processes) if, when a slice + * ends for whatever reason, it has received service at a + * rate that would not be high enough to complete the budget + * before the budget timeout expiration. + */ + expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; + + /* + * Caveat: processes doing IO in the slower disk zones will + * tend to be slow(er) even if not seeky. And the estimated + * peak rate will actually be an average over the disk + * surface. Hence, to not be too harsh with unlucky processes, + * we keep a budget/3 margin of safety before declaring a + * process slow. + */ + return expected > (4 * bfqq->entity.budget) / 3; +} + +/* + * To be deemed as soft real-time, an application must meet two + * requirements. First, the application must not require an average + * bandwidth higher than the approximate bandwidth required to playback or + * record a compressed high-definition video. + * The next function is invoked on the completion of the last request of a + * batch, to compute the next-start time instant, soft_rt_next_start, such + * that, if the next request of the application does not arrive before + * soft_rt_next_start, then the above requirement on the bandwidth is met. + * + * The second requirement is that the request pattern of the application is + * isochronous, i.e., that, after issuing a request or a batch of requests, + * the application stops issuing new requests until all its pending requests + * have been completed. After that, the application may issue a new batch, + * and so on. 
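+ * A video player, for instance, typically behaves this way: it reads
+ * a chunk of the stream, pauses while that chunk is consumed, and
+ * only then issues the reads for the next chunk.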
+ * For this reason the next function is invoked to compute
+ * soft_rt_next_start only for applications that meet this requirement,
+ * whereas soft_rt_next_start is set to infinity for applications that do
+ * not.
+ *
+ * Unfortunately, even a greedy application may happen to behave in an
+ * isochronous way if the CPU load is high. In fact, the application may
+ * stop issuing requests while the CPUs are busy serving other processes,
+ * then restart, then stop again for a while, and so on. In addition, if
+ * the disk achieves a low enough throughput with the request pattern
+ * issued by the application (e.g., because the request pattern is random
+ * and/or the device is slow), then the application may meet the above
+ * bandwidth requirement too. To prevent such a greedy application from
+ * being deemed as soft real-time, a further rule is used in the computation
+ * of soft_rt_next_start: soft_rt_next_start must be higher than the current
+ * time plus the maximum time for which the arrival of a request is waited
+ * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle.
+ * This filters out greedy applications, as the latter instead issue their
+ * next request as soon as possible after the last one has been completed
+ * (in contrast, when a batch of requests is completed, a soft real-time
+ * application spends some time processing data).
+ *
+ * Unfortunately, the last filter may easily generate false positives if
+ * only bfqd->bfq_slice_idle is used as a reference time interval and one
+ * or both of the following cases occur:
+ * 1) HZ is so low that the duration of a jiffy is comparable to or higher
+ * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with
+ * HZ=100.
+ * 2) jiffies, instead of increasing at a constant rate, may stop increasing
+ * for a while, then suddenly 'jump' by several units to recover the lost
+ * increments. This seems to happen, e.g., inside virtual machines.
+ * To address this issue, we do not use as a reference time interval just
+ * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In
+ * particular we add the minimum number of jiffies for which the filter
+ * seems to be quite precise also in embedded systems and KVM/QEMU virtual
+ * machines.
+ */
+static inline unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq)
+{
+ return max(bfqq->last_idle_bklogged +
+ HZ * bfqq->service_from_backlogged /
+ bfqd->bfq_wr_max_softrt_rate,
+ jiffies + bfqq->bfqd->bfq_slice_idle + 4);
+}
+
+/*
+ * Return the largest-possible time instant such that, for as long as possible,
+ * the current time will be lower than this time instant according to the macro
+ * time_is_before_jiffies().
+ */
+static inline unsigned long bfq_infinity_from_now(unsigned long now)
+{
+ return now + ULONG_MAX / 2;
+}
+
+/**
+ * bfq_bfqq_expire - expire a queue.
+ * @bfqd: device owning the queue.
+ * @bfqq: the queue to expire.
+ * @compensate: if true, compensate for the time spent idling.
+ * @reason: the reason causing the expiration.
+ *
+ * If the process associated with the queue is slow (i.e., seeky), or in
+ * case of budget timeout, or, finally, if it is async, we
+ * artificially charge it an entire budget (independently of the
+ * actual service it received). As a consequence, the queue will get
+ * higher timestamps than the correct ones upon reactivation, and
+ * hence it will be rescheduled as if it had received more service
+ * than what it actually received.
In the end, this class of processes + * will receive less service in proportion to how slowly they consume + * their budgets (and hence how seriously they tend to lower the + * throughput). + * + * In contrast, when a queue expires because it has been idling for + * too much or because it exhausted its budget, we do not touch the + * amount of service it has received. Hence when the queue will be + * reactivated and its timestamps updated, the latter will be in sync + * with the actual service received by the queue until expiration. + * + * Charging a full budget to the first type of queues and the exact + * service to the others has the effect of using the WF2Q+ policy to + * schedule the former on a timeslice basis, without violating the + * service domain guarantees of the latter. + */ +static void bfq_bfqq_expire(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + int compensate, + enum bfqq_expiration reason) +{ + int slow; + BUG_ON(bfqq != bfqd->in_service_queue); + + /* Update disk peak rate for autotuning and check whether the + * process is slow (see bfq_update_peak_rate). + */ + slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); + + /* + * As above explained, 'punish' slow (i.e., seeky), timed-out + * and async queues, to favor sequential sync workloads. + * + * Processes doing I/O in the slower disk zones will tend to be + * slow(er) even if not seeky. Hence, since the estimated peak + * rate is actually an average over the disk surface, these + * processes may timeout just for bad luck. To avoid punishing + * them we do not charge a full budget to a process that + * succeeded in consuming at least 2/3 of its budget. + */ + if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) + bfq_bfqq_charge_full_budget(bfqq); + + bfqq->service_from_backlogged += bfqq->entity.service; + + if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT && + !bfq_bfqq_constantly_seeky(bfqq)) { + bfq_mark_bfqq_constantly_seeky(bfqq); + if (!blk_queue_nonrot(bfqd->queue)) + bfqd->const_seeky_busy_in_flight_queues++; + } + + if (reason == BFQ_BFQQ_TOO_IDLE && + bfqq->entity.service <= 2 * bfqq->entity.budget / 10 ) + bfq_clear_bfqq_IO_bound(bfqq); + + if (bfqd->low_latency && bfqq->wr_coeff == 1) + bfqq->last_wr_start_finish = jiffies; + + if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 && + RB_EMPTY_ROOT(&bfqq->sort_list)) { + /* + * If we get here, and there are no outstanding requests, + * then the request pattern is isochronous (see the comments + * to the function bfq_bfqq_softrt_next_start()). Hence we + * can compute soft_rt_next_start. If, instead, the queue + * still has outstanding requests, then we have to wait + * for the completion of all the outstanding requests to + * discover whether the request pattern is actually + * isochronous. + */ + if (bfqq->dispatched == 0) + bfqq->soft_rt_next_start = + bfq_bfqq_softrt_next_start(bfqd, bfqq); + else { + /* + * The application is still waiting for the + * completion of one or more requests: + * prevent it from possibly being incorrectly + * deemed as soft real-time by setting its + * soft_rt_next_start to infinity. In fact, + * without this assignment, the application + * would be incorrectly deemed as soft + * real-time if: + * 1) it issued a new request before the + * completion of all its in-flight + * requests, and + * 2) at that time, its soft_rt_next_start + * happened to be in the past. 
+ */ + bfqq->soft_rt_next_start = + bfq_infinity_from_now(jiffies); + /* + * Schedule an update of soft_rt_next_start to when + * the task may be discovered to be isochronous. + */ + bfq_mark_bfqq_softrt_update(bfqq); + } + } + + bfq_log_bfqq(bfqd, bfqq, + "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, + slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); + + /* + * Increase, decrease or leave budget unchanged according to + * reason. + */ + __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); + spin_lock(&bfqd->eqm_lock); + __bfq_bfqq_expire(bfqd, bfqq); + spin_unlock(&bfqd->eqm_lock); +} + +/* + * Budget timeout is not implemented through a dedicated timer, but + * just checked on request arrivals and completions, as well as on + * idle timer expirations. + */ +static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) +{ + if (bfq_bfqq_budget_new(bfqq) || + time_before(jiffies, bfqq->budget_timeout)) + return 0; + return 1; +} + +/* + * If we expire a queue that is waiting for the arrival of a new + * request, we may prevent the fictitious timestamp back-shifting that + * allows the guarantees of the queue to be preserved (see [1] for + * this tricky aspect). Hence we return true only if this condition + * does not hold, or if the queue is slow enough to deserve only to be + * kicked off for preserving a high throughput. +*/ +static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) +{ + bfq_log_bfqq(bfqq->bfqd, bfqq, + "may_budget_timeout: wait_request %d left %d timeout %d", + bfq_bfqq_wait_request(bfqq), + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, + bfq_bfqq_budget_timeout(bfqq)); + + return (!bfq_bfqq_wait_request(bfqq) || + bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) + && + bfq_bfqq_budget_timeout(bfqq); +} + +/* + * Device idling is allowed only for the queues for which this function + * returns true. For this reason, the return value of this function plays a + * critical role for both throughput boosting and service guarantees. The + * return value is computed through a logical expression. In this rather + * long comment, we try to briefly describe all the details and motivations + * behind the components of this logical expression. + * + * First, the expression may be true only for sync queues. Besides, if + * bfqq is also being weight-raised, then the expression always evaluates + * to true, as device idling is instrumental for preserving low-latency + * guarantees (see [1]). Otherwise, the expression evaluates to true only + * if bfqq has a non-null idle window and at least one of the following + * two conditions holds. The first condition is that the device is not + * performing NCQ, because idling the device most certainly boosts the + * throughput if this condition holds and bfqq has been granted a non-null + * idle window. The second compound condition is made of the logical AND of + * two components. + * + * The first component is true only if there is no weight-raised busy + * queue. This guarantees that the device is not idled for a sync non- + * weight-raised queue when there are busy weight-raised queues. The former + * is then expired immediately if empty. Combined with the timestamping + * rules of BFQ (see [1] for details), this causes sync non-weight-raised + * queues to get a lower number of requests served, and hence to ask for a + * lower number of requests from the request pool, before the busy weight- + * raised queues get served again. 
+ * + * This is beneficial for the processes associated with weight-raised + * queues, when the request pool is saturated (e.g., in the presence of + * write hogs). In fact, if the processes associated with the other queues + * ask for requests at a lower rate, then weight-raised processes have a + * higher probability to get a request from the pool immediately (or at + * least soon) when they need one. Hence they have a higher probability to + * actually get a fraction of the disk throughput proportional to their + * high weight. This is especially true with NCQ-capable drives, which + * enqueue several requests in advance and further reorder internally- + * queued requests. + * + * In the end, mistreating non-weight-raised queues when there are busy + * weight-raised queues seems to mitigate starvation problems in the + * presence of heavy write workloads and NCQ, and hence to guarantee a + * higher application and system responsiveness in these hostile scenarios. + * + * If the first component of the compound condition is instead true, i.e., + * there is no weight-raised busy queue, then the second component of the + * compound condition takes into account service-guarantee and throughput + * issues related to NCQ (recall that the compound condition is evaluated + * only if the device is detected as supporting NCQ). + * + * As for service guarantees, allowing the drive to enqueue more than one + * request at a time, and hence delegating de facto final scheduling + * decisions to the drive's internal scheduler, causes loss of control on + * the actual request service order. In this respect, when the drive is + * allowed to enqueue more than one request at a time, the service + * distribution enforced by the drive's internal scheduler is likely to + * coincide with the desired device-throughput distribution only in the + * following, perfectly symmetric, scenario: + * 1) all active queues have the same weight, + * 2) all active groups at the same level in the groups tree have the same + * weight, + * 3) all active groups at the same level in the groups tree have the same + * number of children. + * + * Even in such a scenario, sequential I/O may still receive a preferential + * treatment, but this is not likely to be a big issue with flash-based + * devices, because of their non-dramatic loss of throughput with random + * I/O. Things do differ with HDDs, for which additional care is taken, as + * explained after completing the discussion for flash-based devices. + * + * Unfortunately, keeping the necessary state for evaluating exactly the + * above symmetry conditions would be quite complex and time-consuming. + * Therefore BFQ evaluates instead the following stronger sub-conditions, + * for which it is much easier to maintain the needed state: + * 1) all active queues have the same weight, + * 2) all active groups have the same weight, + * 3) all active groups have at most one active child each. + * In particular, the last two conditions are always true if hierarchical + * support and the cgroups interface are not enabled, hence no state needs + * to be maintained in this case. + * + * According to the above considerations, the second component of the + * compound condition evaluates to true if any of the above symmetry + * sub-condition does not hold, or the device is not flash-based. Therefore, + * if also the first component is true, then idling is allowed for a sync + * queue. 
These are the only sub-conditions considered if the device is + * flash-based, as, for such a device, it is sensible to force idling only + * for service-guarantee issues. In fact, as for throughput, idling + * NCQ-capable flash-based devices would not boost the throughput even + * with sequential I/O; rather it would lower the throughput in proportion + * to how fast the device is. In the end, (only) if all the three + * sub-conditions hold and the device is flash-based, the compound + * condition evaluates to false and therefore no idling is performed. + * + * As already said, things change with a rotational device, where idling + * boosts the throughput with sequential I/O (even with NCQ). Hence, for + * such a device the second component of the compound condition evaluates + * to true also if the following additional sub-condition does not hold: + * the queue is constantly seeky. Unfortunately, this different behavior + * with respect to flash-based devices causes an additional asymmetry: if + * some sync queues enjoy idling and some other sync queues do not, then + * the latter get a low share of the device throughput, simply because the + * former get many requests served after being set as in service, whereas + * the latter do not. As a consequence, to guarantee the desired throughput + * distribution, on HDDs the compound expression evaluates to true (and + * hence device idling is performed) also if the following last symmetry + * condition does not hold: no other queue is benefiting from idling. Also + * this last condition is actually replaced with a simpler-to-maintain and + * stronger condition: there is no busy queue which is not constantly seeky + * (and hence may also benefit from idling). + * + * To sum up, when all the required symmetry and throughput-boosting + * sub-conditions hold, the second component of the compound condition + * evaluates to false, and hence no idling is performed. This helps to + * keep the drives' internal queues full on NCQ-capable devices, and hence + * to boost the throughput, without causing 'almost' any loss of service + * guarantees. The 'almost' follows from the fact that, if the internal + * queue of one such device is filled while all the sub-conditions hold, + * but at some point in time some sub-condition stops to hold, then it may + * become impossible to let requests be served in the new desired order + * until all the requests already queued in the device have been served. + */ +static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq) +{ + struct bfq_data *bfqd = bfqq->bfqd; +#ifdef CONFIG_CGROUP_BFQIO +#define symmetric_scenario (!bfqd->active_numerous_groups && \ + !bfq_differentiated_weights(bfqd)) +#else +#define symmetric_scenario (!bfq_differentiated_weights(bfqd)) +#endif +#define cond_for_seeky_on_ncq_hdd (bfq_bfqq_constantly_seeky(bfqq) && \ + bfqd->busy_in_flight_queues == \ + bfqd->const_seeky_busy_in_flight_queues) +/* + * Condition for expiring a non-weight-raised queue (and hence not idling + * the device). 
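+ * In words: the drive is doing internal queueing (hw_tag) and either
+ * some weight-raised queue is busy, or the scenario is symmetric and
+ * the device is flash-based, or the scenario is symmetric and the
+ * queue, like every busy queue with requests in flight, is
+ * constantly seeky.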
+ */ +#define cond_for_expiring_non_wr (bfqd->hw_tag && \ + (bfqd->wr_busy_queues > 0 || \ + (symmetric_scenario && \ + (blk_queue_nonrot(bfqd->queue) || \ + cond_for_seeky_on_ncq_hdd)))) + + return bfq_bfqq_sync(bfqq) && + (bfq_bfqq_IO_bound(bfqq) || bfqq->wr_coeff > 1) && + (bfqq->wr_coeff > 1 || + (bfq_bfqq_idle_window(bfqq) && + !cond_for_expiring_non_wr) + ); +} + +/* + * If the in-service queue is empty but sync, and the function + * bfq_bfqq_must_not_expire returns true, then: + * 1) the queue must remain in service and cannot be expired, and + * 2) the disk must be idled to wait for the possible arrival of a new + * request for the queue. + * See the comments to the function bfq_bfqq_must_not_expire for the reasons + * why performing device idling is the best choice to boost the throughput + * and preserve service guarantees when bfq_bfqq_must_not_expire itself + * returns true. + */ +static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) +{ + struct bfq_data *bfqd = bfqq->bfqd; + + return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 && + bfq_bfqq_must_not_expire(bfqq); +} + +/* + * Select a queue for service. If we have a current in-service queue, + * check whether to continue servicing it, or retrieve and set a new one. + */ +static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq; + struct request *next_rq; + enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; + + bfqq = bfqd->in_service_queue; + if (bfqq == NULL) + goto new_queue; + + bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); + + if (bfq_may_expire_for_budg_timeout(bfqq) && + !timer_pending(&bfqd->idle_slice_timer) && + !bfq_bfqq_must_idle(bfqq)) + goto expire; + + next_rq = bfqq->next_rq; + /* + * If bfqq has requests queued and it has enough budget left to + * serve them, keep the queue, otherwise expire it. + */ + if (next_rq != NULL) { + if (bfq_serv_to_charge(next_rq, bfqq) > + bfq_bfqq_budget_left(bfqq)) { + reason = BFQ_BFQQ_BUDGET_EXHAUSTED; + goto expire; + } else { + /* + * The idle timer may be pending because we may + * not disable disk idling even when a new request + * arrives. + */ + if (timer_pending(&bfqd->idle_slice_timer)) { + /* + * If we get here: 1) at least a new request + * has arrived but we have not disabled the + * timer because the request was too small, + * 2) then the block layer has unplugged + * the device, causing the dispatch to be + * invoked. + * + * Since the device is unplugged, now the + * requests are probably large enough to + * provide a reasonable throughput. + * So we disable idling. + */ + bfq_clear_bfqq_wait_request(bfqq); + del_timer(&bfqd->idle_slice_timer); + } + goto keep_queue; + } + } + + /* + * No requests pending. If the in-service queue still has requests + * in flight (possibly waiting for a completion) or is idling for a + * new request, then keep it. + */ + if (timer_pending(&bfqd->idle_slice_timer) || + (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) { + bfqq = NULL; + goto keep_queue; + } + + reason = BFQ_BFQQ_NO_MORE_REQUESTS; +expire: + bfq_bfqq_expire(bfqd, bfqq, 0, reason); +new_queue: + bfqq = bfq_set_in_service_queue(bfqd); + bfq_log(bfqd, "select_queue: new queue %d returned", + bfqq != NULL ? 
bfqq->pid : 0); +keep_queue: + return bfqq; +} + +static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ + bfq_log_bfqq(bfqd, bfqq, + "raising period dur %u/%u msec, old coeff %u, w %d(%d)", + jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), + jiffies_to_msecs(bfqq->wr_cur_max_time), + bfqq->wr_coeff, + bfqq->entity.weight, bfqq->entity.orig_weight); + + BUG_ON(bfqq != bfqd->in_service_queue && entity->weight != + entity->orig_weight * bfqq->wr_coeff); + if (entity->ioprio_changed) + bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); + + /* + * If too much time has elapsed from the beginning + * of this weight-raising period, or the queue has + * exceeded the acceptable number of cooperations, + * stop it. + */ + if (bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh || + time_is_before_jiffies(bfqq->last_wr_start_finish + + bfqq->wr_cur_max_time)) { + bfqq->last_wr_start_finish = jiffies; + bfq_log_bfqq(bfqd, bfqq, + "wrais ending at %lu, rais_max_time %u", + bfqq->last_wr_start_finish, + jiffies_to_msecs(bfqq->wr_cur_max_time)); + bfq_bfqq_end_wr(bfqq); + } + } + /* Update weight both if it must be raised and if it must be lowered */ + if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1)) + __bfq_entity_update_weight_prio( + bfq_entity_service_tree(entity), + entity); +} + +/* + * Dispatch one request from bfqq, moving it to the request queue + * dispatch list. + */ +static int bfq_dispatch_request(struct bfq_data *bfqd, + struct bfq_queue *bfqq) +{ + int dispatched = 0; + struct request *rq; + unsigned long service_to_charge; + + BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); + + /* Follow expired path, else get first next available. */ + rq = bfq_check_fifo(bfqq); + if (rq == NULL) + rq = bfqq->next_rq; + service_to_charge = bfq_serv_to_charge(rq, bfqq); + + if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { + /* + * This may happen if the next rq is chosen in fifo order + * instead of sector order. The budget is properly + * dimensioned to be always sufficient to serve the next + * request only if it is chosen in sector order. The reason + * is that it would be quite inefficient and little useful + * to always make sure that the budget is large enough to + * serve even the possible next rq in fifo order. + * In fact, requests are seldom served in fifo order. + * + * Expire the queue for budget exhaustion, and make sure + * that the next act_budget is enough to serve the next + * request, even if it comes from the fifo expired path. + */ + bfqq->next_rq = rq; + /* + * Since this dispatch is failed, make sure that + * a new one will be performed + */ + if (!bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); + goto expire; + } + + /* Finally, insert request into driver dispatch list. 
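+ * bfq_bfqq_served() first charges service_to_charge to the queue, so
+ * that the budget left (entity->budget - entity->service) already
+ * accounts for this dispatch when the request reaches the dispatch
+ * list.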
*/ + bfq_bfqq_served(bfqq, service_to_charge); + bfq_dispatch_insert(bfqd->queue, rq); + + bfq_update_wr_data(bfqd, bfqq); + + bfq_log_bfqq(bfqd, bfqq, + "dispatched %u sec req (%llu), budg left %lu", + blk_rq_sectors(rq), + (long long unsigned)blk_rq_pos(rq), + bfq_bfqq_budget_left(bfqq)); + + dispatched++; + + if (bfqd->in_service_cic == NULL) { + atomic_long_inc(&RQ_CIC(rq)->ioc->refcount); + bfqd->in_service_cic = RQ_CIC(rq); + } + + if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && + dispatched >= bfqd->bfq_max_budget_async_rq) || + bfq_class_idle(bfqq))) + goto expire; + + return dispatched; + +expire: + bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED); + return dispatched; +} + +static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) +{ + int dispatched = 0; + + while (bfqq->next_rq != NULL) { + bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); + dispatched++; + } + + BUG_ON(!list_empty(&bfqq->fifo)); + return dispatched; +} + +/* + * Drain our current requests. + * Used for barriers and when switching io schedulers on-the-fly. + */ +static int bfq_forced_dispatch(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq, *n; + struct bfq_service_tree *st; + int dispatched = 0; + + bfqq = bfqd->in_service_queue; + if (bfqq != NULL) { + spin_lock(&bfqd->eqm_lock); + __bfq_bfqq_expire(bfqd, bfqq); + spin_unlock(&bfqd->eqm_lock); + } + + /* + * Loop through classes, and be careful to leave the scheduler + * in a consistent state, as feedback mechanisms and vtime + * updates cannot be disabled during the process. + */ + list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { + st = bfq_entity_service_tree(&bfqq->entity); + + dispatched += __bfq_forced_dispatch_bfqq(bfqq); + bfqq->max_budget = bfq_max_budget(bfqd); + + bfq_forget_idle(st); + } + + BUG_ON(bfqd->busy_queues != 0); + + return dispatched; +} + +static int bfq_dispatch_requests(struct request_queue *q, int force) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq; + int max_dispatch; + + bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); + if (bfqd->busy_queues == 0) + return 0; + + if (unlikely(force)) + return bfq_forced_dispatch(bfqd); + + bfqq = bfq_select_queue(bfqd); + if (bfqq == NULL) + return 0; + + max_dispatch = bfqd->bfq_quantum; + if (bfq_class_idle(bfqq)) + max_dispatch = 1; + + if (!bfq_bfqq_sync(bfqq)) + max_dispatch = bfqd->bfq_max_budget_async_rq; + + if (bfqq->dispatched >= max_dispatch) { + if (bfqd->busy_queues > 1) + return 0; + if (bfqq->dispatched >= 4 * max_dispatch) + return 0; + } + + if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) + return 0; + + bfq_clear_bfqq_wait_request(bfqq); + BUG_ON(timer_pending(&bfqd->idle_slice_timer)); + + if (!bfq_dispatch_request(bfqd, bfqq)) + return 0; + + bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d (max_disp %d)", + bfqq->pid, max_dispatch); + + return 1; +} + +/* + * Task holds one reference to the queue, dropped when task exits. Each rq + * in-flight on this queue also holds a reference, dropped when rq is freed. + * + * Queue lock must be held here. 
+ */ +static void bfq_put_queue(struct bfq_queue *bfqq) +{ + struct bfq_data *bfqd = bfqq->bfqd; + + BUG_ON(atomic_read(&bfqq->ref) <= 0); + + bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, + atomic_read(&bfqq->ref)); + if (!atomic_dec_and_test(&bfqq->ref)) + return; + + BUG_ON(rb_first(&bfqq->sort_list) != NULL); + BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); + BUG_ON(bfqq->entity.tree != NULL); + BUG_ON(bfq_bfqq_busy(bfqq)); + BUG_ON(bfqd->in_service_queue == bfqq); + + bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); + + kmem_cache_free(bfq_pool, bfqq); +} + +static void bfq_put_cooperator(struct bfq_queue *bfqq) +{ + struct bfq_queue *__bfqq, *next; + + /* + * If this queue was scheduled to merge with another queue, be + * sure to drop the reference taken on that queue (and others in + * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. + */ + __bfqq = bfqq->new_bfqq; + while (__bfqq) { + if (__bfqq == bfqq) + break; + next = __bfqq->new_bfqq; + bfq_put_queue(__bfqq); + __bfqq = next; + } +} + +/* Coop lock is taken in __bfq_exit_single_io_context() */ +static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + if (bfqq == bfqd->in_service_queue) { + __bfq_bfqq_expire(bfqd, bfqq); + bfq_schedule_dispatch(bfqd); + } + + bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, + atomic_read(&bfqq->ref)); + + bfq_put_cooperator(bfqq); + + bfq_put_queue(bfqq); +} + +/* + * Update the entity prio values; note that the new values will not + * be used until the next (re)activation. + */ +static void bfq_init_prio_data(struct bfq_queue *bfqq, struct io_context *ioc) +{ + struct task_struct *tsk = current; + int ioprio_class; + + if (!bfq_bfqq_prio_changed(bfqq)) + return; + + ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); + switch (ioprio_class) { + default: + dev_err(bfqq->bfqd->queue->backing_dev_info.dev, + "bfq: bad prio %x\n", ioprio_class); + case IOPRIO_CLASS_NONE: + /* + * No prio set, inherit CPU scheduling settings. 
+ */ + bfqq->entity.new_ioprio = task_nice_ioprio(tsk); + bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); + break; + case IOPRIO_CLASS_RT: + bfqq->entity.new_ioprio = task_ioprio(ioc); + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; + break; + case IOPRIO_CLASS_BE: + bfqq->entity.new_ioprio = task_ioprio(ioc); + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; + break; + case IOPRIO_CLASS_IDLE: + bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; + bfqq->entity.new_ioprio = 7; + bfq_clear_bfqq_idle_window(bfqq); + break; + } + + bfqq->entity.ioprio_changed = 1; + + bfq_clear_bfqq_prio_changed(bfqq); +} + +static void bfq_changed_ioprio(struct io_context *ioc, + struct cfq_io_context *cic) +{ + struct bfq_data *bfqd; + struct bfq_queue *bfqq, *new_bfqq; + struct bfq_group *bfqg; + unsigned long uninitialized_var(flags); + + bfqd = bfq_get_bfqd_locked(&cic->key, &flags); + if (unlikely(bfqd == NULL)) + return; + + spin_lock(&bfqd->eqm_lock); + bfqq = cic->cfqq[BLK_RW_ASYNC]; + if (bfqq != NULL) { + bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, + sched_data); + new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, cic->ioc, + GFP_ATOMIC); + if (new_bfqq != NULL) { + cic->cfqq[BLK_RW_ASYNC] = new_bfqq; + bfq_log_bfqq(bfqd, bfqq, + "changed_ioprio: bfqq %p %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } + } + + bfqq = cic->cfqq[BLK_RW_SYNC]; + spin_unlock(&bfqd->eqm_lock); + if (bfqq != NULL) + bfq_mark_bfqq_prio_changed(bfqq); + + bfq_put_bfqd_unlock(bfqd, &flags); +} + +static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, + pid_t pid, int is_sync) +{ + RB_CLEAR_NODE(&bfqq->entity.rb_node); + INIT_LIST_HEAD(&bfqq->fifo); + + atomic_set(&bfqq->ref, 0); + bfqq->bfqd = bfqd; + + bfq_mark_bfqq_prio_changed(bfqq); + + if (is_sync) { + if (!bfq_class_idle(bfqq)) + bfq_mark_bfqq_idle_window(bfqq); + bfq_mark_bfqq_sync(bfqq); + } + bfq_mark_bfqq_IO_bound(bfqq); + + /* Tentative initial value to trade off between thr and lat */ + bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; + bfqq->pid = pid; + + bfqq->wr_coeff = 1; + bfqq->last_wr_start_finish = 0; + /* + * Set to the value for which bfqq will not be deemed as + * soft rt when it becomes backlogged. + */ + bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies); +} + +static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, + int is_sync, + struct io_context *ioc, + gfp_t gfp_mask) +{ + struct bfq_queue *bfqq, *new_bfqq = NULL; + struct cfq_io_context *cic; + +retry: + cic = bfq_cic_lookup(bfqd, ioc); + /* cic always exists here */ + bfqq = cic_to_bfqq(cic, is_sync); + + /* + * Always try a new alloc if we fall back to the OOM bfqq + * originally, since it should just be a temporary situation. 
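+ * Note that a blocking allocation (__GFP_WAIT) has to drop both the
+ * queue_lock and the eqm_lock, allocate, re-take the locks and then
+ * retry the lookup, since another context may have assigned a queue
+ * to the cic in the meantime.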
+ */ + if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { + bfqq = NULL; + if (new_bfqq != NULL) { + bfqq = new_bfqq; + new_bfqq = NULL; + } else if (gfp_mask & __GFP_WAIT) { + spin_unlock(&bfqd->eqm_lock); + spin_unlock_irq(bfqd->queue->queue_lock); + new_bfqq = kmem_cache_alloc_node(bfq_pool, + gfp_mask | __GFP_ZERO, + bfqd->queue->node); + spin_lock_irq(bfqd->queue->queue_lock); + spin_lock(&bfqd->eqm_lock); + if (new_bfqq != NULL) + goto retry; + } else { + bfqq = kmem_cache_alloc_node(bfq_pool, + gfp_mask | __GFP_ZERO, + bfqd->queue->node); + } + + if (bfqq != NULL) { + bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); + bfq_log_bfqq(bfqd, bfqq, "allocated"); + } else { + bfqq = &bfqd->oom_bfqq; + bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); + } + + bfq_init_prio_data(bfqq, ioc); + bfq_init_entity(&bfqq->entity, bfqg); + } + + if (new_bfqq != NULL) + kmem_cache_free(bfq_pool, new_bfqq); + + return bfqq; +} + +static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, + struct bfq_group *bfqg, + int ioprio_class, int ioprio) +{ + switch (ioprio_class) { + case IOPRIO_CLASS_RT: + return &bfqg->async_bfqq[0][ioprio]; + case IOPRIO_CLASS_BE: + return &bfqg->async_bfqq[1][ioprio]; + case IOPRIO_CLASS_IDLE: + return &bfqg->async_idle_bfqq; + default: + BUG(); + } +} + +static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, int is_sync, + struct io_context *ioc, gfp_t gfp_mask) +{ + const int ioprio = task_ioprio(ioc); + const int ioprio_class = task_ioprio_class(ioc); + struct bfq_queue **async_bfqq = NULL; + struct bfq_queue *bfqq = NULL; + + if (!is_sync) { + async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, + ioprio); + bfqq = *async_bfqq; + } + + if (bfqq == NULL) + bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, ioc, gfp_mask); + + /* + * Pin the queue now that it's allocated, scheduler exit will + * prune it. + */ + if (!is_sync && *async_bfqq == NULL) { + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", + bfqq, atomic_read(&bfqq->ref)); + *async_bfqq = bfqq; + } + + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, + atomic_read(&bfqq->ref)); + return bfqq; +} + +static void bfq_update_io_thinktime(struct bfq_data *bfqd, + struct cfq_io_context *cic) +{ + unsigned long elapsed = jiffies - cic->ttime.last_end_request; + unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); + + cic->ttime.ttime_samples = (7*cic->ttime.ttime_samples + 256) / 8; + cic->ttime.ttime_total = (7*cic->ttime.ttime_total + 256*ttime) / 8; + cic->ttime.ttime_mean = (cic->ttime.ttime_total + 128) / + cic->ttime.ttime_samples; +} + +static void bfq_update_io_seektime(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct request *rq) +{ + sector_t sdist; + u64 total; + + if (bfqq->last_request_pos < blk_rq_pos(rq)) + sdist = blk_rq_pos(rq) - bfqq->last_request_pos; + else + sdist = bfqq->last_request_pos - blk_rq_pos(rq); + + /* + * Don't allow the seek distance to get too large from the + * odd fragment, pagein, etc. 
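+ * The distance is clamped to roughly four times the current mean
+ * (plus a generous slack for the first samples and a much smaller
+ * one afterwards) and then folded into an exponentially weighted
+ * average: each new sample gets a 1/8 weight, with the factor 256
+ * providing fixed-point precision for the division below.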
+ */ + if (bfqq->seek_samples == 0) /* first request, not really a seek */ + sdist = 0; + else if (bfqq->seek_samples <= 60) /* second & third seek */ + sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); + else + sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); + + bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; + bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; + total = bfqq->seek_total + (bfqq->seek_samples/2); + do_div(total, bfqq->seek_samples); + bfqq->seek_mean = (sector_t)total; + + bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, + (u64)bfqq->seek_mean); +} + +/* + * Disable idle window if the process thinks too long or seeks so much that + * it doesn't matter. + */ +static void bfq_update_idle_window(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct cfq_io_context *cic) +{ + int enable_idle; + + /* Don't idle for async or idle io prio class. */ + if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) + return; + + /* Idle window just restored, statistics are meaningless. */ + if (bfq_bfqq_just_split(bfqq)) + return; + + enable_idle = bfq_bfqq_idle_window(bfqq); + + if (atomic_read(&cic->ioc->nr_tasks) == 0 || + bfqd->bfq_slice_idle == 0 || + (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && + bfqq->wr_coeff == 1)) + enable_idle = 0; + else if (bfq_sample_valid(cic->ttime.ttime_samples)) { + if (cic->ttime.ttime_mean > bfqd->bfq_slice_idle && + bfqq->wr_coeff == 1) + enable_idle = 0; + else + enable_idle = 1; + } + bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", + enable_idle); + + if (enable_idle) + bfq_mark_bfqq_idle_window(bfqq); + else + bfq_clear_bfqq_idle_window(bfqq); +} + +/* + * Called when a new fs request (rq) is added to bfqq. Check if there's + * something we should do about it. + */ +static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, + struct request *rq) +{ + struct cfq_io_context *cic = RQ_CIC(rq); + + if (rq->cmd_flags & REQ_META) + bfqq->meta_pending++; + + bfq_update_io_thinktime(bfqd, cic); + bfq_update_io_seektime(bfqd, bfqq, rq); + if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) { + bfq_clear_bfqq_constantly_seeky(bfqq); + if (!blk_queue_nonrot(bfqd->queue)) { + BUG_ON(!bfqd->const_seeky_busy_in_flight_queues); + bfqd->const_seeky_busy_in_flight_queues--; + } + } + if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || + !BFQQ_SEEKY(bfqq)) + bfq_update_idle_window(bfqd, bfqq, cic); + bfq_clear_bfqq_just_split(bfqq); + + bfq_log_bfqq(bfqd, bfqq, + "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", + bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), + (long long unsigned)bfqq->seek_mean); + + bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); + + if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) { + int small_req = bfqq->queued[rq_is_sync(rq)] == 1 && + blk_rq_sectors(rq) < 32; + int budget_timeout = bfq_bfqq_budget_timeout(bfqq); + + /* + * There is just this request queued: if the request + * is small and the queue is not to be expired, then + * just exit. + * + * In this way, if the disk is being idled to wait for + * a new request from the in-service queue, we avoid + * unplugging the device and committing the disk to serve + * just a small request. On the contrary, we wait for + * the block layer to decide when to unplug the device: + * hopefully, new requests will be merged to this one + * quickly, then the device will be unplugged and + * larger requests will be dispatched. 
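+ * Here a request is deemed small if it spans fewer than 32 sectors
+ * and is the only request of its kind (sync or async) queued in
+ * bfqq.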
+ */ + if (small_req && !budget_timeout) + return; + + /* + * A large enough request arrived, or the queue is to + * be expired: in both cases disk idling is to be + * stopped, so clear wait_request flag and reset + * timer. + */ + bfq_clear_bfqq_wait_request(bfqq); + del_timer(&bfqd->idle_slice_timer); + + /* + * The queue is not empty, because a new request just + * arrived. Hence we can safely expire the queue, in + * case of budget timeout, without risking that the + * timestamps of the queue are not updated correctly. + * See [1] for more details. + */ + if (budget_timeout) + bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); + + /* + * Let the request rip immediately, or let a new queue be + * selected if bfqq has just been expired. + */ + __blk_run_queue(bfqd->queue); + } +} + +static void bfq_insert_request(struct request_queue *q, struct request *rq) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq; + + assert_spin_locked(bfqd->queue->queue_lock); + + /* + * An unplug may trigger a requeue of a request from the device + * driver: make sure we are in process context while trying to + * merge two bfq_queues. + */ + spin_lock(&bfqd->eqm_lock); + if (!in_interrupt()) { + new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true); + if (new_bfqq != NULL) { + if (cic_to_bfqq(RQ_CIC(rq), 1) != bfqq) + new_bfqq = cic_to_bfqq(RQ_CIC(rq), 1); + /* + * Release the request's reference to the old bfqq + * and make sure one is taken to the shared queue. + */ + new_bfqq->allocated[rq_data_dir(rq)]++; + bfqq->allocated[rq_data_dir(rq)]--; + atomic_inc(&new_bfqq->ref); + bfq_put_queue(bfqq); + if (cic_to_bfqq(RQ_CIC(rq), 1) == bfqq) + bfq_merge_bfqqs(bfqd, RQ_CIC(rq), + bfqq, new_bfqq); + rq->elevator_private[1] = new_bfqq; + bfqq = new_bfqq; + } else + bfq_bfqq_increase_failed_cooperations(bfqq); + } + spin_unlock(&bfqd->eqm_lock); + + bfq_init_prio_data(bfqq, RQ_CIC(rq)->ioc); + + bfq_add_request(rq); + + /* + * Here a newly-created bfq_queue has already started a weight-raising + * period: clear wr_time_left to prevent bfq_bfqq_save_state() + * from assigning it a full weight-raising period. See the detailed + * comments about this field in bfq_init_icq(). + */ + if (bfqq->cic != NULL) + bfqq->cic->wr_time_left = 0; + rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]); + list_add_tail(&rq->queuelist, &bfqq->fifo); + + bfq_rq_enqueued(bfqd, bfqq, rq); +} + +static void bfq_update_hw_tag(struct bfq_data *bfqd) +{ + bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, + bfqd->rq_in_driver); + + if (bfqd->hw_tag == 1) + return; + + /* + * This sample is valid if the number of outstanding requests + * is large enough to allow a queueing behavior. Note that the + * sum is not exact, as it's not taking into account deactivated + * requests. 
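+ * Samples are collected only while at least BFQ_HW_QUEUE_THRESHOLD
+ * requests are pending; after BFQ_HW_QUEUE_SAMPLES such samples,
+ * hw_tag is set according to the maximum queue depth seen so far.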
+ */ + if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) + return; + + if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) + return; + + bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; + bfqd->max_rq_in_driver = 0; + bfqd->hw_tag_samples = 0; +} + +static void bfq_completed_request(struct request_queue *q, struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_data *bfqd = bfqq->bfqd; + bool sync = bfq_bfqq_sync(bfqq); + + bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)", + blk_rq_sectors(rq), sync); + + bfq_update_hw_tag(bfqd); + + BUG_ON(!bfqd->rq_in_driver); + BUG_ON(!bfqq->dispatched); + bfqd->rq_in_driver--; + bfqq->dispatched--; + + if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { + bfq_weights_tree_remove(bfqd, &bfqq->entity, + &bfqd->queue_weights_tree); + if (!blk_queue_nonrot(bfqd->queue)) { + BUG_ON(!bfqd->busy_in_flight_queues); + bfqd->busy_in_flight_queues--; + if (bfq_bfqq_constantly_seeky(bfqq)) { + BUG_ON(!bfqd-> + const_seeky_busy_in_flight_queues); + bfqd->const_seeky_busy_in_flight_queues--; + } + } + } + + if (sync) { + bfqd->sync_flight--; + RQ_CIC(rq)->ttime.last_end_request = jiffies; + } + + /* + * If we are waiting to discover whether the request pattern of the + * task associated with the queue is actually isochronous, and + * both requisites for this condition to hold are satisfied, then + * compute soft_rt_next_start (see the comments to the function + * bfq_bfqq_softrt_next_start()). + */ + if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 && + RB_EMPTY_ROOT(&bfqq->sort_list)) + bfqq->soft_rt_next_start = + bfq_bfqq_softrt_next_start(bfqd, bfqq); + + /* + * If this is the in-service queue, check if it needs to be expired, + * or if we want to idle in case it has no pending requests. + */ + if (bfqd->in_service_queue == bfqq) { + if (bfq_bfqq_budget_new(bfqq)) + bfq_set_budget_timeout(bfqd); + + if (bfq_bfqq_must_idle(bfqq)) { + bfq_arm_slice_timer(bfqd); + goto out; + } else if (bfq_may_expire_for_budg_timeout(bfqq)) + bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); + else if (RB_EMPTY_ROOT(&bfqq->sort_list) && + (bfqq->dispatched == 0 || + !bfq_bfqq_must_not_expire(bfqq))) + bfq_bfqq_expire(bfqd, bfqq, 0, + BFQ_BFQQ_NO_MORE_REQUESTS); + } + + if (!bfqd->rq_in_driver) + bfq_schedule_dispatch(bfqd); + +out: + return; +} + +static inline int __bfq_may_queue(struct bfq_queue *bfqq) +{ + if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { + bfq_clear_bfqq_must_alloc(bfqq); + return ELV_MQUEUE_MUST; + } + + return ELV_MQUEUE_MAY; +} + +static int bfq_may_queue(struct request_queue *q, int rw) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct task_struct *tsk = current; + struct cfq_io_context *cic; + struct bfq_queue *bfqq; + + /* + * Don't force setup of a queue from here, as a call to may_queue + * does not necessarily imply that a request actually will be + * queued. So just lookup a possibly existing queue, or return + * 'may queue' if that fails. + */ + cic = bfq_cic_lookup(bfqd, tsk->io_context); + if (cic == NULL) + return ELV_MQUEUE_MAY; + + spin_lock(&bfqd->eqm_lock); + bfqq = cic_to_bfqq(cic, rw_is_sync(rw)); + spin_unlock(&bfqd->eqm_lock); + if (bfqq != NULL) { + bfq_init_prio_data(bfqq, cic->ioc); + + return __bfq_may_queue(bfqq); + } + + return ELV_MQUEUE_MAY; +} + +/* + * Queue lock held here. 
+ */ +static void bfq_put_request(struct request *rq) +{ + struct bfq_queue *bfqq = RQ_BFQQ(rq); + + if (bfqq != NULL) { + const int rw = rq_data_dir(rq); + + BUG_ON(!bfqq->allocated[rw]); + bfqq->allocated[rw]--; + + put_io_context(RQ_CIC(rq)->ioc); + + rq->elevator_private[0] = NULL; + rq->elevator_private[1] = NULL; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } +} + +/* + * Returns NULL if a new bfqq should be allocated, or the old bfqq if this + * was the last process referring to said bfqq. + */ +static struct bfq_queue * +bfq_split_bfqq(struct cfq_io_context *cic, struct bfq_queue *bfqq) +{ + bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); + + put_io_context(cic->ioc); + + if (bfqq_process_refs(bfqq) == 1) { + bfqq->pid = current->pid; + bfq_clear_bfqq_coop(bfqq); + bfq_clear_bfqq_split_coop(bfqq); + return bfqq; + } + + cic_set_bfqq(cic, NULL, 1); + + bfq_put_cooperator(bfqq); + + bfq_put_queue(bfqq); + return NULL; +} + +/* + * Allocate bfq data structures associated with this request. + */ +static int bfq_set_request(struct request_queue *q, struct request *rq, + gfp_t gfp_mask) +{ + struct bfq_data *bfqd = q->elevator->elevator_data; + struct cfq_io_context *cic; + const int rw = rq_data_dir(rq); + const int is_sync = rq_is_sync(rq); + struct bfq_queue *bfqq; + struct bfq_group *bfqg; + unsigned long flags; + bool split = false; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + cic = bfq_get_io_context(bfqd, gfp_mask); + + spin_lock_irqsave(q->queue_lock, flags); + + if (cic == NULL) + goto queue_fail; + + bfqg = bfq_cic_update_cgroup(cic); + + spin_lock(&bfqd->eqm_lock); + +new_queue: + bfqq = cic_to_bfqq(cic, is_sync); + if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { + bfqq = bfq_get_queue(bfqd, bfqg, is_sync, cic->ioc, gfp_mask); + cic_set_bfqq(cic, bfqq, is_sync); + } else { + /* If the queue was seeky for too long, break it apart. */ + if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { + bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); + bfqq = bfq_split_bfqq(cic, bfqq); + split = true; + if (!bfqq) + goto new_queue; + } + } + + bfqq->allocated[rw]++; + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, + atomic_read(&bfqq->ref)); + + rq->elevator_private[0] = cic; + rq->elevator_private[1] = bfqq; + + /* + * If a bfq_queue has only one process reference, it is owned + * by only one cfq_io_context: we can set the cic field of the + * bfq_queue to the address of that structure. Also, if the + * queue has just been split, mark a flag so that the + * information is available to the other scheduler hooks. + */ + if (bfqq_process_refs(bfqq) == 1) { + bfqq->cic = cic; + if (split) { + bfq_mark_bfqq_just_split(bfqq); + /* + * If the queue has just been split from a shared + * queue, restore the idle window and the possible + * weight raising period. 
+ */ + bfq_bfqq_resume_state(bfqq, cic); + } + } + + spin_unlock(&bfqd->eqm_lock); + spin_unlock_irqrestore(q->queue_lock, flags); + + return 0; + +queue_fail: + if (cic != NULL) + put_io_context(cic->ioc); + + bfq_schedule_dispatch(bfqd); + spin_unlock_irqrestore(q->queue_lock, flags); + + return 1; +} + +static void bfq_kick_queue(struct work_struct *work) +{ + struct bfq_data *bfqd = + container_of(work, struct bfq_data, unplug_work); + struct request_queue *q = bfqd->queue; + + spin_lock_irq(q->queue_lock); + __blk_run_queue(q); + spin_unlock_irq(q->queue_lock); +} + +/* + * Handler of the expiration of the timer running if the in-service queue + * is idling inside its time slice. + */ +static void bfq_idle_slice_timer(unsigned long data) +{ + struct bfq_data *bfqd = (struct bfq_data *)data; + struct bfq_queue *bfqq; + unsigned long flags; + enum bfqq_expiration reason; + + spin_lock_irqsave(bfqd->queue->queue_lock, flags); + + bfqq = bfqd->in_service_queue; + /* + * Theoretical race here: the in-service queue can be NULL or + * different from the queue that was idling if the timer handler + * spins on the queue_lock and a new request arrives for the + * current queue and there is a full dispatch cycle that changes + * the in-service queue. This can hardly happen, but in the worst + * case we just expire a queue too early. + */ + if (bfqq != NULL) { + bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); + if (bfq_bfqq_budget_timeout(bfqq)) + /* + * Also here the queue can be safely expired + * for budget timeout without wasting + * guarantees + */ + reason = BFQ_BFQQ_BUDGET_TIMEOUT; + else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) + /* + * The queue may not be empty upon timer expiration, + * because we may not disable the timer when the + * first request of the in-service queue arrives + * during disk idling. + */ + reason = BFQ_BFQQ_TOO_IDLE; + else + goto schedule_dispatch; + + bfq_bfqq_expire(bfqd, bfqq, 1, reason); + } + +schedule_dispatch: + bfq_schedule_dispatch(bfqd); + + spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); +} + +static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) +{ + del_timer_sync(&bfqd->idle_slice_timer); + cancel_work_sync(&bfqd->unplug_work); +} + +static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, + struct bfq_queue **bfqq_ptr) +{ + struct bfq_group *root_group = bfqd->root_group; + struct bfq_queue *bfqq = *bfqq_ptr; + + bfq_log(bfqd, "put_async_bfqq: %p", bfqq); + if (bfqq != NULL) { + bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); + bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + *bfqq_ptr = NULL; + } +} + +/* + * Release all the bfqg references to its async queues. If we are + * deallocating the group these queues may still contain requests, so + * we reparent them to the root cgroup (i.e., the only one that will + * exist for sure until all the requests on a device are gone). 
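+ * The queues released here are the per-class async queues pinned by
+ * bfq_get_queue(); __bfq_put_async_bfqq() drops the reference taken
+ * at that point.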
+ */ +static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) +{ + int i, j; + + for (i = 0; i < 2; i++) + for (j = 0; j < IOPRIO_BE_NR; j++) + __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); + + __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); +} + +static void bfq_exit_queue(struct elevator_queue *e) +{ + struct bfq_data *bfqd = e->elevator_data; + struct request_queue *q = bfqd->queue; + struct bfq_queue *bfqq, *n; + struct cfq_io_context *cic; + + bfq_shutdown_timer_wq(bfqd); + + spin_lock_irq(q->queue_lock); + + while (!list_empty(&bfqd->cic_list)) { + cic = list_entry(bfqd->cic_list.next, struct cfq_io_context, + queue_list); + __bfq_exit_single_io_context(bfqd, cic); + } + + BUG_ON(bfqd->in_service_queue != NULL); + list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) + bfq_deactivate_bfqq(bfqd, bfqq, 0); + + bfq_disconnect_groups(bfqd); + spin_unlock_irq(q->queue_lock); + + bfq_shutdown_timer_wq(bfqd); + + spin_lock(&cic_index_lock); + ida_remove(&cic_index_ida, bfqd->cic_index); + spin_unlock(&cic_index_lock); + + /* Wait for cic->key accessors to exit their grace periods. */ + synchronize_rcu(); + + BUG_ON(timer_pending(&bfqd->idle_slice_timer)); + + bfq_free_root_group(bfqd); + kfree(bfqd); +} + +static int bfq_alloc_cic_index(void) +{ + int index, error; + + do { + if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&cic_index_lock); + error = ida_get_new(&cic_index_ida, &index); + spin_unlock(&cic_index_lock); + if (error && error != -EAGAIN) + return error; + } while (error); + + return index; +} + +static void *bfq_init_queue(struct request_queue *q) +{ + struct bfq_group *bfqg; + struct bfq_data *bfqd; + int i; + + i = bfq_alloc_cic_index(); + if (i < 0) + return NULL; + + bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node); + if (bfqd == NULL) + return NULL; + + bfqd->cic_index = i; + + /* + * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. + * Grab a permanent reference to it, so that the normal code flow + * will not attempt to free it. 
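+ * (The reference taken just below is never dropped, so the ref count
+ * of oom_bfqq can never reach zero and bfq_put_queue() will never
+ * free it.)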
+ */ + bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); + atomic_inc(&bfqd->oom_bfqq.ref); + + spin_lock_init(&bfqd->eqm_lock); + INIT_LIST_HEAD(&bfqd->cic_list); + + bfqd->queue = q; + + bfqg = bfq_alloc_root_group(bfqd, q->node); + if (bfqg == NULL) { + kfree(bfqd); + return NULL; + } + + bfqd->root_group = bfqg; +#ifdef CONFIG_CGROUP_BFQIO + bfqd->active_numerous_groups = 0; +#endif + + init_timer(&bfqd->idle_slice_timer); + bfqd->idle_slice_timer.function = bfq_idle_slice_timer; + bfqd->idle_slice_timer.data = (unsigned long)bfqd; + + bfqd->rq_pos_tree = RB_ROOT; + bfqd->queue_weights_tree = RB_ROOT; + bfqd->group_weights_tree = RB_ROOT; + + INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); + + INIT_LIST_HEAD(&bfqd->active_list); + INIT_LIST_HEAD(&bfqd->idle_list); + + bfqd->hw_tag = -1; + + bfqd->bfq_max_budget = bfq_default_max_budget; + + bfqd->bfq_quantum = bfq_quantum; + bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; + bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; + bfqd->bfq_back_max = bfq_back_max; + bfqd->bfq_back_penalty = bfq_back_penalty; + bfqd->bfq_slice_idle = bfq_slice_idle; + bfqd->bfq_class_idle_last_service = 0; + bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; + bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; + bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; + + bfqd->bfq_coop_thresh = 2; + bfqd->bfq_failed_cooperations = 7000; + bfqd->bfq_requests_within_timer = 120; + + bfqd->low_latency = true; + + bfqd->bfq_wr_coeff = 20; + bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300); + bfqd->bfq_wr_max_time = 0; + bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000); + bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500); + bfqd->bfq_wr_max_softrt_rate = 7000; /* + * Approximate rate required + * to playback or record a + * high-definition compressed + * video. + */ + bfqd->wr_busy_queues = 0; + bfqd->busy_in_flight_queues = 0; + bfqd->const_seeky_busy_in_flight_queues = 0; + + /* + * Begin by assuming, optimistically, that the device peak rate is + * equal to the highest reference rate. + */ + bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] * + T_fast[blk_queue_nonrot(bfqd->queue)]; + bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)]; + bfqd->device_speed = BFQ_BFQD_FAST; + + return bfqd; +} + +static void bfq_slab_kill(void) +{ + if (bfq_pool != NULL) + kmem_cache_destroy(bfq_pool); + if (bfq_ioc_pool != NULL) + kmem_cache_destroy(bfq_ioc_pool); +} + +static int __init bfq_slab_setup(void) +{ + bfq_pool = KMEM_CACHE(bfq_queue, 0); + if (bfq_pool == NULL) + goto fail; + + bfq_ioc_pool = kmem_cache_create("bfq_io_context", + sizeof(struct cfq_io_context), + __alignof__(struct cfq_io_context), + 0, NULL); + if (bfq_ioc_pool == NULL) + goto fail; + + return 0; +fail: + bfq_slab_kill(); + return -ENOMEM; +} + +static ssize_t bfq_var_show(unsigned int var, char *page) +{ + return sprintf(page, "%d\n", var); +} + +static ssize_t bfq_var_store(unsigned long *var, const char *page, + size_t count) +{ + unsigned long new_val; + int ret = kstrtoul(page, 10, &new_val); + + if (ret == 0) + *var = new_val; + + return count; +} + +static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page) +{ + struct bfq_data *bfqd = e->elevator_data; + return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ? 
+ jiffies_to_msecs(bfqd->bfq_wr_max_time) : + jiffies_to_msecs(bfq_wr_duration(bfqd))); +} + +static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) +{ + struct bfq_queue *bfqq; + struct bfq_data *bfqd = e->elevator_data; + ssize_t num_char = 0; + + spin_lock_irq(bfqd->queue->queue_lock); + + num_char += sprintf(page + num_char, "Active:\n"); + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { + num_char += sprintf(page + num_char, + "pid%d: weight %hu, dur %d/%u\n", + bfqq->pid, + bfqq->entity.weight, + jiffies_to_msecs(jiffies - + bfqq->last_wr_start_finish), + jiffies_to_msecs(bfqq->wr_cur_max_time)); + } + num_char += sprintf(page + num_char, "Idle:\n"); + list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { + num_char += sprintf(page + num_char, + "pid%d: weight %hu, dur %d/%u\n", + bfqq->pid, + bfqq->entity.weight, + jiffies_to_msecs(jiffies - + bfqq->last_wr_start_finish), + jiffies_to_msecs(bfqq->wr_cur_max_time)); + } + + spin_unlock_irq(bfqd->queue->queue_lock); + + return num_char; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct bfq_data *bfqd = e->elevator_data; \ + unsigned int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return bfq_var_show(__data, (page)); \ +} +SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0); +SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); +SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); +SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); +SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); +SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); +SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); +SHOW_FUNCTION(bfq_max_budget_async_rq_show, + bfqd->bfq_max_budget_async_rq, 0); +SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); +SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); +SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); +SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0); +SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1); +SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, + 1); +SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async, + 1); +SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t \ +__FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ + struct bfq_data *bfqd = e->elevator_data; \ + unsigned long uninitialized_var(__data); \ + int ret = bfq_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + if (__CONV) \ + *(__PTR) = msecs_to_jiffies(__data); \ + else \ + *(__PTR) = __data; \ + return ret; \ +} +STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); +STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, + INT_MAX, 1); +STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, + INT_MAX, 1); +STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); +STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, + INT_MAX, 0); +STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, + 
1, INT_MAX, 0); +STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, + INT_MAX, 1); +STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0); +STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, + INT_MAX, 1); +STORE_FUNCTION(bfq_wr_min_idle_time_store, + &bfqd->bfq_wr_min_idle_time, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_wr_min_inter_arr_async_store, + &bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_wr_max_softrt_rate_store, + &bfqd->bfq_wr_max_softrt_rate, 0, INT_MAX, 0); +#undef STORE_FUNCTION + +/* do nothing for the moment */ +static ssize_t bfq_weights_store(struct elevator_queue *e, + const char *page, size_t count) +{ + return count; +} + +static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) +{ + u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); + + if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) + return bfq_calc_max_budget(bfqd->peak_rate, timeout); + else + return bfq_default_max_budget; +} + +static ssize_t bfq_max_budget_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long uninitialized_var(__data); + int ret = bfq_var_store(&__data, (page), count); + + if (__data == 0) + bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); + else { + if (__data > INT_MAX) + __data = INT_MAX; + bfqd->bfq_max_budget = __data; + } + + bfqd->bfq_user_max_budget = __data; + + return ret; +} + +static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long uninitialized_var(__data); + int ret = bfq_var_store(&__data, (page), count); + + if (__data < 1) + __data = 1; + else if (__data > INT_MAX) + __data = INT_MAX; + + bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); + if (bfqd->bfq_user_max_budget == 0) + bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); + + return ret; +} + +static ssize_t bfq_low_latency_store(struct elevator_queue *e, + const char *page, size_t count) +{ + struct bfq_data *bfqd = e->elevator_data; + unsigned long uninitialized_var(__data); + int ret = bfq_var_store(&__data, (page), count); + + if (__data > 1) + __data = 1; + if (__data == 0 && bfqd->low_latency != 0) + bfq_end_wr(bfqd); + bfqd->low_latency = __data; + + return ret; +} + +#define BFQ_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) + +static struct elv_fs_entry bfq_attrs[] = { + BFQ_ATTR(quantum), + BFQ_ATTR(fifo_expire_sync), + BFQ_ATTR(fifo_expire_async), + BFQ_ATTR(back_seek_max), + BFQ_ATTR(back_seek_penalty), + BFQ_ATTR(slice_idle), + BFQ_ATTR(max_budget), + BFQ_ATTR(max_budget_async_rq), + BFQ_ATTR(timeout_sync), + BFQ_ATTR(timeout_async), + BFQ_ATTR(low_latency), + BFQ_ATTR(wr_coeff), + BFQ_ATTR(wr_max_time), + BFQ_ATTR(wr_rt_max_time), + BFQ_ATTR(wr_min_idle_time), + BFQ_ATTR(wr_min_inter_arr_async), + BFQ_ATTR(wr_max_softrt_rate), + BFQ_ATTR(weights), + __ATTR_NULL +}; + +static struct elevator_type iosched_bfq = { + .ops = { + .elevator_merge_fn = bfq_merge, + .elevator_merged_fn = bfq_merged_request, + .elevator_merge_req_fn = bfq_merged_requests, + .elevator_allow_merge_fn = bfq_allow_merge, + .elevator_dispatch_fn = bfq_dispatch_requests, + .elevator_add_req_fn = bfq_insert_request, + .elevator_activate_req_fn = bfq_activate_request, + .elevator_deactivate_req_fn = 
bfq_deactivate_request, + .elevator_completed_req_fn = bfq_completed_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, + .elevator_set_req_fn = bfq_set_request, + .elevator_put_req_fn = bfq_put_request, + .elevator_may_queue_fn = bfq_may_queue, + .elevator_init_fn = bfq_init_queue, + .elevator_exit_fn = bfq_exit_queue, + .trim = bfq_free_io_context, + }, + .elevator_attrs = bfq_attrs, + .elevator_name = "bfq", + .elevator_owner = THIS_MODULE, +}; + +static int __init bfq_init(void) +{ + /* + * Can be 0 on HZ < 1000 setups. + */ + if (bfq_slice_idle == 0) + bfq_slice_idle = 1; + + if (bfq_timeout_async == 0) + bfq_timeout_async = 1; + + if (bfq_slab_setup()) + return -ENOMEM; + + /* + * Times to load large popular applications for the typical systems + * installed on the reference devices (see the comments before the + * definitions of the two arrays). + */ + T_slow[0] = msecs_to_jiffies(2600); + T_slow[1] = msecs_to_jiffies(1000); + T_fast[0] = msecs_to_jiffies(5500); + T_fast[1] = msecs_to_jiffies(2000); + + /* + * Thresholds that determine the switch between speed classes (see + * the comments before the definition of the array). + */ + device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2; + device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2; + + elv_register(&iosched_bfq); + pr_info("BFQ I/O-scheduler version: v7r5"); + + return 0; +} + +static void __exit bfq_exit(void) +{ + DECLARE_COMPLETION_ONSTACK(all_gone); + elv_unregister(&iosched_bfq); + bfq_ioc_gone = &all_gone; + /* bfq_ioc_gone's update must be visible before reading bfq_ioc_count */ + smp_wmb(); + if (elv_ioc_count_read(bfq_ioc_count) != 0) + wait_for_completion(&all_gone); + ida_destroy(&cic_index_ida); + bfq_slab_kill(); +} + +module_init(bfq_init); +module_exit(bfq_exit); + +MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); +MODULE_LICENSE("GPL"); diff --git a/block/bfq-sched.c b/block/bfq-sched.c new file mode 100644 index 00000000000..7926dce517c --- /dev/null +++ b/block/bfq-sched.c @@ -0,0 +1,1177 @@ +/* + * BFQ: Hierarchical B-WF2Q+ scheduler. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + */ + +#ifdef CONFIG_CGROUP_BFQIO +#define for_each_entity(entity) \ + for (; entity != NULL; entity = entity->parent) + +#define for_each_entity_safe(entity, parent) \ + for (; entity && ({ parent = entity->parent; 1; }); entity = parent) + +static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, + int extract, + struct bfq_data *bfqd); + +static inline void bfq_update_budget(struct bfq_entity *next_in_service) +{ + struct bfq_entity *bfqg_entity; + struct bfq_group *bfqg; + struct bfq_sched_data *group_sd; + + BUG_ON(next_in_service == NULL); + + group_sd = next_in_service->sched_data; + + bfqg = container_of(group_sd, struct bfq_group, sched_data); + /* + * bfq_group's my_entity field is not NULL only if the group + * is not the root group. We must not touch the root entity + * as it must never become an in-service entity. 
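+ * For the root group my_entity is NULL, so the assignment below is
+ * simply skipped in that case.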
+ */ + bfqg_entity = bfqg->my_entity; + if (bfqg_entity != NULL) + bfqg_entity->budget = next_in_service->budget; +} + +static int bfq_update_next_in_service(struct bfq_sched_data *sd) +{ + struct bfq_entity *next_in_service; + + if (sd->in_service_entity != NULL) + /* will update/requeue at the end of service */ + return 0; + + /* + * NOTE: this can be improved in many ways, such as returning + * 1 (and thus propagating upwards the update) only when the + * budget changes, or caching the bfqq that will be scheduled + * next from this subtree. By now we worry more about + * correctness than about performance... + */ + next_in_service = bfq_lookup_next_entity(sd, 0, NULL); + sd->next_in_service = next_in_service; + + if (next_in_service != NULL) + bfq_update_budget(next_in_service); + + return 1; +} + +static inline void bfq_check_next_in_service(struct bfq_sched_data *sd, + struct bfq_entity *entity) +{ + BUG_ON(sd->next_in_service != entity); +} +#else +#define for_each_entity(entity) \ + for (; entity != NULL; entity = NULL) + +#define for_each_entity_safe(entity, parent) \ + for (parent = NULL; entity != NULL; entity = parent) + +static inline int bfq_update_next_in_service(struct bfq_sched_data *sd) +{ + return 0; +} + +static inline void bfq_check_next_in_service(struct bfq_sched_data *sd, + struct bfq_entity *entity) +{ +} + +static inline void bfq_update_budget(struct bfq_entity *next_in_service) +{ +} +#endif + +/* + * Shift for timestamp calculations. This actually limits the maximum + * service allowed in one timestamp delta (small shift values increase it), + * the maximum total weight that can be used for the queues in the system + * (big shift values increase it), and the period of virtual time + * wraparounds. + */ +#define WFQ_SERVICE_SHIFT 22 + +/** + * bfq_gt - compare two timestamps. + * @a: first ts. + * @b: second ts. + * + * Return @a > @b, dealing with wrapping correctly. + */ +static inline int bfq_gt(u64 a, u64 b) +{ + return (s64)(a - b) > 0; +} + +static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = NULL; + + BUG_ON(entity == NULL); + + if (entity->my_sched_data == NULL) + bfqq = container_of(entity, struct bfq_queue, entity); + + return bfqq; +} + + +/** + * bfq_delta - map service into the virtual time domain. + * @service: amount of service. + * @weight: scale factor (weight of an entity or weight sum). + */ +static inline u64 bfq_delta(unsigned long service, + unsigned long weight) +{ + u64 d = (u64)service << WFQ_SERVICE_SHIFT; + + do_div(d, weight); + return d; +} + +/** + * bfq_calc_finish - assign the finish time to an entity. + * @entity: the entity to act upon. + * @service: the service to be charged to the entity. + */ +static inline void bfq_calc_finish(struct bfq_entity *entity, + unsigned long service) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(entity->weight == 0); + + entity->finish = entity->start + + bfq_delta(service, entity->weight); + + if (bfqq != NULL) { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "calc_finish: serv %lu, w %d", + service, entity->weight); + bfq_log_bfqq(bfqq->bfqd, bfqq, + "calc_finish: start %llu, finish %llu, delta %llu", + entity->start, entity->finish, + bfq_delta(service, entity->weight)); + } +} + +/** + * bfq_entity_of - get an entity from a node. + * @node: the node field of the entity. + * + * Convert a node pointer to the relative entity. 
This is used only + * to simplify the logic of some functions and not as the generic + * conversion mechanism because, e.g., in the tree walking functions, + * the check for a %NULL value would be redundant. + */ +static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) +{ + struct bfq_entity *entity = NULL; + + if (node != NULL) + entity = rb_entry(node, struct bfq_entity, rb_node); + + return entity; +} + +/** + * bfq_extract - remove an entity from a tree. + * @root: the tree root. + * @entity: the entity to remove. + */ +static inline void bfq_extract(struct rb_root *root, + struct bfq_entity *entity) +{ + BUG_ON(entity->tree != root); + + entity->tree = NULL; + rb_erase(&entity->rb_node, root); +} + +/** + * bfq_idle_extract - extract an entity from the idle tree. + * @st: the service tree of the owning @entity. + * @entity: the entity being removed. + */ +static void bfq_idle_extract(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *next; + + BUG_ON(entity->tree != &st->idle); + + if (entity == st->first_idle) { + next = rb_next(&entity->rb_node); + st->first_idle = bfq_entity_of(next); + } + + if (entity == st->last_idle) { + next = rb_prev(&entity->rb_node); + st->last_idle = bfq_entity_of(next); + } + + bfq_extract(&st->idle, entity); + + if (bfqq != NULL) + list_del(&bfqq->bfqq_list); +} + +/** + * bfq_insert - generic tree insertion. + * @root: tree root. + * @entity: entity to insert. + * + * This is used for the idle and the active tree, since they are both + * ordered by finish time. + */ +static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) +{ + struct bfq_entity *entry; + struct rb_node **node = &root->rb_node; + struct rb_node *parent = NULL; + + BUG_ON(entity->tree != NULL); + + while (*node != NULL) { + parent = *node; + entry = rb_entry(parent, struct bfq_entity, rb_node); + + if (bfq_gt(entry->finish, entity->finish)) + node = &parent->rb_left; + else + node = &parent->rb_right; + } + + rb_link_node(&entity->rb_node, parent, node); + rb_insert_color(&entity->rb_node, root); + + entity->tree = root; +} + +/** + * bfq_update_min - update the min_start field of a entity. + * @entity: the entity to update. + * @node: one of its children. + * + * This function is called when @entity may store an invalid value for + * min_start due to updates to the active tree. The function assumes + * that the subtree rooted at @node (which may be its left or its right + * child) has a valid min_start value. + */ +static inline void bfq_update_min(struct bfq_entity *entity, + struct rb_node *node) +{ + struct bfq_entity *child; + + if (node != NULL) { + child = rb_entry(node, struct bfq_entity, rb_node); + if (bfq_gt(entity->min_start, child->min_start)) + entity->min_start = child->min_start; + } +} + +/** + * bfq_update_active_node - recalculate min_start. + * @node: the node to update. + * + * @node may have changed position or one of its children may have moved, + * this function updates its min_start value. The left and right subtrees + * are assumed to hold a correct min_start value. + */ +static inline void bfq_update_active_node(struct rb_node *node) +{ + struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); + + entity->min_start = entity->start; + bfq_update_min(entity, node->rb_right); + bfq_update_min(entity, node->rb_left); +} + +/** + * bfq_update_active_tree - update min_start for the whole active tree. + * @node: the starting node. 
+ * + * @node must be the deepest modified node after an update. This function + * updates its min_start using the values held by its children, assuming + * that they did not change, and then updates all the nodes that may have + * changed in the path to the root. The only nodes that may have changed + * are the ones in the path or their siblings. + */ +static void bfq_update_active_tree(struct rb_node *node) +{ + struct rb_node *parent; + +up: + bfq_update_active_node(node); + + parent = rb_parent(node); + if (parent == NULL) + return; + + if (node == parent->rb_left && parent->rb_right != NULL) + bfq_update_active_node(parent->rb_right); + else if (parent->rb_left != NULL) + bfq_update_active_node(parent->rb_left); + + node = parent; + goto up; +} + +static void bfq_weights_tree_add(struct bfq_data *bfqd, + struct bfq_entity *entity, + struct rb_root *root); + +static void bfq_weights_tree_remove(struct bfq_data *bfqd, + struct bfq_entity *entity, + struct rb_root *root); + + +/** + * bfq_active_insert - insert an entity in the active tree of its + * group/device. + * @st: the service tree of the entity. + * @entity: the entity being inserted. + * + * The active tree is ordered by finish time, but an extra key is kept + * per each node, containing the minimum value for the start times of + * its children (and the node itself), so it's possible to search for + * the eligible node with the lowest finish time in logarithmic time. + */ +static void bfq_active_insert(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *node = &entity->rb_node; +#ifdef CONFIG_CGROUP_BFQIO + struct bfq_sched_data *sd = NULL; + struct bfq_group *bfqg = NULL; + struct bfq_data *bfqd = NULL; +#endif + + bfq_insert(&st->active, entity); + + if (node->rb_left != NULL) + node = node->rb_left; + else if (node->rb_right != NULL) + node = node->rb_right; + + bfq_update_active_tree(node); + +#ifdef CONFIG_CGROUP_BFQIO + sd = entity->sched_data; + bfqg = container_of(sd, struct bfq_group, sched_data); + BUG_ON(!bfqg); + bfqd = (struct bfq_data *)bfqg->bfqd; +#endif + if (bfqq != NULL) + list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); +#ifdef CONFIG_CGROUP_BFQIO + else { /* bfq_group */ + BUG_ON(!bfqd); + bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree); + } + if (bfqg != bfqd->root_group) { + BUG_ON(!bfqg); + BUG_ON(!bfqd); + bfqg->active_entities++; + if (bfqg->active_entities == 2) + bfqd->active_numerous_groups++; + } +#endif +} + +/** + * bfq_ioprio_to_weight - calc a weight from an ioprio. + * @ioprio: the ioprio value to convert. + */ +static inline unsigned short bfq_ioprio_to_weight(int ioprio) +{ + BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); + return IOPRIO_BE_NR - ioprio; +} + +/** + * bfq_weight_to_ioprio - calc an ioprio from a weight. + * @weight: the weight value to convert. + * + * To preserve as mush as possible the old only-ioprio user interface, + * 0 is used as an escape ioprio value for weights (numerically) equal or + * larger than IOPRIO_BE_NR + */ +static inline unsigned short bfq_weight_to_ioprio(int weight) +{ + BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); + return IOPRIO_BE_NR - weight < 0 ? 
0 : IOPRIO_BE_NR - weight; +} + +static inline void bfq_get_entity(struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + if (bfqq != NULL) { + atomic_inc(&bfqq->ref); + bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", + bfqq, atomic_read(&bfqq->ref)); + } +} + +/** + * bfq_find_deepest - find the deepest node that an extraction can modify. + * @node: the node being removed. + * + * Do the first step of an extraction in an rb tree, looking for the + * node that will replace @node, and returning the deepest node that + * the following modifications to the tree can touch. If @node is the + * last node in the tree return %NULL. + */ +static struct rb_node *bfq_find_deepest(struct rb_node *node) +{ + struct rb_node *deepest; + + if (node->rb_right == NULL && node->rb_left == NULL) + deepest = rb_parent(node); + else if (node->rb_right == NULL) + deepest = node->rb_left; + else if (node->rb_left == NULL) + deepest = node->rb_right; + else { + deepest = rb_next(node); + if (deepest->rb_right != NULL) + deepest = deepest->rb_right; + else if (rb_parent(deepest) != node) + deepest = rb_parent(deepest); + } + + return deepest; +} + +/** + * bfq_active_extract - remove an entity from the active tree. + * @st: the service_tree containing the tree. + * @entity: the entity being removed. + */ +static void bfq_active_extract(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct rb_node *node; +#ifdef CONFIG_CGROUP_BFQIO + struct bfq_sched_data *sd = NULL; + struct bfq_group *bfqg = NULL; + struct bfq_data *bfqd = NULL; +#endif + + node = bfq_find_deepest(&entity->rb_node); + bfq_extract(&st->active, entity); + + if (node != NULL) + bfq_update_active_tree(node); + +#ifdef CONFIG_CGROUP_BFQIO + sd = entity->sched_data; + bfqg = container_of(sd, struct bfq_group, sched_data); + BUG_ON(!bfqg); + bfqd = (struct bfq_data *)bfqg->bfqd; +#endif + if (bfqq != NULL) + list_del(&bfqq->bfqq_list); +#ifdef CONFIG_CGROUP_BFQIO + else { /* bfq_group */ + BUG_ON(!bfqd); + bfq_weights_tree_remove(bfqd, entity, + &bfqd->group_weights_tree); + } + if (bfqg != bfqd->root_group) { + BUG_ON(!bfqg); + BUG_ON(!bfqd); + BUG_ON(!bfqg->active_entities); + bfqg->active_entities--; + if (bfqg->active_entities == 1) { + BUG_ON(!bfqd->active_numerous_groups); + bfqd->active_numerous_groups--; + } + } +#endif +} + +/** + * bfq_idle_insert - insert an entity into the idle tree. + * @st: the service tree containing the tree. + * @entity: the entity to insert. + */ +static void bfq_idle_insert(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct bfq_entity *first_idle = st->first_idle; + struct bfq_entity *last_idle = st->last_idle; + + if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) + st->first_idle = entity; + if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) + st->last_idle = entity; + + bfq_insert(&st->idle, entity); + + if (bfqq != NULL) + list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); +} + +/** + * bfq_forget_entity - remove an entity from the wfq trees. + * @st: the service tree. + * @entity: the entity being removed. + * + * Update the device status and forget everything about @entity, putting + * the device reference to it, if it is a queue. Entities belonging to + * groups are not refcounted. 
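+ * (Hence only bfq_queue entities trigger a bfq_put_queue() here.)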
+ */ +static void bfq_forget_entity(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + BUG_ON(!entity->on_st); + + entity->on_st = 0; + st->wsum -= entity->weight; + if (bfqq != NULL) { + bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", + bfqq, atomic_read(&bfqq->ref)); + bfq_put_queue(bfqq); + } +} + +/** + * bfq_put_idle_entity - release the idle tree ref of an entity. + * @st: service tree for the entity. + * @entity: the entity being released. + */ +static void bfq_put_idle_entity(struct bfq_service_tree *st, + struct bfq_entity *entity) +{ + bfq_idle_extract(st, entity); + bfq_forget_entity(st, entity); +} + +/** + * bfq_forget_idle - update the idle tree if necessary. + * @st: the service tree to act upon. + * + * To preserve the global O(log N) complexity we only remove one entry here; + * as the idle tree will not grow indefinitely this can be done safely. + */ +static void bfq_forget_idle(struct bfq_service_tree *st) +{ + struct bfq_entity *first_idle = st->first_idle; + struct bfq_entity *last_idle = st->last_idle; + + if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && + !bfq_gt(last_idle->finish, st->vtime)) { + /* + * Forget the whole idle tree, increasing the vtime past + * the last finish time of idle entities. + */ + st->vtime = last_idle->finish; + } + + if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) + bfq_put_idle_entity(st, first_idle); +} + +static struct bfq_service_tree * +__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, + struct bfq_entity *entity) +{ + struct bfq_service_tree *new_st = old_st; + + if (entity->ioprio_changed) { + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + unsigned short prev_weight, new_weight; + struct bfq_data *bfqd = NULL; + struct rb_root *root; +#ifdef CONFIG_CGROUP_BFQIO + struct bfq_sched_data *sd; + struct bfq_group *bfqg; +#endif + + if (bfqq != NULL) + bfqd = bfqq->bfqd; +#ifdef CONFIG_CGROUP_BFQIO + else { + sd = entity->my_sched_data; + bfqg = container_of(sd, struct bfq_group, sched_data); + BUG_ON(!bfqg); + bfqd = (struct bfq_data *)bfqg->bfqd; + BUG_ON(!bfqd); + } +#endif + + BUG_ON(old_st->wsum < entity->weight); + old_st->wsum -= entity->weight; + + if (entity->new_weight != entity->orig_weight) { + entity->orig_weight = entity->new_weight; + entity->ioprio = + bfq_weight_to_ioprio(entity->orig_weight); + } else if (entity->new_ioprio != entity->ioprio) { + entity->ioprio = entity->new_ioprio; + entity->orig_weight = + bfq_ioprio_to_weight(entity->ioprio); + } else + entity->new_weight = entity->orig_weight = + bfq_ioprio_to_weight(entity->ioprio); + + entity->ioprio_class = entity->new_ioprio_class; + entity->ioprio_changed = 0; + + /* + * NOTE: here we may be changing the weight too early, + * this will cause unfairness. The correct approach + * would have required additional complexity to defer + * weight changes to the proper time instants (i.e., + * when entity->finish <= old_st->vtime). + */ + new_st = bfq_entity_service_tree(entity); + + prev_weight = entity->weight; + new_weight = entity->orig_weight * + (bfqq != NULL ? bfqq->wr_coeff : 1); + /* + * If the weight of the entity changes, remove the entity + * from its old weight counter (if there is a counter + * associated with the entity), and add it to the counter + * associated with its new weight. + */ + if (prev_weight != new_weight) { + root = bfqq ? 
&bfqd->queue_weights_tree : + &bfqd->group_weights_tree; + bfq_weights_tree_remove(bfqd, entity, root); + } + entity->weight = new_weight; + /* + * Add the entity to its weights tree only if it is + * not associated with a weight-raised queue. + */ + if (prev_weight != new_weight && + (bfqq ? bfqq->wr_coeff == 1 : 1)) + /* If we get here, root has been initialized. */ + bfq_weights_tree_add(bfqd, entity, root); + + new_st->wsum += entity->weight; + + if (new_st != old_st) + entity->start = new_st->vtime; + } + + return new_st; +} + +/** + * bfq_bfqq_served - update the scheduler status after selection for + * service. + * @bfqq: the queue being served. + * @served: bytes to transfer. + * + * NOTE: this can be optimized, as the timestamps of upper level entities + * are synchronized every time a new bfqq is selected for service. By now, + * we keep it to better check consistency. + */ +static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) +{ + struct bfq_entity *entity = &bfqq->entity; + struct bfq_service_tree *st; + + for_each_entity(entity) { + st = bfq_entity_service_tree(entity); + + entity->service += served; + BUG_ON(entity->service > entity->budget); + BUG_ON(st->wsum == 0); + + st->vtime += bfq_delta(served, st->wsum); + bfq_forget_idle(st); + } + bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); +} + +/** + * bfq_bfqq_charge_full_budget - set the service to the entity budget. + * @bfqq: the queue that needs a service update. + * + * When it's not possible to be fair in the service domain, because + * a queue is not consuming its budget fast enough (the meaning of + * fast depends on the timeout parameter), we charge it a full + * budget. In this way we should obtain a sort of time-domain + * fairness among all the seeky/slow queues. + */ +static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); + + bfq_bfqq_served(bfqq, entity->budget - entity->service); +} + +/** + * __bfq_activate_entity - activate an entity. + * @entity: the entity being activated. + * + * Called whenever an entity is activated, i.e., it is not active and one + * of its children receives a new request, or has to be reactivated due to + * budget exhaustion. It uses the current budget of the entity (and the + * service received if @entity is active) of the queue to calculate its + * timestamps. + */ +static void __bfq_activate_entity(struct bfq_entity *entity) +{ + struct bfq_sched_data *sd = entity->sched_data; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + + if (entity == sd->in_service_entity) { + BUG_ON(entity->tree != NULL); + /* + * If we are requeueing the current entity we have + * to take care of not charging to it service it has + * not received. + */ + bfq_calc_finish(entity, entity->service); + entity->start = entity->finish; + sd->in_service_entity = NULL; + } else if (entity->tree == &st->active) { + /* + * Requeueing an entity due to a change of some + * next_in_service entity below it. We reuse the + * old start time. + */ + bfq_active_extract(st, entity); + } else if (entity->tree == &st->idle) { + /* + * Must be on the idle tree, bfq_idle_extract() will + * check for that. + */ + bfq_idle_extract(st, entity); + entity->start = bfq_gt(st->vtime, entity->finish) ? 
+ st->vtime : entity->finish; + } else { + /* + * The finish time of the entity may be invalid, and + * it is in the past for sure, otherwise the queue + * would have been on the idle tree. + */ + entity->start = st->vtime; + st->wsum += entity->weight; + bfq_get_entity(entity); + + BUG_ON(entity->on_st); + entity->on_st = 1; + } + + st = __bfq_entity_update_weight_prio(st, entity); + bfq_calc_finish(entity, entity->budget); + bfq_active_insert(st, entity); +} + +/** + * bfq_activate_entity - activate an entity and its ancestors if necessary. + * @entity: the entity to activate. + * + * Activate @entity and all the entities on the path from it to the root. + */ +static void bfq_activate_entity(struct bfq_entity *entity) +{ + struct bfq_sched_data *sd; + + for_each_entity(entity) { + __bfq_activate_entity(entity); + + sd = entity->sched_data; + if (!bfq_update_next_in_service(sd)) + /* + * No need to propagate the activation to the + * upper entities, as they will be updated when + * the in-service entity is rescheduled. + */ + break; + } +} + +/** + * __bfq_deactivate_entity - deactivate an entity from its service tree. + * @entity: the entity to deactivate. + * @requeue: if false, the entity will not be put into the idle tree. + * + * Deactivate an entity, independently from its previous state. If the + * entity was not on a service tree just return, otherwise if it is on + * any scheduler tree, extract it from that tree, and if necessary + * and if the caller did not specify @requeue, put it on the idle tree. + * + * Return %1 if the caller should update the entity hierarchy, i.e., + * if the entity was in service or if it was the next_in_service for + * its sched_data; return %0 otherwise. + */ +static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) +{ + struct bfq_sched_data *sd = entity->sched_data; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + int was_in_service = entity == sd->in_service_entity; + int ret = 0; + + if (!entity->on_st) + return 0; + + BUG_ON(was_in_service && entity->tree != NULL); + + if (was_in_service) { + bfq_calc_finish(entity, entity->service); + sd->in_service_entity = NULL; + } else if (entity->tree == &st->active) + bfq_active_extract(st, entity); + else if (entity->tree == &st->idle) + bfq_idle_extract(st, entity); + else if (entity->tree != NULL) + BUG(); + + if (was_in_service || sd->next_in_service == entity) + ret = bfq_update_next_in_service(sd); + + if (!requeue || !bfq_gt(entity->finish, st->vtime)) + bfq_forget_entity(st, entity); + else + bfq_idle_insert(st, entity); + + BUG_ON(sd->in_service_entity == entity); + BUG_ON(sd->next_in_service == entity); + + return ret; +} + +/** + * bfq_deactivate_entity - deactivate an entity. + * @entity: the entity to deactivate. + * @requeue: true if the entity can be put on the idle tree + */ +static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) +{ + struct bfq_sched_data *sd; + struct bfq_entity *parent; + + for_each_entity_safe(entity, parent) { + sd = entity->sched_data; + + if (!__bfq_deactivate_entity(entity, requeue)) + /* + * The parent entity is still backlogged, and + * we don't need to update it as it is still + * in service. + */ + break; + + if (sd->next_in_service != NULL) + /* + * The parent entity is still backlogged and + * the budgets on the path towards the root + * need to be updated. + */ + goto update; + + /* + * If we reach there the parent is no more backlogged and + * we want to propagate the dequeue upwards. 
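+ * requeue is set to 1, so each remaining ancestor whose finish
+ * time is still ahead of its service tree's vtime is parked on
+ * the idle tree rather than forgotten.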
+ */ + requeue = 1; + } + + return; + +update: + entity = parent; + for_each_entity(entity) { + __bfq_activate_entity(entity); + + sd = entity->sched_data; + if (!bfq_update_next_in_service(sd)) + break; + } +} + +/** + * bfq_update_vtime - update vtime if necessary. + * @st: the service tree to act upon. + * + * If necessary update the service tree vtime to have at least one + * eligible entity, skipping to its start time. Assumes that the + * active tree of the device is not empty. + * + * NOTE: this hierarchical implementation updates vtimes quite often, + * we may end up with reactivated processes getting timestamps after a + * vtime skip done because we needed a ->first_active entity on some + * intermediate node. + */ +static void bfq_update_vtime(struct bfq_service_tree *st) +{ + struct bfq_entity *entry; + struct rb_node *node = st->active.rb_node; + + entry = rb_entry(node, struct bfq_entity, rb_node); + if (bfq_gt(entry->min_start, st->vtime)) { + st->vtime = entry->min_start; + bfq_forget_idle(st); + } +} + +/** + * bfq_first_active_entity - find the eligible entity with + * the smallest finish time + * @st: the service tree to select from. + * + * This function searches the first schedulable entity, starting from the + * root of the tree and going on the left every time on this side there is + * a subtree with at least one eligible (start >= vtime) entity. The path on + * the right is followed only if a) the left subtree contains no eligible + * entities and b) no eligible entity has been found yet. + */ +static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) +{ + struct bfq_entity *entry, *first = NULL; + struct rb_node *node = st->active.rb_node; + + while (node != NULL) { + entry = rb_entry(node, struct bfq_entity, rb_node); +left: + if (!bfq_gt(entry->start, st->vtime)) + first = entry; + + BUG_ON(bfq_gt(entry->min_start, st->vtime)); + + if (node->rb_left != NULL) { + entry = rb_entry(node->rb_left, + struct bfq_entity, rb_node); + if (!bfq_gt(entry->min_start, st->vtime)) { + node = node->rb_left; + goto left; + } + } + if (first != NULL) + break; + node = node->rb_right; + } + + BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); + return first; +} + +/** + * __bfq_lookup_next_entity - return the first eligible entity in @st. + * @st: the service tree. + * + * Update the virtual time in @st and return the first eligible entity + * it contains. + */ +static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, + bool force) +{ + struct bfq_entity *entity, *new_next_in_service = NULL; + + if (RB_EMPTY_ROOT(&st->active)) + return NULL; + + bfq_update_vtime(st); + entity = bfq_first_active_entity(st); + BUG_ON(bfq_gt(entity->start, st->vtime)); + + /* + * If the chosen entity does not match with the sched_data's + * next_in_service and we are forcedly serving the IDLE priority + * class tree, bubble up budget update. + */ + if (unlikely(force && entity != entity->sched_data->next_in_service)) { + new_next_in_service = entity; + for_each_entity(new_next_in_service) + bfq_update_budget(new_next_in_service); + } + + return entity; +} + +/** + * bfq_lookup_next_entity - return the first eligible entity in @sd. + * @sd: the sched_data. + * @extract: if true the returned entity will be also extracted from @sd. 
+ * + * NOTE: since we cache the next_in_service entity at each level of the + * hierarchy, the complexity of the lookup can be decreased with + * absolutely no effort just returning the cached next_in_service value; + * we prefer to do full lookups to test the consistency of * the data + * structures. + */ +static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, + int extract, + struct bfq_data *bfqd) +{ + struct bfq_service_tree *st = sd->service_tree; + struct bfq_entity *entity; + int i = 0; + + BUG_ON(sd->in_service_entity != NULL); + + if (bfqd != NULL && + jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { + entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, + true); + if (entity != NULL) { + i = BFQ_IOPRIO_CLASSES - 1; + bfqd->bfq_class_idle_last_service = jiffies; + sd->next_in_service = entity; + } + } + for (; i < BFQ_IOPRIO_CLASSES; i++) { + entity = __bfq_lookup_next_entity(st + i, false); + if (entity != NULL) { + if (extract) { + bfq_check_next_in_service(sd, entity); + bfq_active_extract(st + i, entity); + sd->in_service_entity = entity; + sd->next_in_service = NULL; + } + break; + } + } + + return entity; +} + +/* + * Get next queue for service. + */ +static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) +{ + struct bfq_entity *entity = NULL; + struct bfq_sched_data *sd; + struct bfq_queue *bfqq; + + BUG_ON(bfqd->in_service_queue != NULL); + + if (bfqd->busy_queues == 0) + return NULL; + + sd = &bfqd->root_group->sched_data; + for (; sd != NULL; sd = entity->my_sched_data) { + entity = bfq_lookup_next_entity(sd, 1, bfqd); + BUG_ON(entity == NULL); + entity->service = 0; + } + + bfqq = bfq_entity_to_bfqq(entity); + BUG_ON(bfqq == NULL); + + return bfqq; +} + +static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) +{ + if (bfqd->in_service_cic != NULL) { + put_io_context(bfqd->in_service_cic->ioc); + bfqd->in_service_cic = NULL; + } + + bfqd->in_service_queue = NULL; + del_timer(&bfqd->idle_slice_timer); +} + +static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, + int requeue) +{ + struct bfq_entity *entity = &bfqq->entity; + + if (bfqq == bfqd->in_service_queue) + __bfq_bfqd_reset_in_service(bfqd); + + bfq_deactivate_entity(entity, requeue); +} + +static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; + + bfq_activate_entity(entity); +} + +/* + * Called when the bfqq no longer has requests pending, remove it from + * the service tree. + */ +static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, + int requeue) +{ + BUG_ON(!bfq_bfqq_busy(bfqq)); + BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + + bfq_log_bfqq(bfqd, bfqq, "del from busy"); + + bfq_clear_bfqq_busy(bfqq); + + BUG_ON(bfqd->busy_queues == 0); + bfqd->busy_queues--; + + if (!bfqq->dispatched) { + bfq_weights_tree_remove(bfqd, &bfqq->entity, + &bfqd->queue_weights_tree); + if (!blk_queue_nonrot(bfqd->queue)) { + BUG_ON(!bfqd->busy_in_flight_queues); + bfqd->busy_in_flight_queues--; + if (bfq_bfqq_constantly_seeky(bfqq)) { + BUG_ON(!bfqd-> + const_seeky_busy_in_flight_queues); + bfqd->const_seeky_busy_in_flight_queues--; + } + } + } + if (bfqq->wr_coeff > 1) + bfqd->wr_busy_queues--; + + bfq_deactivate_bfqq(bfqd, bfqq, requeue); +} + +/* + * Called when an inactive queue receives a new request. 
+ */ +static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + BUG_ON(bfq_bfqq_busy(bfqq)); + BUG_ON(bfqq == bfqd->in_service_queue); + + bfq_log_bfqq(bfqd, bfqq, "add to busy"); + + bfq_activate_bfqq(bfqd, bfqq); + + bfq_mark_bfqq_busy(bfqq); + bfqd->busy_queues++; + + if (!bfqq->dispatched) { + if (bfqq->wr_coeff == 1) + bfq_weights_tree_add(bfqd, &bfqq->entity, + &bfqd->queue_weights_tree); + if (!blk_queue_nonrot(bfqd->queue)) { + bfqd->busy_in_flight_queues++; + if (bfq_bfqq_constantly_seeky(bfqq)) + bfqd->const_seeky_busy_in_flight_queues++; + } + } + if (bfqq->wr_coeff > 1) + bfqd->wr_busy_queues++; +} diff --git a/block/bfq.h b/block/bfq.h new file mode 100644 index 00000000000..869fc354da7 --- /dev/null +++ b/block/bfq.h @@ -0,0 +1,742 @@ +/* + * BFQ-v7r5 for 3.1.0: data structures and common functions prototypes. + * + * Based on ideas and code from CFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2010 Paolo Valente + */ + +#ifndef _BFQ_H +#define _BFQ_H + +#include +#include +#include +#include + +#define BFQ_IOPRIO_CLASSES 3 +#define BFQ_CL_IDLE_TIMEOUT (HZ/5) + +#define BFQ_MIN_WEIGHT 1 +#define BFQ_MAX_WEIGHT 1000 + +#define BFQ_DEFAULT_GRP_WEIGHT 10 +#define BFQ_DEFAULT_GRP_IOPRIO 0 +#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE + +struct bfq_entity; + +/** + * struct bfq_service_tree - per ioprio_class service tree. + * @active: tree for active entities (i.e., those backlogged). + * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). + * @first_idle: idle entity with minimum F_i. + * @last_idle: idle entity with maximum F_i. + * @vtime: scheduler virtual time. + * @wsum: scheduler weight sum; active and idle entities contribute to it. + * + * Each service tree represents a B-WF2Q+ scheduler on its own. Each + * ioprio_class has its own independent scheduler, and so its own + * bfq_service_tree. All the fields are protected by the queue lock + * of the containing bfqd. + */ +struct bfq_service_tree { + struct rb_root active; + struct rb_root idle; + + struct bfq_entity *first_idle; + struct bfq_entity *last_idle; + + u64 vtime; + unsigned long wsum; +}; + +/** + * struct bfq_sched_data - multi-class scheduler. + * @in_service_entity: entity in service. + * @next_in_service: head-of-the-line entity in the scheduler. + * @service_tree: array of service trees, one per ioprio_class. + * + * bfq_sched_data is the basic scheduler queue. It supports three + * ioprio_classes, and can be used either as a toplevel queue or as + * an intermediate queue on a hierarchical setup. + * @next_in_service points to the active entity of the sched_data + * service trees that will be scheduled next. + * + * The supported ioprio_classes are the same as in CFQ, in descending + * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. + * Requests from higher priority queues are served before all the + * requests from lower priority queues; among requests of the same + * queue requests are served according to B-WF2Q+. + * All the fields are protected by the queue lock of the containing bfqd. + */ +struct bfq_sched_data { + struct bfq_entity *in_service_entity; + struct bfq_entity *next_in_service; + struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; +}; + +/** + * struct bfq_weight_counter - counter of the number of all active entities + * with a given weight. + * @weight: weight of the entities that this counter refers to. 
+ * @num_active: number of active entities with this weight. + * @weights_node: weights tree member (see bfq_data's @queue_weights_tree + * and @group_weights_tree). + */ +struct bfq_weight_counter { + short int weight; + unsigned int num_active; + struct rb_node weights_node; +}; + +/** + * struct bfq_entity - schedulable entity. + * @rb_node: service_tree member. + * @weight_counter: pointer to the weight counter associated with this entity. + * @on_st: flag, true if the entity is on a tree (either the active or + * the idle one of its service_tree). + * @finish: B-WF2Q+ finish timestamp (aka F_i). + * @start: B-WF2Q+ start timestamp (aka S_i). + * @tree: tree the entity is enqueued into; %NULL if not on a tree. + * @min_start: minimum start time of the (active) subtree rooted at + * this entity; used for O(log N) lookups into active trees. + * @service: service received during the last round of service. + * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. + * @weight: weight of the queue + * @parent: parent entity, for hierarchical scheduling. + * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the + * associated scheduler queue, %NULL on leaf nodes. + * @sched_data: the scheduler queue this entity belongs to. + * @ioprio: the ioprio in use. + * @new_weight: when a weight change is requested, the new weight value. + * @orig_weight: original weight, used to implement weight boosting + * @new_ioprio: when an ioprio change is requested, the new ioprio value. + * @ioprio_class: the ioprio_class in use. + * @new_ioprio_class: when an ioprio_class change is requested, the new + * ioprio_class value. + * @ioprio_changed: flag, true when the user requested a weight, ioprio or + * ioprio_class change. + * + * A bfq_entity is used to represent either a bfq_queue (leaf node in the + * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each + * entity belongs to the sched_data of the parent group in the cgroup + * hierarchy. Non-leaf entities have also their own sched_data, stored + * in @my_sched_data. + * + * Each entity stores independently its priority values; this would + * allow different weights on different devices, but this + * functionality is not exported to userspace by now. Priorities and + * weights are updated lazily, first storing the new values into the + * new_* fields, then setting the @ioprio_changed flag. As soon as + * there is a transition in the entity state that allows the priority + * update to take place the effective and the requested priority + * values are synchronized. + * + * Unless cgroups are used, the weight value is calculated from the + * ioprio to export the same interface as CFQ. When dealing with + * ``well-behaved'' queues (i.e., queues that do not spend too much + * time to consume their budget and have true sequential behavior, and + * when there are no external factors breaking anticipation) the + * relative weights at each level of the cgroups hierarchy should be + * guaranteed. All the fields are protected by the queue lock of the + * containing bfqd. 
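 *
 * A minimal sketch of the lazy update described above (illustrative only,
 * not verbatim from this patch): a caller records the requested values and
 * raises the flag, and the scheduler applies them later, e.g. when
 * __bfq_activate_entity() calls __bfq_entity_update_weight_prio() under
 * the bfqd queue lock:
 *
 *	entity->new_weight = w;
 *	entity->ioprio_changed = 1;
 *	. . . later, on (re)activation . . .
 *	st = __bfq_entity_update_weight_prio(st, entity);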
+ */ +struct bfq_entity { + struct rb_node rb_node; + struct bfq_weight_counter *weight_counter; + + int on_st; + + u64 finish; + u64 start; + + struct rb_root *tree; + + u64 min_start; + + unsigned long service, budget; + unsigned short weight, new_weight; + unsigned short orig_weight; + + struct bfq_entity *parent; + + struct bfq_sched_data *my_sched_data; + struct bfq_sched_data *sched_data; + + unsigned short ioprio, new_ioprio; + unsigned short ioprio_class, new_ioprio_class; + + int ioprio_changed; +}; + +struct bfq_group; + +/** + * struct bfq_queue - leaf schedulable entity. + * @ref: reference counter. + * @bfqd: parent bfq_data. + * @new_bfqq: shared bfq_queue if queue is cooperating with + * one or more other queues. + * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). + * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). + * @sort_list: sorted list of pending requests. + * @next_rq: if fifo isn't expired, next request to serve. + * @queued: nr of requests queued in @sort_list. + * @allocated: currently allocated requests. + * @meta_pending: pending metadata requests. + * @fifo: fifo list of requests in sort_list. + * @entity: entity representing this queue in the scheduler. + * @max_budget: maximum budget allowed from the feedback mechanism. + * @budget_timeout: budget expiration (in jiffies). + * @dispatched: number of requests on the dispatch list or inside driver. + * @flags: status flags. + * @bfqq_list: node for active/idle bfqq list inside our bfqd. + * @seek_samples: number of seeks sampled + * @seek_total: sum of the distances of the seeks sampled + * @seek_mean: mean seek distance + * @last_request_pos: position of the last request enqueued + * @requests_within_timer: number of consecutive pairs of request completion + * and arrival, such that the queue becomes idle + * after the completion, but the next request arrives + * within an idle time slice; used only if the queue's + * IO_bound has been cleared. + * @pid: pid of the process owning the queue, used for logging purposes. + * @last_wr_start_finish: start time of the current weight-raising period if + * the @bfq-queue is being weight-raised, otherwise + * finish time of the last weight-raising period + * @wr_cur_max_time: current max raising time for this queue + * @soft_rt_next_start: minimum time instant such that, only if a new + * request is enqueued after this time instant in an + * idle @bfq_queue with no outstanding requests, then + * the task associated with the queue it is deemed as + * soft real-time (see the comments to the function + * bfq_bfqq_softrt_next_start()) + * @last_idle_bklogged: time of the last transition of the @bfq_queue from + * idle to backlogged + * @service_from_backlogged: cumulative service received from the @bfq_queue + * since the last transition from idle to + * backlogged + * @cic: pointer to the cfq_io_context owning the bfq_queue, set to %NULL if the + * queue is shared + * + * A bfq_queue is a leaf request queue; it can be associated with an + * io_context or more, if it is async or shared between cooperating + * processes. @cgroup holds a reference to the cgroup, to be sure that it + * does not disappear while a bfqq still references it (mostly to avoid + * races between request issuing and task migration followed by cgroup + * destruction). + * All the fields are protected by the queue lock of the containing bfqd. 
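 *
 * The @flags field is not touched directly: BFQ_BFQQ_FNS(), defined later
 * in this header, generates one set/clear/test helper triple per flag.
 * A small illustration (the first two calls appear in bfq_add_bfqq_busy()
 * and bfq_del_bfqq_busy() earlier in this patch):
 *
 *	bfq_mark_bfqq_busy(bfqq);
 *	bfq_clear_bfqq_busy(bfqq);
 *	if (bfq_bfqq_busy(bfqq))
 *		. . .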
+ */ +struct bfq_queue { + atomic_t ref; + struct bfq_data *bfqd; + + /* fields for cooperating queues handling */ + struct bfq_queue *new_bfqq; + struct rb_node pos_node; + struct rb_root *pos_root; + + struct rb_root sort_list; + struct request *next_rq; + int queued[2]; + int allocated[2]; + int meta_pending; + struct list_head fifo; + + struct bfq_entity entity; + + unsigned long max_budget; + unsigned long budget_timeout; + + int dispatched; + + unsigned int flags; + + struct list_head bfqq_list; + + unsigned int seek_samples; + u64 seek_total; + sector_t seek_mean; + sector_t last_request_pos; + + unsigned int requests_within_timer; + + pid_t pid; + + /* weight-raising fields */ + unsigned long wr_cur_max_time; + unsigned long soft_rt_next_start; + unsigned long last_wr_start_finish; + unsigned int wr_coeff; + unsigned long last_idle_bklogged; + unsigned long service_from_backlogged; + + struct cfq_io_context *cic; +}; + +enum bfq_device_speed { + BFQ_BFQD_FAST, + BFQ_BFQD_SLOW, +}; + +/** + * struct bfq_data - per device data structure. + * @queue: request queue for the managed device. + * @root_group: root bfq_group for the device. + * @rq_pos_tree: rbtree sorted by next_request position, + * used when determining if two or more queues + * have interleaving requests (see bfq_close_cooperator). + * @eqm_lock: spinlock used to protect all data structures pertaining + * the Early Queue Merge (EQM) mechanism. + * @active_numerous_groups: number of bfq_groups containing more than one + * active @bfq_entity. + * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by + * weight. Used to keep track of whether all @bfq_queues + * have the same weight. The tree contains one counter + * for each distinct weight associated to some active + * and not weight-raised @bfq_queue (see the comments to + * the functions bfq_weights_tree_[add|remove] for + * further details). + * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted + * by weight. Used to keep track of whether all + * @bfq_groups have the same weight. The tree contains + * one counter for each distinct weight associated to + * some active @bfq_group (see the comments to the + * functions bfq_weights_tree_[add|remove] for further + * details). + * @busy_queues: number of bfq_queues containing requests (including the + * queue under service, even if it is idling). + * @busy_in_flight_queues: number of @bfq_queues containing pending or + * in-flight requests, plus the @bfq_queue in service, + * even if idle but waiting for the possible arrival + * of its next sync request. This field is updated only + * if the device is rotational, but used only if the + * device is also NCQ-capable. The reason why the field + * is updated also for non-NCQ-capable rotational + * devices is related to the fact that the value of + * hw_tag may be set also later than when this field may + * need to be incremented for the first time(s). + * Taking also this possibility into account, to avoid + * unbalanced increments/decrements, would imply more + * overhead than just updating this field regardless of + * the value of hw_tag. + * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues + * (that is, seeky queues that expired + * for budget timeout at least once) + * containing pending or in-flight + * requests, including the in-service + * @bfq_queue if constantly seeky. 
This + * field is updated only if the device + * is rotational, but used only if the + * device is also NCQ-capable (see the + * comments to @busy_in_flight_queues). + * @raised_busy_queues: number of weight-raised busy bfq_queues. + * @queued: number of queued requests. + * @rq_in_driver: number of requests dispatched and waiting for completion. + * @sync_flight: number of sync requests in the driver. + * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples + * completed requests. + * @hw_tag_samples: nr of samples used to calculate hw_tag. + * @hw_tag: flag set to one if the driver is showing a queueing behavior. + * @budgets_assigned: number of budgets assigned. + * @idle_slice_timer: timer set when idling for the next sequential request + * from the queue under service. + * @unplug_work: delayed work to restart dispatching on the request queue. + * @in_service_queue: @bfq_queue under service. + * @in_service_cic: cfq_io_context (cic) associated with the @in_service_queue. + * @last_position: on-disk position of the last served request. + * @last_budget_start: beginning of the last budget. + * @last_idling_start: beginning of the last idle slice. + * @peak_rate: peak transfer rate observed for a budget. + * @peak_rate_samples: number of samples used to calculate @peak_rate. + * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling. + * @cic_index: use small consequent indexes as radix tree keys to reduce depth + * @cic_list: list of all the cics active on the bfq_data device. + * @group_list: list of all the bfq_groups active on the device. + * @active_list: list of all the bfq_queues active on the device. + * @idle_list: list of all the bfq_queues idle on the device. + * @bfq_quantum: max number of requests dispatched per dispatch round. + * @bfq_fifo_expire: timeout for async/sync requests; when it expires + * requests are served in fifo order. + * @bfq_back_penalty: weight of backward seeks wrt forward ones. + * @bfq_back_max: maximum allowed backward seek. + * @bfq_slice_idle: maximum idling time. + * @bfq_user_max_budget: user-configured max budget value + * (0 for auto-tuning). + * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to + * async queues. + * @bfq_timeout: timeout for bfq_queues to consume their budget; used to + * to prevent seeky queues to impose long latencies to well + * behaved ones (this also implies that seeky queues cannot + * receive guarantees in the service domain; after a timeout + * they are charged for the whole allocated budget, to try + * to preserve a behavior reasonably fair among them, but + * without service-domain guarantees). + * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is + * no more granted any weight-raising. + * @bfq_failed_cooperations: number of consecutive failed cooperation + * chances after which weight-raising is restored + * to a queue subject to more than bfq_coop_thresh + * queue merges. + * @bfq_requests_within_timer: number of consecutive requests that must be + * issued within the idle time slice to set + * again idling to a queue which was marked as + * non-I/O-bound (see the definition of the + * IO_bound flag for further details). 
+ * @bfq_wr_coeff: Maximum factor by which the weight of a boosted + * queue is multiplied + * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies) + * @bfq_wr_rt_max_time: maximum duration for soft real-time processes + * @bfq_wr_min_idle_time: minimum idle period after which weight-raising + * may be reactivated for a queue (in jiffies) + * @bfq_wr_min_inter_arr_async: minimum period between request arrivals + * after which weight-raising may be + * reactivated for an already busy queue + * (in jiffies) + * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue, + * sectors per seconds + * @RT_prod: cached value of the product R*T used for computing the maximum + * duration of the weight raising automatically + * @device_speed: device-speed class for the low-latency heuristic + * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions + * + * All the fields are protected by the @queue lock. + */ +struct bfq_data { + struct request_queue *queue; + + struct bfq_group *root_group; +#ifdef CONFIG_CGROUP_BFQIO + int active_numerous_groups; +#endif + + struct rb_root rq_pos_tree; + spinlock_t eqm_lock; + + struct rb_root queue_weights_tree; + struct rb_root group_weights_tree; + + int busy_queues; + int busy_in_flight_queues; + int const_seeky_busy_in_flight_queues; + int wr_busy_queues; + int queued; + int rq_in_driver; + int sync_flight; + + int max_rq_in_driver; + int hw_tag_samples; + int hw_tag; + + int budgets_assigned; + + struct timer_list idle_slice_timer; + struct work_struct unplug_work; + + struct bfq_queue *in_service_queue; + struct cfq_io_context *in_service_cic; + + sector_t last_position; + + ktime_t last_budget_start; + ktime_t last_idling_start; + int peak_rate_samples; + u64 peak_rate; + unsigned long bfq_max_budget; + + unsigned int cic_index; + struct list_head cic_list; + struct hlist_head group_list; + struct list_head active_list; + struct list_head idle_list; + + unsigned int bfq_quantum; + unsigned int bfq_fifo_expire[2]; + unsigned int bfq_back_penalty; + unsigned int bfq_back_max; + unsigned int bfq_slice_idle; + u64 bfq_class_idle_last_service; + + unsigned int bfq_user_max_budget; + unsigned int bfq_max_budget_async_rq; + unsigned int bfq_timeout[2]; + + unsigned int bfq_coop_thresh; + unsigned int bfq_failed_cooperations; + unsigned int bfq_requests_within_timer; + + bool low_latency; + + /* parameters of the low_latency heuristics */ + unsigned int bfq_wr_coeff; + unsigned int bfq_wr_max_time; + unsigned int bfq_wr_rt_max_time; + unsigned int bfq_wr_min_idle_time; + unsigned long bfq_wr_min_inter_arr_async; + unsigned int bfq_wr_max_softrt_rate; + u64 RT_prod; + enum bfq_device_speed device_speed; + + struct bfq_queue oom_bfqq; +}; + +enum bfqq_state_flags { + BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */ + BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ + BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ + BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ + BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ + BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ + BFQ_BFQQ_FLAG_sync, /* synchronous queue */ + BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ + BFQ_BFQQ_FLAG_IO_bound, /* + * bfqq has timed-out at least once + * having consumed at most 2/10 of + * its budget + */ + BFQ_BFQQ_FLAG_constantly_seeky, /* + * bfqq has proved to be slow and + * seeky until budget timeout + */ + BFQ_BFQQ_FLAG_softrt_update, /* + * may need softrt-next-start + * update + */ + 
BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ + BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */ + BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ +}; + +#define BFQ_BFQQ_FNS(name) \ +static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ +{ \ + (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \ +} \ +static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ +{ \ + (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ +} \ +static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ +{ \ + return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ +} + +BFQ_BFQQ_FNS(busy); +BFQ_BFQQ_FNS(wait_request); +BFQ_BFQQ_FNS(must_alloc); +BFQ_BFQQ_FNS(fifo_expire); +BFQ_BFQQ_FNS(idle_window); +BFQ_BFQQ_FNS(prio_changed); +BFQ_BFQQ_FNS(sync); +BFQ_BFQQ_FNS(budget_new); +BFQ_BFQQ_FNS(IO_bound); +BFQ_BFQQ_FNS(constantly_seeky); +BFQ_BFQQ_FNS(coop); +BFQ_BFQQ_FNS(split_coop); +BFQ_BFQQ_FNS(just_split); +BFQ_BFQQ_FNS(softrt_update); +#undef BFQ_BFQQ_FNS + +/* Logging facilities. */ +#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ + blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) + +#define bfq_log(bfqd, fmt, args...) \ + blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) + +/* Expiration reasons. */ +enum bfqq_expiration { + BFQ_BFQQ_TOO_IDLE = 0, /* + * queue has been idling for + * too long + */ + BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ + BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ + BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ +}; + +#ifdef CONFIG_CGROUP_BFQIO +/** + * struct bfq_group - per (device, cgroup) data structure. + * @entity: schedulable entity to insert into the parent group sched_data. + * @sched_data: own sched_data, to contain child entities (they may be + * both bfq_queues and bfq_groups). + * @group_node: node to be inserted into the bfqio_cgroup->group_data + * list of the containing cgroup's bfqio_cgroup. + * @bfqd_node: node to be inserted into the @bfqd->group_list list + * of the groups active on the same device; used for cleanup. + * @bfqd: the bfq_data for the device this group acts upon. + * @async_bfqq: array of async queues for all the tasks belonging to + * the group, one queue per ioprio value per ioprio_class, + * except for the idle class that has only one queue. + * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). + * @my_entity: pointer to @entity, %NULL for the toplevel group; used + * to avoid too many special cases during group creation/ + * migration. + * @active_entities: number of active entities belonging to the group; + * unused for the root group. Used to know whether there + * are groups with more than one active @bfq_entity + * (see the comments to the function + * bfq_bfqq_must_not_expire()). + * + * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup + * there is a set of bfq_groups, each one collecting the lower-level + * entities belonging to the group that are acting on the same device. + * + * Locking works as follows: + * o @group_node is protected by the bfqio_cgroup lock, and is accessed + * via RCU from its readers. + * o @bfqd is protected by the queue lock, RCU is used to access it + * from the readers. + * o All the other fields are protected by the @bfqd queue lock. 
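 *
 * A sketch of the RCU pattern implied by the @bfqd rule above (illustrative
 * only; bfq_get_bfqd_locked() below packages exactly this, including a
 * recheck of the pointer once the queue lock is held):
 *
 *	rcu_read_lock();
 *	bfqd = rcu_dereference(bfqg->bfqd);
 *	if (bfqd != NULL) {
 *		spin_lock_irqsave(bfqd->queue->queue_lock, flags);
 *		. . . use the group under the queue lock . . .
 *		spin_unlock_irqrestore(bfqd->queue->queue_lock, flags);
 *	}
 *	rcu_read_unlock();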
+ */ +struct bfq_group { + struct bfq_entity entity; + struct bfq_sched_data sched_data; + + struct hlist_node group_node; + struct hlist_node bfqd_node; + + void *bfqd; + + struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; + struct bfq_queue *async_idle_bfqq; + + struct bfq_entity *my_entity; + + int active_entities; +}; + +/** + * struct bfqio_cgroup - bfq cgroup data structure. + * @css: subsystem state for bfq in the containing cgroup. + * @weight: cgroup weight. + * @ioprio: cgroup ioprio. + * @ioprio_class: cgroup ioprio_class. + * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. + * @group_data: list containing the bfq_group belonging to this cgroup. + * + * @group_data is accessed using RCU, with @lock protecting the updates, + * @ioprio and @ioprio_class are protected by @lock. + */ +struct bfqio_cgroup { + struct cgroup_subsys_state css; + + unsigned short weight, ioprio, ioprio_class; + + spinlock_t lock; + struct hlist_head group_data; +}; +#else +struct bfq_group { + struct bfq_sched_data sched_data; + + struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; + struct bfq_queue *async_idle_bfqq; +}; +#endif + +static inline struct bfq_service_tree * +bfq_entity_service_tree(struct bfq_entity *entity) +{ + struct bfq_sched_data *sched_data = entity->sched_data; + unsigned int idx = entity->ioprio_class - 1; + + BUG_ON(idx >= BFQ_IOPRIO_CLASSES); + BUG_ON(sched_data == NULL); + + return sched_data->service_tree + idx; +} + +static inline struct bfq_queue *cic_to_bfqq(struct cfq_io_context *cic, + int is_sync) +{ + return cic->cfqq[!!is_sync]; +} + +static inline void cic_set_bfqq(struct cfq_io_context *cic, + struct bfq_queue *bfqq, int is_sync) +{ + cic->cfqq[!!is_sync] = bfqq; +} + +static inline void call_for_each_cic(struct io_context *ioc, + void (*func)(struct io_context *, + struct cfq_io_context *)) +{ + struct cfq_io_context *cic; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) + func(ioc, cic); + rcu_read_unlock(); +} + +#define CIC_DEAD_KEY 1ul +#define CIC_DEAD_INDEX_SHIFT 1 + +static inline void *bfqd_dead_key(struct bfq_data *bfqd) +{ + return (void *)(bfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); +} + +/** + * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. + * @ptr: a pointer to a bfqd. + * @flags: storage for the flags to be saved. + * + * This function allows cic->key and bfqg->bfqd to be protected by the + * queue lock of the bfqd they reference; the pointer is dereferenced + * under RCU, so the storage for bfqd is assured to be safe as long + * as the RCU read side critical section does not end. After the + * bfqd->queue->queue_lock is taken the pointer is rechecked, to be + * sure that no other writer accessed it. If we raced with a writer, + * the function returns NULL, with the queue unlocked, otherwise it + * returns the dereferenced pointer, with the queue locked. 
+ */ +static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, + unsigned long *flags) +{ + struct bfq_data *bfqd; + + rcu_read_lock(); + bfqd = rcu_dereference(*(struct bfq_data **)ptr); + + if (bfqd != NULL && !((unsigned long) bfqd & CIC_DEAD_KEY)) { + spin_lock_irqsave(bfqd->queue->queue_lock, *flags); + if (*ptr == bfqd) + goto out; + spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); + } + + bfqd = NULL; +out: + rcu_read_unlock(); + return bfqd; +} + +static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, + unsigned long *flags) +{ + spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); +} + +static void bfq_changed_ioprio(struct io_context *ioc, + struct cfq_io_context *cic); +static void bfq_put_queue(struct bfq_queue *bfqq); +static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); +static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, + struct bfq_group *bfqg, int is_sync, + struct io_context *ioc, gfp_t gfp_mask); +static void bfq_end_wr_async_queues(struct bfq_data *bfqd, + struct bfq_group *bfqg); +static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); +static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); + +#endif /* _BFQ_H */ diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 6f9bbd97865..d0d16d4a79a 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include /* for max_pfn/max_low_pfn */ #include @@ -16,13 +17,12 @@ */ static struct kmem_cache *iocontext_cachep; -static void cfq_dtor(struct io_context *ioc) +static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list) { - if (!hlist_empty(&ioc->cic_list)) { + if (!hlist_empty(list)) { struct cfq_io_context *cic; - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); + cic = hlist_entry(list->first, struct cfq_io_context, cic_list); cic->dtor(ioc); } } @@ -40,7 +40,9 @@ int put_io_context(struct io_context *ioc) if (atomic_long_dec_and_test(&ioc->refcount)) { rcu_read_lock(); - cfq_dtor(ioc); + + hlist_sched_dtor(ioc, &ioc->cic_list); + hlist_sched_dtor(ioc, &ioc->bfq_cic_list); rcu_read_unlock(); kmem_cache_free(iocontext_cachep, ioc); @@ -50,15 +52,14 @@ int put_io_context(struct io_context *ioc) } EXPORT_SYMBOL(put_io_context); -static void cfq_exit(struct io_context *ioc) +static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list) { rcu_read_lock(); - if (!hlist_empty(&ioc->cic_list)) { + if (!hlist_empty(list)) { struct cfq_io_context *cic; - cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, - cic_list); + cic = hlist_entry(list->first, struct cfq_io_context, cic_list); cic->exit(ioc); } rcu_read_unlock(); @@ -74,9 +75,10 @@ void exit_io_context(struct task_struct *task) task->io_context = NULL; task_unlock(task); - if (atomic_dec_and_test(&ioc->nr_tasks)) - cfq_exit(ioc); - + if (atomic_dec_and_test(&ioc->nr_tasks)) { + hlist_sched_exit(ioc, &ioc->cic_list); + hlist_sched_exit(ioc, &ioc->bfq_cic_list); + } put_io_context(ioc); } @@ -89,12 +91,14 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) atomic_long_set(&ioc->refcount, 1); atomic_set(&ioc->nr_tasks, 1); spin_lock_init(&ioc->lock); - ioc->ioprio_changed = 0; + bitmap_zero(ioc->ioprio_changed, IOC_IOPRIO_CHANGED_BITS); ioc->ioprio = 0; ioc->last_waited = 0; /* doesn't matter... 
*/ ioc->nr_batch_requests = 0; /* because this is 0 */ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ioc->cic_list); + INIT_RADIX_TREE(&ioc->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH); + INIT_HLIST_HEAD(&ioc->bfq_cic_list); ioc->ioc_data = NULL; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) ioc->cgroup_changed = 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 97c3d462732..0f60ba0ad87 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2934,7 +2934,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) static void cfq_ioc_set_ioprio(struct io_context *ioc) { call_for_each_cic(ioc, changed_ioprio); - ioc->ioprio_changed = 0; } static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, @@ -3226,8 +3225,13 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) goto err_free; out: - smp_read_barrier_depends(); - if (unlikely(ioc->ioprio_changed)) + /* + * test_and_clear_bit() implies a memory barrier, paired with + * the wmb() in fs/ioprio.c, so the value seen for ioprio is the + * new one. + */ + if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED, + ioc->ioprio_changed))) cfq_ioc_set_ioprio(ioc); #ifdef CONFIG_CFQ_GROUP_IOSCHED diff --git a/fs/ioprio.c b/fs/ioprio.c index 7da2a06508e..95a6c2b04e0 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -30,7 +30,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) { - int err; + int err, i; struct io_context *ioc; const struct cred *cred = current_cred(), *tcred; @@ -60,12 +60,17 @@ int set_task_ioprio(struct task_struct *task, int ioprio) err = -ENOMEM; break; } + /* let other ioc users see the new values */ + smp_wmb(); task->io_context = ioc; } while (1); if (!err) { ioc->ioprio = ioprio; - ioc->ioprio_changed = 1; + /* make sure schedulers see the new ioprio value */ + wmb(); + for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++) + set_bit(i, ioc->ioprio_changed); } task_unlock(task); diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index b7fd4c8c70c..f2ca8cf3a88 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -69,4 +69,10 @@ SUBSYS(perf) SUBSYS(timer_slack) #endif -/* */ \ No newline at end of file +/* */ + +#ifdef CONFIG_CGROUP_BFQIO +SUBSYS(bfqio) +#endif + +/* */ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 5037a0ad231..6f63e1f2209 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -1,10 +1,10 @@ #ifndef IOCONTEXT_H #define IOCONTEXT_H +#include #include #include -struct cfq_queue; struct cfq_ttime { unsigned long last_end_request; @@ -16,12 +16,19 @@ struct cfq_ttime { struct cfq_io_context { void *key; - struct cfq_queue *cfqq[2]; + void *cfqq[2]; struct io_context *ioc; struct cfq_ttime ttime; + unsigned int wr_time_left; + unsigned int saved_idle_window; + unsigned int saved_IO_bound; + + unsigned int cooperations; + unsigned int failed_cooperations; + struct list_head queue_list; struct hlist_node cic_list; @@ -31,6 +38,16 @@ struct cfq_io_context { struct rcu_head rcu_head; }; +/* + * Indexes into the ioprio_changed bitmap. A bit set indicates that + * the corresponding I/O scheduler needs to see a ioprio update. + */ +enum { + IOC_CFQ_IOPRIO_CHANGED, + IOC_BFQ_IOPRIO_CHANGED, + IOC_IOPRIO_CHANGED_BITS +}; + /* * I/O subsystem state of the associated processes. It is refcounted * and kmalloc'ed. These could be shared between processes. 
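An illustrative sketch of the handshake the new ioprio_changed bitmap
enables, based on the fs/ioprio.c and block/cfq-iosched.c hunks above; the
example_* function names are made up for illustration and do not exist in
the tree:

	/* writer side (cf. set_task_ioprio()): publish the value, then the bits */
	static void example_publish_ioprio(struct io_context *ioc, int ioprio)
	{
		int i;

		ioc->ioprio = ioprio;
		wmb();	/* pairs with the barrier implied by test_and_clear_bit() */
		for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++)
			set_bit(i, ioc->ioprio_changed);
	}

	/* reader side (cf. cfq_get_io_context()): each scheduler clears only its own bit */
	static bool example_cfq_sees_new_ioprio(struct io_context *ioc)
	{
		return test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED,
					  ioc->ioprio_changed);
	}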
@@ -43,7 +60,7 @@ struct io_context { spinlock_t lock; unsigned short ioprio; - unsigned short ioprio_changed; + DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS); #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) unsigned short cgroup_changed; @@ -57,6 +74,8 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; + struct radix_tree_root bfq_radix_root; + struct hlist_head bfq_cic_list; void __rcu *ioc_data; }; From 58bf96060b0eb2bce738590983f7d2857ac1ab23 Mon Sep 17 00:00:00 2001 From: Ed Tam Date: Wed, 20 Nov 2013 14:12:49 -0800 Subject: [PATCH 644/678] Enable CONFIG_NETFILTER_XT_TARGET_TCPMSS Bug: 11579326 Signed-off-by: Ed Tam --- arch/arm/configs/tegra3_android_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig index 4b859b32d14..6d730d24862 100644 --- a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -120,6 +120,7 @@ CONFIG_NETFILTER_XT_TARGET_MARK=y CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y CONFIG_NETFILTER_XT_TARGET_TPROXY=y CONFIG_NETFILTER_XT_TARGET_TRACE=y CONFIG_NETFILTER_XT_MATCH_COMMENT=y From 0aa293a915e869aff18d6c3b988634228005b09a Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 Oct 2014 20:56:29 -0400 Subject: [PATCH 645/678] ektf3k.c: remove all tap to wake --- drivers/input/touchscreen/ektf3k.c | 750 +++++------------------------ 1 file changed, 116 insertions(+), 634 deletions(-) diff --git a/drivers/input/touchscreen/ektf3k.c b/drivers/input/touchscreen/ektf3k.c index fedb6f082b0..6e4b6993598 100755 --- a/drivers/input/touchscreen/ektf3k.c +++ b/drivers/input/touchscreen/ektf3k.c @@ -2,9 +2,6 @@ * * Copyright (C) 2011 Elan Microelectronics Corporation. * - * Sweep2wake and Doubletap2wake for Nexus 7 (flo) - * Copyright (C) 2013 Aaron Segaert (flar2) asegaert at gmail.com. All rights reserved. - * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and * may be copied, distributed, and modified under those terms. 
@@ -31,8 +28,6 @@ #include #include -#include - // for linux 2.6.36.3 #include #include @@ -94,7 +89,6 @@ #define IOCTL_RESUME _IOR(ELAN_IOCTLID, 14, int) #define IOCTL_FW_UPDATE _IOR(ELAN_IOCTLID, 22, int) -//don't use firmware update #define FIRMWARE_UPDATE_WITH_HEADER 1 uint16_t checksum_err=0; @@ -103,7 +97,6 @@ int FW_VERSION=0x00; int X_RESOLUTION=0x00; int Y_RESOLUTION=0x00; int FW_ID=0x00; -int BOOTCODE_VERSION=0x00; static int work_lock=0x00; #define USB_NO_Cable 0 @@ -113,9 +106,6 @@ static int work_lock=0x00; #define USB_Cable ((1 << (USB_SHIFT)) | (USB_DETECT_CABLE)) #define USB_AC_Adapter ((1 << (AC_SHIFT)) | (USB_DETECT_CABLE)) #define USB_CALBE_DETECT_MASK (USB_Cable | USB_DETECT_CABLE) -/*use for slim port to hdmi*/ -#define SLIM_HDMI_MODE 10 -#define HDMI_POWER_SOURCE_CMD 3 static unsigned now_usb_cable_status=0; static unsigned int gPrint_point = 0; @@ -172,18 +162,16 @@ struct elan_ktf3k_ts_data { static struct elan_ktf3k_ts_data *private_ts = NULL; static int __fw_packet_handler(struct i2c_client *client, int imediate); static int elan_ktf3k_ts_rough_calibrate(struct i2c_client *client); -static int elan_ktf3k_ts_hw_reset(struct i2c_client *client, unsigned int time); +static int elan_ktf3k_ts_hw_reset(struct i2c_client *client); static int elan_ktf3k_ts_resume(struct i2c_client *client); - #ifdef FIRMWARE_UPDATE_WITH_HEADER -static int firmware_update_header(struct i2c_client *client, unsigned char *firmware, unsigned int page_number); +static int firmware_update_header(struct i2c_client *client, const unsigned char *firmware, unsigned int page_number); #endif - static struct semaphore pSem; static int mTouchStatus[FINGER_NUM] = {0}; #define FIRMWARE_PAGE_SIZE 132 -#define MAX_FIRMWARE_SIZE 52800 +#define MAX_FIRMWARE_SIZE 32868 #define FIRMWARE_ACK_SIZE 2 /* Debug levels */ @@ -201,266 +189,6 @@ static int debug = DEBUG_INFO; printk("[ektf3k]:" __VA_ARGS__); \ } while (0) - -/* sweep2wake */ - -static struct input_dev *sweep2wake_pwrdev; -static DEFINE_MUTEX(s2w_lock); -int dt2w_switch = 1; -int dt2w_switch_temp = 1; -int dt2w_changed = 0; -int s2w_switch = 1; -int s2w_switch_temp = 1; -int s2w_changed = 0; -int s2w_begin_v = 150; -int s2w_end_v = 1200; -int s2w_begin_h = 350; -int s2w_end_h = 1900; -int shortsweep = 0; -bool scr_suspended = false; -int tripoff_vl = 0; -int tripoff_vr = 0; -int tripoff_hd = 0; -int tripoff_hu = 0; -int tripon_vl = 0; -int tripon_vr = 0; -int tripon_hd = 0; -int tripon_hu = 0; -unsigned long triptime_vl = 0; -unsigned long triptime_vr = 0; -unsigned long triptime_hd = 0; -unsigned long triptime_hu = 0; -unsigned long dt2w_time[2] = {0, 0}; -unsigned int dt2w_x[2] = {0, 0}; -unsigned int dt2w_y[2] = {0, 0}; -unsigned int dt2w_2_x[2] = {0, 0}; -unsigned int dt2w_2_y[2] = {0, 0}; -//int is_suspended = 0; -#define S2W_TIMEOUT 50 -#define DT2W_TIMEOUT_MAX 50 -#define DT2W_TIMEOUT_MIN 4 -#define DT2W_DELTA 150 - -static struct wake_lock d2w_wakelock; - -int wake_timeout = 60; - -void sweep2wake_setdev(struct input_dev * input_device) { - sweep2wake_pwrdev = input_device; - return; -} - -EXPORT_SYMBOL(sweep2wake_setdev); - -static void reset_sweep2wake(int s2w, int dt2w) -{ - //reset sweep2wake - if (s2w) { - tripoff_vl = 0; - tripoff_vr = 0; - tripoff_hd = 0; - tripoff_hu = 0; - tripon_vl = 0; - tripon_vr = 0; - tripon_hd = 0; - tripon_hu = 0; - triptime_vl = 0; - triptime_vr = 0; - triptime_hd = 0; - triptime_hu = 0; - } - - //reset doubletap2wake - if (dt2w) { - dt2w_time[0] = 0; - dt2w_x[0] = 0; - dt2w_y[0] = 0; - dt2w_time[1] = 0; - 
dt2w_x[1] = 0; - dt2w_y[1] = 0; - dt2w_2_x[0] = 0; - dt2w_2_x[1] = 0; - dt2w_2_y[0] = 0; - dt2w_2_y[1] = 0; - } - - return; -} - -static void sweep2wake_presspwr(struct work_struct *sweep2wake_presspwr_work) -{ - reset_sweep2wake(1,1); - - input_event(sweep2wake_pwrdev, EV_KEY, KEY_POWER, 1); - input_event(sweep2wake_pwrdev, EV_SYN, 0, 0); - msleep(20); - input_event(sweep2wake_pwrdev, EV_KEY, KEY_POWER, 0); - input_event(sweep2wake_pwrdev, EV_SYN, 0, 0); - msleep(20); - mutex_unlock(&s2w_lock); -} - -static DECLARE_WORK(sweep2wake_presspwr_work, sweep2wake_presspwr); - -void sweep2wake_pwrtrigger(void) -{ - if (mutex_trylock(&s2w_lock)) - schedule_work(&sweep2wake_presspwr_work); -} - -void sweep2wake_func(int x, int y, unsigned long time, int i) -{ - if (x < 0 || i > 0){ - reset_sweep2wake(1,0); - return; - } - - if (scr_suspended == true && s2w_switch == 1) { - //left->right - if (y < s2w_begin_v) { - tripon_vr = 1; - triptime_vr = time; - } else if (tripon_vr == 1 && y > 488 && time - triptime_vr < 20) { - tripon_vr = 2; - } else if (tripon_vr == 2 && y > 896 && time - triptime_vr < 40) { - tripon_vr = 3; - } else if (tripon_vr == 3 && (y > s2w_end_v) && time - triptime_vr < S2W_TIMEOUT) { - printk(KERN_INFO "[s2w]: ON"); - sweep2wake_pwrtrigger(); - } - //right->left - if (y > s2w_end_v) { - tripon_vl = 1; - triptime_vl = time; - } else if (tripon_vl == 1 && y < 896 && time - triptime_vl < 20) { - tripon_vl = 2; - } else if (tripon_vl == 2 && y < 488 && time - triptime_vl < 40) { - tripon_vl = 3; - } else if (tripon_vl == 3 && y < s2w_begin_v && (time - triptime_vl < S2W_TIMEOUT)) { - printk(KERN_INFO "[s2w]: ON"); - sweep2wake_pwrtrigger(); - } - //top->bottom - if (x < s2w_begin_h) { - tripon_hd = 1; - triptime_hd = time; - } else if (tripon_hd == 1 && x > 748 && time - triptime_hd < 25) { - tripon_hd = 2; - } else if (tripon_hd == 2 && x > 1496 && time - triptime_hd < 45) { - tripon_hd = 3; - } else if (tripon_hd == 3 && x > s2w_end_h && (time - triptime_hd < S2W_TIMEOUT)) { - printk(KERN_INFO "[s2w]: ON"); - sweep2wake_pwrtrigger(); - } - //bottom->top - if (x > s2w_end_h) { - tripon_hu = 1; - triptime_hu = time; - } else if (tripon_hu == 1 && x < 1496 && time - triptime_hu < 25) { - tripon_hu = 2; - } else if (tripon_hu == 2 && x < 748 && time - triptime_hu < 45) { - tripon_hu = 3; - } else if (tripon_hu == 3 && x < (s2w_begin_h) && (time - triptime_hu < S2W_TIMEOUT)) { - printk(KERN_INFO "[s2w]: ON"); - sweep2wake_pwrtrigger(); - } - } - - if (scr_suspended == false && s2w_switch > 0) { - //right->left portrait mode normal - if (y > s2w_end_v && x > 1848 ) { - tripoff_vl = 1; - triptime_vl = time; - } else if (tripoff_vl == 1 && y < 854 && time - triptime_vl < 20) { - tripoff_vl = 2; - } else if (tripoff_vl == 2 && y < 427 && time - triptime_vl < 40) { - tripoff_vl = 3; - } else if (tripoff_vl == 3 && y < (s2w_begin_v) && (time - triptime_vl < S2W_TIMEOUT)) { - printk(KERN_INFO "[s2w]: OFF"); - sweep2wake_pwrtrigger(); - } - //left->right portrait mode upside down - //if (y < 100 && x > 100) { - // tripoff_vr = 1; - // triptime_vr = time; - //} else if (tripoff_vr == 1 && y > 427 && time - triptime_vr < 20) { - // tripoff_vr = 2; - //} else if (tripoff_vr == 2 && y > 854 && time - triptime_vr < 40) { - // tripoff_vr = 3; - //} else if (tripoff_vr == 3 && y > (s2w_end_v) && (time - triptime_vr < S2W_TIMEOUT)) { - // printk(KERN_INFO "[s2w]: OFF"); - // sweep2wake_pwrtrigger(); - //} - //top->bottom - if (x < s2w_begin_h && y > 1244) { - tripoff_hd = 1; - triptime_hd = time; - } 
else if (tripoff_hd == 1 && x > 748 && time - triptime_hd < 25) { - tripoff_hd = 2; - } else if (tripoff_hd == 2 && x > 1496 && time - triptime_hd < 45) { - tripoff_hd = 3; - } else if (tripoff_hd == 3 && x > s2w_end_h && (time - triptime_hd < S2W_TIMEOUT)) { - printk(KERN_INFO "[s2w]: OFF"); - sweep2wake_pwrtrigger(); - } - //bottom->top - if (x > s2w_end_h && y < 100) { - tripoff_hu = 1; - triptime_hu = time; - } else if (tripoff_hu == 1 && x < 1496 && time - triptime_hu < 25) { - tripoff_hu = 2; - } else if (tripoff_hu == 2 && x < 748 && time - triptime_hu < 45) { - tripoff_hu = 3; - } else if (tripoff_hu == 3 && x < s2w_begin_h && (time - triptime_hu < S2W_TIMEOUT)) { - printk(KERN_INFO "[s2w]: OFF"); - sweep2wake_pwrtrigger(); - } - } -} - -void doubletap2wake_func(int x, int y) -{ - - int delta_x = 0; - int delta_y = 0; - - //printk("x=%d y=%d\n", x, y); - - dt2w_x[1] = dt2w_x[0]; - dt2w_x[0] = x; - dt2w_y[1] = dt2w_y[0]; - dt2w_y[0] = y; - - if (x < 0) { - dt2w_2_x[1] = dt2w_2_x[0]; - dt2w_2_x[0] = dt2w_x[1]; - dt2w_2_y[1] = dt2w_2_y[0]; - dt2w_2_y[0] = dt2w_y[1]; - dt2w_time[1] = dt2w_time[0]; - dt2w_time[0] = jiffies; - - //printk("x0=%d x1=%d time0=%lu time1=%lu\n", dt2w_2_x[0], dt2w_2_x[1], dt2w_time[0], dt2w_time[1]); - - delta_x = (dt2w_2_x[0]-dt2w_2_x[1]); - delta_y = (dt2w_2_y[0]-dt2w_2_y[1]); - - if ((abs(delta_x) < DT2W_DELTA) && (abs(delta_y) < DT2W_DELTA)) { - if ( ((dt2w_time[0] - dt2w_time[1]) > DT2W_TIMEOUT_MIN) - && ((dt2w_time[0] - dt2w_time[1]) < DT2W_TIMEOUT_MAX)) { - - printk("[dt2w]: OFF->ON\n"); - sweep2wake_pwrtrigger(); - - } - } - } - - return; -} - -/* end sweep2wake */ - - int elan_iap_open(struct inode *inode, struct file *filp){ touch_debug(DEBUG_INFO, "[ELAN]into elan_iap_open\n"); if (private_ts == NULL) touch_debug(DEBUG_ERROR, "private_ts is NULL~~~"); @@ -532,7 +260,7 @@ static long elan_iap_ioctl(/*struct inode *inode,*/ struct file *filp, unsign case IOCTL_MINOR_FW_VER: break; case IOCTL_RESET: - return elan_ktf3k_ts_hw_reset(private_ts->client, 0); + return elan_ktf3k_ts_hw_reset(private_ts->client); case IOCTL_IAP_MODE_LOCK: work_lock=1; disable_irq(private_ts->client->irq); @@ -646,128 +374,6 @@ static ssize_t elan_show_status(struct device *dev, struct device_attribute *dev DEVICE_ATTR(elan_touchpanel_status, S_IRUGO, elan_show_status, NULL); - - -/* sweep2wake sysfs */ -static ssize_t elan_ktf3k_sweep2wake_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - size_t count = 0; - - if (s2w_switch == s2w_switch_temp ) - count += sprintf(buf, "%d\n", s2w_switch); - else - count += sprintf(buf, "%d->%d\n", s2w_switch, s2w_switch_temp); - - return count; -} - -static ssize_t elan_ktf3k_sweep2wake_dump(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - if (buf[0] >= '0' && buf[0] <= '2' && buf[1] == '\n') - if (s2w_switch != buf[0] - '0') { - s2w_switch_temp = buf[0] - '0'; - if (!scr_suspended) - s2w_switch = s2w_switch_temp; - else - s2w_changed = 1; - } - - return count; -} - -static DEVICE_ATTR(sweep2wake, (S_IWUSR|S_IRUGO), - elan_ktf3k_sweep2wake_show, elan_ktf3k_sweep2wake_dump); - -static ssize_t elan_ktf3k_shortsweep_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - size_t count = 0; - count += sprintf(buf, "%d\n", shortsweep); - return count; -} - -static ssize_t elan_ktf3k_shortsweep_dump(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - if (buf[0] >= '0' && buf[0] <= '1' && buf[1] == '\n') - if (shortsweep != buf[0] - 
'0') - shortsweep = buf[0] - '0'; - - if (shortsweep) { - s2w_begin_v = 400 ; - s2w_end_v = 950; - s2w_begin_h = 650; - s2w_end_h = 1600; - } else { - s2w_begin_v = 150; - s2w_end_v = 1200; - s2w_begin_h = 350; - s2w_end_h = 1900; - } - - return count; -} - -static DEVICE_ATTR(shortsweep, (S_IWUSR|S_IRUGO), - elan_ktf3k_shortsweep_show, elan_ktf3k_shortsweep_dump); - -static ssize_t elan_ktf3k_doubletap2wake_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - size_t count = 0; - - if (dt2w_switch == dt2w_switch_temp) - count += sprintf(buf, "%d\n", dt2w_switch); - else - count += sprintf(buf, "%d->%d\n", dt2w_switch, dt2w_switch_temp); - - return count; -} - -static ssize_t elan_ktf3k_doubletap2wake_dump(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - if (buf[0] >= '0' && buf[0] <= '1' && buf[1] == '\n') - if (dt2w_switch != buf[0] - '0') { - dt2w_switch_temp = buf[0] - '0'; - if (!scr_suspended) - dt2w_switch = dt2w_switch_temp; - else - dt2w_changed = 1; - } - - return count; -} - -static DEVICE_ATTR(doubletap2wake, (S_IWUSR|S_IRUGO), - elan_ktf3k_doubletap2wake_show, elan_ktf3k_doubletap2wake_dump); - -static ssize_t elan_ktf3k_wake_timeout_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - size_t count = 0; - count += sprintf(buf, "%d\n", wake_timeout); - return count; -} - -static ssize_t elan_ktf3k_wake_timeout_dump(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - unsigned int input; - int ret; - ret = sscanf(buf, "%u", &input); - if (ret != 1) - return -EINVAL; - - wake_timeout = input; - - return count; -} - -static DEVICE_ATTR(wake_timeout, (S_IWUSR|S_IRUGO), - elan_ktf3k_wake_timeout_show, elan_ktf3k_wake_timeout_dump); - -/* end sweep2wake sysfs*/ - - static int check_fw_version(const unsigned char*firmware, unsigned int size, int fw_version){ int id, version; @@ -780,18 +386,12 @@ static int check_fw_version(const unsigned char*firmware, unsigned int size, int (firmware[size - 2*FIRMWARE_PAGE_SIZE + 123] << 8); touch_debug(DEBUG_INFO, "The firmware was version 0x%X and id:0x%X\n", version, id); - - if (id == 0x3029 && BOOTCODE_VERSION >= 0x6046) { - /*if the touch firmware was empty, always update firmware*/ - return fw_version == 0xFFFF ? 1 : version - fw_version; - } else { - /*this buffer doesn't contain the touch firmware*/ - return 0; - } + if(id == 0x3021) + return fw_version == 0xFFFF ? 
1 : version - fw_version; // if the touch firmware was empty, always update firmware + else + return 0; // this buffer doesn't contain the touch firmware } - -/* static ssize_t update_firmware(struct device *dev, struct device_attribute *devattr,const char *buf, size_t count) { struct i2c_client *client = to_i2c_client(dev); @@ -827,7 +427,7 @@ static ssize_t update_firmware(struct device *dev, struct device_attribute *deva if(RECOVERY || check_fw_version(firmware, pos, ts->fw_ver) > 0){ touch_debug(DEBUG_INFO, "Firmware update start!\n"); do{ -// ret = firmware_update_header(client, firmware, page_number);//add by mars + ret = firmware_update_header(client, firmware, page_number); touch_debug(DEBUG_INFO, "Firmware update finish ret=%d retry=%d !\n", ret, retry++); }while(ret != 0 && retry < 3); if(ret == 0 && RECOVERY) RECOVERY = 0; @@ -836,25 +436,20 @@ static ssize_t update_firmware(struct device *dev, struct device_attribute *deva return count; } -*/ -//DEVICE_ATTR(update_fw, S_IWUSR, NULL, update_firmware); + +DEVICE_ATTR(update_fw, S_IWUSR, NULL, update_firmware); static struct attribute *elan_attr[] = { &dev_attr_elan_touchpanel_status.attr, &dev_attr_vendor.attr, &dev_attr_gpio.attr, - //&dev_attr_update_fw.attr, -/* sweep2wake sysfs */ - &dev_attr_sweep2wake.attr, - &dev_attr_doubletap2wake.attr, - &dev_attr_wake_timeout.attr, + &dev_attr_update_fw.attr, NULL }; static struct kobject *android_touch_kobj; - static int elan_ktf3k_touch_sysfs_init(void) { int ret ; @@ -865,7 +460,7 @@ static int elan_ktf3k_touch_sysfs_init(void) ret = -ENOMEM; return ret; } -/* ret = sysfs_create_file(android_touch_kobj, &dev_attr_gpio.attr); + ret = sysfs_create_file(android_touch_kobj, &dev_attr_gpio.attr); if (ret) { touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_file failed\n", __func__); return ret; @@ -875,40 +470,13 @@ static int elan_ktf3k_touch_sysfs_init(void) touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); return ret; } -*/ -/* sweep2wake sysfs */ - ret = sysfs_create_file(android_touch_kobj, &dev_attr_sweep2wake.attr); - if (ret) { - touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); - return ret; - } - ret = sysfs_create_file(android_touch_kobj, &dev_attr_doubletap2wake.attr); - if (ret) { - touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); - return ret; - } - ret = sysfs_create_file(android_touch_kobj, &dev_attr_shortsweep.attr); - if (ret) { - touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); - return ret; - } - ret = sysfs_create_file(android_touch_kobj, &dev_attr_wake_timeout.attr); - if (ret) { - touch_debug(DEBUG_ERROR, "[elan]%s: sysfs_create_group failed\n", __func__); - return ret; - } return 0 ; } static void elan_touch_sysfs_deinit(void) { -// sysfs_remove_file(android_touch_kobj, &dev_attr_vendor.attr); -// sysfs_remove_file(android_touch_kobj, &dev_attr_gpio.attr); -/* sweep2wake sysfs */ - sysfs_remove_file(android_touch_kobj, &dev_attr_sweep2wake.attr); - sysfs_remove_file(android_touch_kobj, &dev_attr_doubletap2wake.attr); - sysfs_remove_file(android_touch_kobj, &dev_attr_shortsweep.attr); - sysfs_remove_file(android_touch_kobj, &dev_attr_wake_timeout.attr); + sysfs_remove_file(android_touch_kobj, &dev_attr_vendor.attr); + sysfs_remove_file(android_touch_kobj, &dev_attr_gpio.attr); kobject_del(android_touch_kobj); } @@ -968,7 +536,7 @@ static int elan_ktf3k_ts_read_command(struct i2c_client *client, u8* cmd, u16 cmd_length, u8 *value, u16 value_length){ struct 
i2c_adapter *adapter = client->adapter; struct i2c_msg msg[2]; - //__le16 le_addr; + __le16 le_addr; struct elan_ktf3k_ts_data *ts; int length = 0; @@ -993,7 +561,7 @@ static int elan_ktf3k_i2c_read_packet(struct i2c_client *client, u8 *value, u16 value_length){ struct i2c_adapter *adapter = client->adapter; struct i2c_msg msg[1]; - //__le16 le_addr; + __le16 le_addr; struct elan_ktf3k_ts_data *ts; int length = 0; @@ -1017,7 +585,7 @@ static int __hello_packet_handler(struct i2c_client *client) { int rc; uint8_t buf_recv[4] = { 0 }; - //uint8_t buf_recv1[4] = { 0 }; + uint8_t buf_recv1[4] = { 0 }; rc = elan_ktf3k_ts_poll(client); if (rc < 0) { @@ -1047,7 +615,7 @@ static int wait_for_IRQ_Low(struct i2c_client *client, int utime){ return 0; }while(retry_times-- > 0); - touch_debug(DEBUG_INFO,"Wait IRQ time out\n"); + touch_debug("Wait IRQ time out\n"); return -1; } @@ -1060,7 +628,6 @@ static int __fw_packet_handler(struct i2c_client *client, int immediate) uint8_t cmd_x[] = {0x53, 0x60, 0x00, 0x00}; /*Get x resolution*/ uint8_t cmd_y[] = {0x53, 0x63, 0x00, 0x00}; /*Get y resolution*/ uint8_t cmd_id[] = {0x53, 0xf0, 0x00, 0x01}; /*Get firmware ID*/ - uint8_t cmd_boot_id[] = {0x53, 0x10, 0x00, 0x01};/*Get boot code version*/ uint8_t buf_recv[4] = {0}; // Firmware version rc = elan_ktf3k_ts_read_command(client, cmd, 4, buf_recv, 4); @@ -1116,20 +683,6 @@ static int __fw_packet_handler(struct i2c_client *client, int immediate) FW_ID = ts->fw_id; touch_debug(DEBUG_INFO, "[elan] %s: firmware id: 0x%4.4x\n", __func__, ts->fw_id); } -/*boot code version*/ - rc = elan_ktf3k_ts_read_command(client, cmd_boot_id, 4, buf_recv, 4); - if (rc < 0) - return rc; - - if (immediate) { - wait_for_IRQ_Low(client, 1000); - elan_ktf3k_i2c_read_packet(client, buf_recv, 4); - major = ((buf_recv[1] & 0x0f) << 4) | ((buf_recv[2] & 0xf0) >> 4); - minor = ((buf_recv[2] & 0x0f) << 4) | ((buf_recv[3] & 0xf0) >> 4); - - BOOTCODE_VERSION = major << 8 | minor; - touch_debug(DEBUG_INFO, "[elan] %s: boot code id: 0x%4.4x\n", __func__, BOOTCODE_VERSION); - } return 0; } @@ -1154,7 +707,7 @@ static int elan_ktf3k_ts_setup(struct i2c_client *client) int rc, count = 10; retry: // Reset - elan_ktf3k_ts_hw_reset(client, 250); + elan_ktf3k_ts_hw_reset(client); // Check if old firmware. 
If not, send the notmal_command to enter normal mode if( isOldFW(client) == 0 ){ //if check is new bootcode touch_debug(DEBUG_INFO, "The boot code is new!\n"); @@ -1250,14 +803,14 @@ static int elan_ktf3k_ts_get_power_state(struct i2c_client *client) return power_state; } -static int elan_ktf3k_ts_hw_reset(struct i2c_client *client, unsigned int time) +static int elan_ktf3k_ts_hw_reset(struct i2c_client *client) { struct elan_ktf3k_ts_data *ts = i2c_get_clientdata(client); touch_debug(DEBUG_INFO, "[ELAN] Start HW reset!\n"); gpio_direction_output(ts->rst_gpio, 0); usleep_range(1000,1500); gpio_direction_output(ts->rst_gpio, 1); - if(time) msleep(time); + msleep(250); return 0; } @@ -1268,11 +821,9 @@ static int elan_ktf3k_ts_set_power_source(struct i2c_client *client, u8 state) int length = 0; dev_dbg(&client->dev, "[elan] %s: enter\n", __func__); - /* - 0x52 0x40 0x00 0x01 => Battery Mode - 0x52 0x41 0x00 0x01 => USB and AC Adapter Mode - 0x52 0x43 0x00 0x01 => SLIM Port to HDMI - */ + /*0x52 0x40 0x00 0x01 => Battery Mode + 0x52 0x41 0x00 0x01 => USB and AC Adapter Mode + */ cmd[1] |= state & 0x0F; dev_dbg(&client->dev, @@ -1291,12 +842,11 @@ static int elan_ktf3k_ts_set_power_source(struct i2c_client *client, u8 state) return 0; } -/* static int elan_ktf3k_ts_get_power_source(struct i2c_client *client) { int rc = 0; uint8_t cmd[] = {CMD_R_PKT, 0x40, 0x00, 0x01}; - uint8_t buf[4] = {0}; + uint8_t buf[4] = {0}, power_source; //rc = elan_ktf2k_ts_get_data(client, cmd, buf, 4); rc = elan_ktf3k_ts_read_command(client, cmd, 4, buf, 4); @@ -1305,17 +855,12 @@ static int elan_ktf3k_ts_get_power_source(struct i2c_client *client) return 0; } -*/ -static void update_power_source(void){ +static void update_power_source(){ unsigned power_source = now_usb_cable_status; if(private_ts == NULL || work_lock) return; // Send power state 1 if USB cable and AC charger was plugged on. 
- if (power_source == SLIM_HDMI_MODE) { - elan_ktf3k_ts_set_power_source(private_ts->client, HDMI_POWER_SOURCE_CMD); - } else { - elan_ktf3k_ts_set_power_source(private_ts->client, power_source != USB_NO_Cable); - } + elan_ktf3k_ts_set_power_source(private_ts->client, power_source != USB_NO_Cable); } void touch_callback(unsigned cable_status){ @@ -1348,7 +893,7 @@ static void elan_ktf3k_ts_report_data(struct i2c_client *client, uint8_t *buf) { struct elan_ktf3k_ts_data *ts = i2c_get_clientdata(client); struct input_dev *idev = ts->input_dev; - uint16_t x = 0, y = 0, touch_size, pressure_size; + uint16_t x, y, touch_size, pressure_size; uint16_t fbits=0, checksum=0; uint8_t i, num; static uint8_t size_index[10] = {35, 35, 36, 36, 37, 37, 38, 38, 39, 39}; @@ -1378,21 +923,19 @@ static void elan_ktf3k_ts_report_data(struct i2c_client *client, uint8_t *buf) input_report_abs(idev, ABS_MT_POSITION_X, y); input_report_abs(idev, ABS_MT_POSITION_Y, x); if(unlikely(gPrint_point)) touch_debug(DEBUG_INFO, "[elan] finger id=%d X=%d y=%d size=%d pressure=%d\n", i, x, y, touch_size, pressure_size); - - } + } } mTouchStatus[i] = active; fbits = fbits >> 1; idx += 3; } - input_sync(idev); } // checksum else { checksum_err +=1; touch_debug(DEBUG_ERROR, "[elan] Checksum Error %d byte[2]=%X\n", checksum_err, buf[2]); } - + return; } @@ -1406,62 +949,43 @@ static void elan_ktf3k_ts_report_data2(struct i2c_client *client, uint8_t *buf) uint16_t active = 0; uint8_t idx=IDX_FINGER; - num = buf[2] & 0xf; + num = buf[2] & 0xf; for (i=0; i<34;i++) checksum +=buf[i]; - + if ( (num < 3) || ((checksum & 0x00ff) == buf[34])) { - fbits = buf[2] & 0x30; - fbits = (fbits << 4) | buf[1]; - - //input_report_key(idev, BTN_TOUCH, 1); - - for(i = 0; i < FINGER_NUM; i++){ - active = fbits & 0x1; - if(active || mTouchStatus[i]){ - input_mt_slot(ts->input_dev, i); - input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, active); - if(active){ - elan_ktf3k_ts_parse_xy(&buf[idx], &x, &y); - x = x > ts->abs_x_max ? 0 : ts->abs_x_max - x; - y = y > ts->abs_y_max ? ts->abs_y_max : y; - touch_size = buf[35 + i]; - pressure_size = buf[45 + i]; - input_report_abs(idev, ABS_MT_TOUCH_MAJOR, touch_size); - input_report_abs(idev, ABS_MT_PRESSURE, pressure_size); - input_report_abs(idev, ABS_MT_POSITION_X, y); - input_report_abs(idev, ABS_MT_POSITION_Y, x); - if(unlikely(gPrint_point)) - touch_debug(DEBUG_INFO, "[elan] finger id=%d X=%d y=%d size=%d pressure=%d\n", i, x, y, touch_size, pressure_size); - /* sweep2wake */ - if (s2w_switch > 0) - sweep2wake_func(x, y, jiffies, i); - if (dt2w_switch && scr_suspended) - doubletap2wake_func(x, y); - /* end sweep2wake */ - } - } - mTouchStatus[i] = active; - fbits = fbits >> 1; - idx += 3; - } - input_sync(idev); + fbits = buf[2] & 0x30; + fbits = (fbits << 4) | buf[1]; + //input_report_key(idev, BTN_TOUCH, 1); + for(i = 0; i < FINGER_NUM; i++){ + active = fbits & 0x1; + if(active || mTouchStatus[i]){ + input_mt_slot(ts->input_dev, i); + input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, active); + if(active){ + elan_ktf3k_ts_parse_xy(&buf[idx], &x, &y); + x = x > ts->abs_x_max ? 0 : ts->abs_x_max - x; + y = y > ts->abs_y_max ? 
ts->abs_y_max : y; + touch_size = buf[35 + i]; + pressure_size = buf[45 + i]; + input_report_abs(idev, ABS_MT_TOUCH_MAJOR, touch_size); + input_report_abs(idev, ABS_MT_PRESSURE, pressure_size); + input_report_abs(idev, ABS_MT_POSITION_X, y); + input_report_abs(idev, ABS_MT_POSITION_Y, x); + if(unlikely(gPrint_point)) touch_debug(DEBUG_INFO, "[elan] finger id=%d X=%d y=%d size=%d pressure=%d\n", i, x, y, touch_size, pressure_size); + } + } + mTouchStatus[i] = active; + fbits = fbits >> 1; + idx += 3; + } + input_sync(idev); } // checksum - else { checksum_err +=1; touch_debug(DEBUG_ERROR, "[elan] Checksum Error %d byte[2]=%X\n", checksum_err, buf[2]); } - /* sweep2wake */ - if (checksum == 99) { - if (s2w_switch > 0) - sweep2wake_func(-1, -1, jiffies, i); - if (dt2w_switch && scr_suspended) - doubletap2wake_func(-1, -1); - } - /* end sweep2wake */ - return; } @@ -1594,7 +1118,7 @@ static irqreturn_t elan_ktf3k_ts_irq_handler(int irq, void *dev_id) { struct elan_ktf3k_ts_data *ts = dev_id; struct i2c_client *client = ts->client; - + dev_dbg(&client->dev, "[elan] %s\n", __func__); disable_irq_nosync(ts->client->irq); queue_work(ts->elan_wq, &ts->work); @@ -1692,7 +1216,6 @@ static int ektf_proc_write(struct file *file, const char *buffer, unsigned long } #endif // #ifdef _ENABLE_DBG_LEV - #ifdef FIRMWARE_UPDATE_WITH_HEADER #define FIRMWARE_PAGE_SIZE 132 static unsigned char touch_firmware[] = { @@ -1702,25 +1225,21 @@ static unsigned char touch_firmware[] = { #define SIZE_PER_PACKET 4 static int sendI2CPacket(struct i2c_client *client, const unsigned char *buf, unsigned int length){ - int ret, i; - int retry_times = 10; + int ret, i, retry_times = 10; for(i = 0; i < length; i += ret){ ret = i2c_master_send(client, buf + i, length < SIZE_PER_PACKET ? length : SIZE_PER_PACKET); - - if(ret <= 0){ + if(ret <= 0){ retry_times--; ret = 0; - } + } if(ret < (length < SIZE_PER_PACKET ? 
length : SIZE_PER_PACKET)){ - touch_debug(DEBUG_INFO,"Sending packet broken\n"); - //printk("[ektf3k]:Sending packet broken\n"); + touch_debug("Sending packet broken\n"); } + if(retry_times < 0){ - touch_debug(DEBUG_INFO,"Failed sending I2C touch firmware packet.\n"); - //printk("[ektf3k]:Failed sending I2C touch firmware packet.\n"); + touch_debug("Failed sending I2C touch firmware packet.\n"); break; } - } return i; @@ -1736,8 +1255,7 @@ static int recvI2CPacket(struct i2c_client *client, unsigned char *buf, unsigned } if(retry_times < 0){ - touch_debug(DEBUG_INFO,"Failed sending I2C touch firmware packet.\n"); - //printk("[ektf3k]:Failed sending I2C touch firmware packet.\n"); + touch_debug("Failed sending I2C touch firmware packet.\n"); break; } } @@ -1746,15 +1264,14 @@ static int recvI2CPacket(struct i2c_client *client, unsigned char *buf, unsigned } -static int firmware_update_header(struct i2c_client *client, unsigned char *firmware, unsigned int pages_number){ - - int ret, i; - int sendCount; - int recvCount; - int write_times; +static int firmware_update_header(struct i2c_client *client, const unsigned char *firmware, unsigned int pages_number){ + int ret, i, mode; + int retry_times = 3, write_times; unsigned char packet_data[8] = {0}; + unsigned char isp_cmd[4] = {0x54, 0x00, 0x12, 0x34}; unsigned char nb_isp_cmd[4] = {0x45, 0x49, 0x41, 0x50}; unsigned char *cursor; + int boot_code = 0; struct elan_ktf3k_ts_data *ts = i2c_get_clientdata(client); if(ts == NULL) @@ -1764,13 +1281,27 @@ static int firmware_update_header(struct i2c_client *client, unsigned char *firm disable_irq(client->irq); // Blocking call no need to do extra wait wake_lock(&ts->wakelock); work_lock = 1; - /*add delay for waiting bootcode initial*/ - elan_ktf3k_ts_hw_reset(client, 20); - touch_debug(DEBUG_INFO, "Send command into IAP mode\n"); - /*get into IAP mode*/ - if (sendI2CPacket(client, nb_isp_cmd, sizeof(nb_isp_cmd)) < 0) - goto fw_update_failed; + elan_ktf3k_ts_hw_reset(client); + // Step 1: Check boot code version + boot_code = gpio_get_value(ts->intr_gpio); + if(boot_code == 0){ // if the boot code is old + touch_debug(DEBUG_INFO, "The firmware update of old boot code\n"); + if(recvI2CPacket(client, packet_data, 4) < 0) + goto fw_update_failed; + + touch_debug(DEBUG_INFO, "The received bytes 0x%X 0x%X 0x%X 0x%X\n", packet_data[0], packet_data[1], + packet_data[2], packet_data[3]); + if(packet_data[0] == 0x55 && packet_data[1] == 0x55 && packet_data[2] == 0x80 && packet_data[3] == 0x80) + touch_debug(DEBUG_INFO, "In the recovery mode\n"); + if(sendI2CPacket(client, isp_cmd, sizeof(isp_cmd)) < 0) // get into ISP mode + goto fw_update_failed; + }else{ // if the boot code is new + touch_debug(DEBUG_INFO, "The firmware update of new boot code\n"); + if(sendI2CPacket(client, nb_isp_cmd, sizeof(nb_isp_cmd)) < 0) // get into ISP mode + goto fw_update_failed; + } + msleep(100); packet_data[0] = 0x10; if(sendI2CPacket(client, packet_data, 1) < 0) // send dummy byte @@ -1783,14 +1314,14 @@ static int firmware_update_header(struct i2c_client *client, unsigned char *firm page_write_retry: touch_debug(DEBUG_MESSAGES, "Update page number %d\n", i); - + int sendCount; if((sendCount = sendI2CPacket(client, cursor, FIRMWARE_PAGE_SIZE)) != FIRMWARE_PAGE_SIZE){ dev_err(&client->dev, "Fail to Update page number %d\n", i); goto fw_update_failed; } touch_debug(DEBUG_INFO, "sendI2CPacket send %d bytes\n", sendCount); - msleep(25); + int recvCount; if((recvCount = recvI2CPacket(client, packet_data, FIRMWARE_ACK_SIZE)) != 
FIRMWARE_ACK_SIZE){ dev_err(&client->dev, "Fail to Update page number %d\n", i); goto fw_update_failed; @@ -1798,7 +1329,6 @@ static int firmware_update_header(struct i2c_client *client, unsigned char *firm touch_debug(DEBUG_INFO, "recvI2CPacket recv %d bytes: %x %x\n", recvCount, packet_data[0], packet_data[1]); - if(packet_data[0] != 0xaa || packet_data[1] != 0xaa){ touch_debug(DEBUG_INFO, "message received: %02X %02X Page %d rewrite\n", packet_data[0], packet_data[1], i); if(write_times++ > 3) @@ -1806,27 +1336,22 @@ static int firmware_update_header(struct i2c_client *client, unsigned char *firm goto page_write_retry; } - cursor += FIRMWARE_PAGE_SIZE; } - elan_ktf3k_ts_hw_reset(client, 0); - - /*check irq*/ - wait_for_IRQ_Low(client, 500000);/*500ms * 10*/ - - if (recvI2CPacket(client, packet_data, 4) < 0) - goto fw_update_failed; - /*add debug message for hello packet*/ - touch_debug(DEBUG_INFO, "[elan] %s: hello packet %2x:%2X:%2x:%2x\n", __func__, packet_data[0], packet_data[1], packet_data[2], packet_data[3]); - - __fw_packet_handler(ts->client, 1); + elan_ktf3k_ts_hw_reset(client); + if(boot_code) + msleep(2000); + else + msleep(300); + if(recvI2CPacket(client, packet_data, 4) < 0) + goto fw_update_failed; + __fw_packet_handler(ts->client, 1); ret = 0; goto fw_update_finish; fw_update_failed: ret = -1; - touch_debug(DEBUG_INFO, "Failed the touch firmware update!\n"); fw_update_finish: work_lock = 0; wake_unlock(&ts->wakelock); @@ -1872,11 +1397,11 @@ int elan_stress_release(struct inode *inode, struct file *filp) return 0; /* success */ } -long elan_stress_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +int elan_stress_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int err = 1; - printk("[elan_stress_ioctl]%d\n", cmd); + printk("%s\n", __func__, cmd); if (_IOC_TYPE(cmd) != STRESS_IOC_MAGIC) return -ENOTTY; if (_IOC_NR(cmd) > STRESS_IOC_MAXNR) @@ -1969,7 +1494,6 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, ts->status = 1; // set I2C status is OK; wake_lock_init(&ts->wakelock, WAKE_LOCK_SUSPEND, "elan_touch"); - wake_lock_init(&d2w_wakelock, WAKE_LOCK_SUSPEND, "d2w_wakelock"); if(err==0x80) touch_debug(DEBUG_INFO, "[ELAN] Touch is in boot mode!\n"); @@ -1995,7 +1519,7 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, __set_bit(EV_ABS, ts->input_dev->evbit); __set_bit(EV_SYN, ts->input_dev->evbit); __set_bit(EV_KEY, ts->input_dev->evbit); - __set_bit(INPUT_PROP_DIRECT, ts->input_dev->propbit); + err = input_register_device(ts->input_dev); if (err) { @@ -2011,10 +1535,9 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, touch_debug(DEBUG_INFO, "[elan]%s: handle missed interrupt\n", __func__); elan_ktf3k_ts_irq_handler(client->irq, ts); } - #ifdef FIRMWARE_UPDATE_WITH_HEADER - if (RECOVERY || check_fw_version(touch_firmware, sizeof(touch_firmware), ts->fw_ver) > 0) + if(RECOVERY || check_fw_version(touch_firmware, sizeof(touch_firmware), ts->fw_ver) > 0) firmware_update_header(client, touch_firmware, sizeof(touch_firmware)/FIRMWARE_PAGE_SIZE); #endif @@ -2027,7 +1550,7 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, private_ts = ts; - elan_ktf3k_touch_sysfs_init(); + //elan_ktf2k_touch_sysfs_init(); ts->attrs.attrs = elan_attr; err = sysfs_create_group(&client->dev.kobj, &ts->attrs); if (err) { @@ -2094,9 +1617,9 @@ static int elan_ktf3k_ts_probe(struct i2c_client *client, input_free_device(ts->input_dev); err_input_dev_alloc_failed: -//err_detect_failed: -// if (ts->elan_wq) -// 
destroy_workqueue(ts->elan_wq); +err_detect_failed: + if (ts->elan_wq) + destroy_workqueue(ts->elan_wq); err_create_wq_failed: kfree(ts); @@ -2120,7 +1643,6 @@ static int elan_ktf3k_ts_remove(struct i2c_client *client) destroy_workqueue(ts->elan_wq); input_unregister_device(ts->input_dev); wake_lock_destroy(&ts->wakelock); - wake_lock_destroy(&d2w_wakelock); #ifdef TOUCH_STRESS_TEST misc_deregister(&ts->misc_dev); #endif @@ -2151,31 +1673,15 @@ static int elan_ktf3k_ts_suspend(struct i2c_client *client, pm_message_t mesg) int rc = 0; touch_debug(DEBUG_INFO, "[elan] %s: enter\n", __func__); - -/*s2w*/ - if (s2w_switch == 1 || dt2w_switch == 1) { - enable_irq_wake(client->irq); - } else { - disable_irq(client->irq); - } - + disable_irq(client->irq); force_release_pos(client); rc = cancel_work_sync(&ts->work); if (rc) enable_irq(client->irq); -/*s2w*/ - if((s2w_switch != 1 && !dt2w_switch) && work_lock == 0) + if(work_lock == 0) rc = elan_ktf3k_ts_set_power_state(client, PWR_STATE_DEEP_SLEEP); -/*s2w*/ - scr_suspended = true; - if ((dt2w_switch == 1) || (s2w_switch == 1)) { - if (wake_timeout == 0) { - wake_lock(&d2w_wakelock); - } else { - wake_lock_timeout(&d2w_wakelock, 100 * wake_timeout); - } - } + return 0; } @@ -2183,11 +1689,8 @@ static int elan_ktf3k_ts_resume(struct i2c_client *client) { int rc = 0, retry = 5; - //struct elan_ktf3k_ts_data *ts = i2c_get_clientdata(client); - //int delay_time; - - //gpio_direction_output(31, 0); - + struct elan_ktf3k_ts_data *ts = i2c_get_clientdata(client); + int delay_time; touch_debug(DEBUG_INFO, "[elan] %s: enter\n", __func__); if(work_lock == 0){ do { @@ -2201,25 +1704,7 @@ static int elan_ktf3k_ts_resume(struct i2c_client *client) } while (--retry); } //force_release_pos(client); - -/* s2w */ - if (s2w_switch == 1 || dt2w_switch == 1) { - disable_irq_wake(client->irq); - } else { - enable_irq(client->irq); - } - - if (s2w_changed) - s2w_switch = s2w_switch_temp; - if (dt2w_changed) - dt2w_switch = dt2w_switch_temp; - - scr_suspended = false; - - if (wake_lock_active(&d2w_wakelock)) - wake_unlock(&d2w_wakelock); -/* end s2w */ - + enable_irq(client->irq); return 0; } @@ -2228,7 +1713,6 @@ static void elan_ktf3k_ts_early_suspend(struct early_suspend *h) { struct elan_ktf3k_ts_data *ts; ts = container_of(h, struct elan_ktf3k_ts_data, early_suspend); - //is_suspended = 1; elan_ktf3k_ts_suspend(ts->client, PMSG_SUSPEND); } @@ -2236,7 +1720,6 @@ static void elan_ktf3k_ts_late_resume(struct early_suspend *h) { struct elan_ktf3k_ts_data *ts; ts = container_of(h, struct elan_ktf3k_ts_data, early_suspend); - //is_suspended = 0; elan_ktf3k_ts_resume(ts->client); } #endif @@ -2276,4 +1759,3 @@ module_exit(elan_ktf3k_ts_exit); MODULE_DESCRIPTION("ELAN KTF3K Touchscreen Driver"); MODULE_LICENSE("GPL"); - From 744a886f1707e09a9958a3bc444c612acc907263 Mon Sep 17 00:00:00 2001 From: Metallice Date: Thu, 23 Oct 2014 21:00:02 -0400 Subject: [PATCH 646/678] gpio_keys.c: remove tap to wake --- drivers/input/keyboard/gpio_keys.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 138dadeed7e..b1d82022917 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -32,7 +32,6 @@ #include #include #include -#include struct gpio_button_data { const struct gpio_keys_button *button; @@ -749,9 +748,6 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) } input_sync(input); - sweep2wake_setdev(input); - printk(KERN_INFO 
"[sweep2wake]: set device %s\n", input->name); - device_init_wakeup(&pdev->dev, wakeup); return 0; @@ -889,3 +885,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Phil Blundell "); MODULE_DESCRIPTION("Keyboard driver for GPIOs"); MODULE_ALIAS("platform:gpio-keys"); + From b5c30b9107c2d49026667dacb6ac3623f0672a90 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Wed, 19 Feb 2014 15:30:47 -0800 Subject: [PATCH 647/678] Power: add an API to log wakeup reasons Add API log_wakeup_reason() and expose it to userspace via sysfs path /sys/kernel/wakeup_reasons/last_resume_reason Change-Id: I81addaf420f1338255c5d0638b0d244a99d777d1 Signed-off-by: Ruchi Kandoi --- include/linux/wakeup_reason.h | 23 ++++++ kernel/power/Makefile | 2 + kernel/power/wakeup_reason.c | 132 ++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+) create mode 100644 include/linux/wakeup_reason.h create mode 100644 kernel/power/wakeup_reason.c diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h new file mode 100644 index 00000000000..7ce50f0debc --- /dev/null +++ b/include/linux/wakeup_reason.h @@ -0,0 +1,23 @@ +/* + * include/linux/wakeup_reason.h + * + * Logs the reason which caused the kernel to resume + * from the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_WAKEUP_REASON_H +#define _LINUX_WAKEUP_REASON_H + +void log_wakeup_reason(int irq); + +#endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 9b224e16b19..a6ef0bed68d 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -16,3 +16,5 @@ obj-$(CONFIG_FB_EARLYSUSPEND) += fbearlysuspend.o obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o + +obj-$(CONFIG_SUSPEND) += wakeup_reason.o diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c new file mode 100644 index 00000000000..ae9bfece9d9 --- /dev/null +++ b/kernel/power/wakeup_reason.c @@ -0,0 +1,132 @@ +/* + * kernel/power/wakeup_reason.c + * + * Logs the reasons which caused the kernel to resume from + * the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define MAX_WAKEUP_REASON_IRQS 32 +static int irq_list[MAX_WAKEUP_REASON_IRQS]; +static int irq_count; +static struct kobject *wakeup_reason; +static spinlock_t resume_reason_lock; + +static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int irq_no, buf_offset = 0; + struct irq_desc *desc; + spin_lock(&resume_reason_lock); + for (irq_no = 0; irq_no < irq_count; irq_no++) { + desc = irq_to_desc(irq_list[irq_no]); + if (desc && desc->action && desc->action->name) + buf_offset += sprintf(buf + buf_offset, "%d %s\n", + irq_list[irq_no], desc->action->name); + else + buf_offset += sprintf(buf + buf_offset, "%d\n", + irq_list[irq_no]); + } + spin_unlock(&resume_reason_lock); + return buf_offset; +} + +static struct kobj_attribute resume_reason = __ATTR(last_resume_reason, 0666, + reason_show, NULL); + +static struct attribute *attrs[] = { + &resume_reason.attr, + NULL, +}; +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +/* + * logs all the wake up reasons to the kernel + * stores the irqs to expose them to the userspace via sysfs + */ +void log_wakeup_reason(int irq) +{ + struct irq_desc *desc; + desc = irq_to_desc(irq); + if (desc && desc->action && desc->action->name) + printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq, + desc->action->name); + else + printk(KERN_INFO "Resume caused by IRQ %d\n", irq); + + spin_lock(&resume_reason_lock); + irq_list[irq_count++] = irq; + spin_unlock(&resume_reason_lock); +} + +/* Detects a suspend and clears all the previous wake up reasons*/ +static int wakeup_reason_pm_event(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + switch (pm_event) { + case PM_SUSPEND_PREPARE: + spin_lock(&resume_reason_lock); + irq_count = 0; + spin_unlock(&resume_reason_lock); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block wakeup_reason_pm_notifier_block = { + .notifier_call = wakeup_reason_pm_event, +}; + +/* Initializes the sysfs parameter + * registers the pm_event notifier + */ +void __init wakeup_reason_init(void) +{ + int retval; + spin_lock_init(&resume_reason_lock); + retval = register_pm_notifier(&wakeup_reason_pm_notifier_block); + if (retval) + printk(KERN_WARNING "[%s] failed to register PM notifier %d\n", + __func__, retval); + + wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj); + if (!wakeup_reason) { + printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n", + __func__); + return; + } + retval = sysfs_create_group(wakeup_reason, &attr_group); + if (retval) { + kobject_put(wakeup_reason); + printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n", + __func__, retval); + } +} + +late_initcall(wakeup_reason_init); From b8c126968438fe53c1d9151dbba25ec7b3873528 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Thu, 20 Feb 2014 19:47:38 -0800 Subject: [PATCH 648/678] POWER: fix compile warnings in log_wakeup_reason Change I81addaf420f1338255c5d0638b0d244a99d777d1 introduced compile warnings, fix these. 
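For reference, a minimal userspace sketch that reads the node added by the previous patch; the sysfs path is taken from the commit text, everything else (error handling, output handling) is assumed and kept deliberately small:

#include <stdio.h>

int main(void)
{
        char buf[256];
        FILE *f = fopen("/sys/kernel/wakeup_reasons/last_resume_reason", "r");

        if (!f) {
                perror("last_resume_reason");
                return 1;
        }
        /* reason_show() prints one IRQ per line, with the action name when known */
        while (fgets(buf, sizeof(buf), f))
                fputs(buf, stdout);
        fclose(f);
        return 0;
}

Reading the file after a resume is expected to list the IRQs recorded by log_wakeup_reason() since the last PM_SUSPEND_PREPARE.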
Change-Id: I05482a5335599ab96c0a088a7d175c8d4cf1cf69 Signed-off-by: Ruchi Kandoi --- kernel/power/wakeup_reason.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index ae9bfece9d9..82e69fe52d0 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -35,7 +35,7 @@ static struct kobject *wakeup_reason; static spinlock_t resume_reason_lock; static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) + char *buf) { int irq_no, buf_offset = 0; struct irq_desc *desc; @@ -106,7 +106,7 @@ static struct notifier_block wakeup_reason_pm_notifier_block = { /* Initializes the sysfs parameter * registers the pm_event notifier */ -void __init wakeup_reason_init(void) +int __init wakeup_reason_init(void) { int retval; spin_lock_init(&resume_reason_lock); @@ -119,7 +119,7 @@ void __init wakeup_reason_init(void) if (!wakeup_reason) { printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n", __func__); - return; + return 1; } retval = sysfs_create_group(wakeup_reason, &attr_group); if (retval) { @@ -127,6 +127,7 @@ void __init wakeup_reason_init(void) printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n", __func__, retval); } + return 0; } late_initcall(wakeup_reason_init); From 3fafe0b133d1e463642301a037c392ad1443598a Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Fri, 28 Feb 2014 14:06:05 -0800 Subject: [PATCH 649/678] Power: Add an API call to log wakeup reasons Change-Id: I18bb8180efdb20fe784c193f9003bb0929f0c9a8 Signed-off-by: Ruchi Kandoi --- arch/arm/mach-tegra/pm-irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/pm-irq.c b/arch/arm/mach-tegra/pm-irq.c index 57d21361ca1..a6ca3380311 100644 --- a/arch/arm/mach-tegra/pm-irq.c +++ b/arch/arm/mach-tegra/pm-irq.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -217,7 +218,7 @@ static void tegra_pm_irq_syscore_resume_helper( (wake + 32 * index)); continue; } - + log_wakeup_reason(irq); desc = irq_to_desc(irq); if (!desc || !desc->action || !desc->action->name) { pr_info("Resume caused by WAKE%d, irq %d\n", From 830332b269a1625a67cbfb042671260cda045bb4 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Thu, 24 Apr 2014 14:31:57 -0700 Subject: [PATCH 650/678] Power: Changes the permission to read only for sysfs file /sys/kernel/wakeup_reasons/last_resume_reason Change-Id: If25e8e416ee9726996518b58b6551a61dc1591e3 Signed-off-by: Ruchi Kandoi --- kernel/power/wakeup_reason.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 82e69fe52d0..9823d9ccde4 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -34,7 +34,7 @@ static int irq_count; static struct kobject *wakeup_reason; static spinlock_t resume_reason_lock; -static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, +static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { int irq_no, buf_offset = 0; @@ -53,8 +53,7 @@ static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, return buf_offset; } -static struct kobj_attribute resume_reason = __ATTR(last_resume_reason, 0666, - reason_show, NULL); +static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); static struct attribute *attrs[] = { &resume_reason.attr, From df75618e54c855598450f3ead031978ace489d44 Mon 
Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Fri, 18 Apr 2014 14:07:28 -0700 Subject: [PATCH 651/678] prctl: adds PR_SET_TIMERSLACK_PID for setting timer slack of an arbitrary thread. Second argument is similar to PR_SET_TIMERSLACK, if non-zero then the slack is set to that value otherwise sets it to the default for the thread. Takes PID of the thread as the third argument. This allows power/performance management software to set timer slack for other threads according to its policy for the thread (such as when the thread is designated foreground vs. background activity) Change-Id: I744d451ff4e60dae69f38f53948ff36c51c14a3f Signed-off-by: Ruchi Kandoi Conflicts: include/linux/prctl.h kernel/sys.c Conflicts: include/linux/prctl.h --- include/linux/prctl.h | 6 ++++++ kernel/sys.c | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 31994587485..b2ad45b73f4 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -108,4 +108,10 @@ */ #define PR_GET_EFFECTIVE_TIMERSLACK 35 +/* Sets the timerslack for arbitrary threads + * arg2 slack value, 0 means "use default" + * arg3 pid of the thread whose timer slack needs to be set + */ +#define PR_SET_TIMERSLACK_PID 41 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index c97500887c0..2a8711ec3b9 100755 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -1718,6 +1719,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { struct task_struct *me = current; + struct task_struct *tsk; unsigned char comm[sizeof(me->comm)]; long error; @@ -1865,6 +1867,23 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else error = PR_MCE_KILL_DEFAULT; break; + case PR_SET_TIMERSLACK_PID: + rcu_read_lock(); + tsk = find_task_by_pid_ns((pid_t)arg3, &init_pid_ns); + if (tsk == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + get_task_struct(tsk); + rcu_read_unlock(); + if (arg2 <= 0) + tsk->timer_slack_ns = + tsk->default_timer_slack_ns; + else + tsk->timer_slack_ns = arg2; + put_task_struct(tsk); + error = 0; + break; default: error = -EINVAL; break; From 9a8c73b7c1fe4934b1915008599319985222706b Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Tue, 25 Mar 2014 16:43:28 -0700 Subject: [PATCH 652/678] nf: IDLETIMER: time-stamp and suspend/resume handling. Message notifications contains an additional timestamp field in nano seconds. The expiry time for the timers are modified during suspend/resume. If timer was supposed to expire while the system is suspended then a notification is sent when it resumes with the timestamp of the scheduled expiry. Removes the race condition for multiple work scheduled. 
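To make the new message format concrete, a small sketch of how a listener might pull the timestamp out of the uevent environment; the INTERFACE/STATE/TIME_NS variable names come from this patch, while the helper itself is hypothetical:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* env[] holds strings such as "INTERFACE=rmnet0", "STATE=inactive",
 * "TIME_NS=123456789012" as built by notify_netlink_uevent();
 * returns 0 if no TIME_NS entry is present. */
static uint64_t uevent_time_ns(char * const env[], int count)
{
        int i;

        for (i = 0; i < count; i++)
                if (!strncmp(env[i], "TIME_NS=", 8))
                        return strtoull(env[i] + 8, NULL, 10);
        return 0;
}

The value is nanoseconds on the boottime clock (get_monotonic_boottime()), so for an expiry that happened while suspended it reflects the scheduled expiry time rather than the moment the notification was delivered.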
Bug: 13247811 Change-Id: I752c5b00225fe7085482819f975cc0eb5af89bff Signed-off-by: Ruchi Kandoi --- net/netfilter/xt_IDLETIMER.c | 169 +++++++++++++++++++++++++++++++---- 1 file changed, 152 insertions(+), 17 deletions(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 542af525a87..f6b85949504 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -42,6 +42,11 @@ #include #include #include +#include +#include +#include +#include +#include #include struct idletimer_tg_attr { @@ -58,22 +63,65 @@ struct idletimer_tg { struct kobject *kobj; struct idletimer_tg_attr attr; + struct timespec delayed_timer_trigger; + struct timespec last_modified_timer; + struct timespec last_suspend_time; + struct notifier_block pm_nb; + + int timeout; unsigned int refcnt; + bool work_pending; bool send_nl_msg; bool active; }; static LIST_HEAD(idletimer_tg_list); static DEFINE_MUTEX(list_mutex); +static DEFINE_SPINLOCK(timestamp_lock); static struct kobject *idletimer_tg_kobj; +static bool check_for_delayed_trigger(struct idletimer_tg *timer, + struct timespec *ts) +{ + bool state; + struct timespec temp; + spin_lock_bh(×tamp_lock); + timer->work_pending = false; + if ((ts->tv_sec - timer->last_modified_timer.tv_sec) > timer->timeout || + timer->delayed_timer_trigger.tv_sec != 0) { + state = false; + temp.tv_sec = timer->timeout; + temp.tv_nsec = 0; + if (timer->delayed_timer_trigger.tv_sec != 0) { + temp = timespec_add(timer->delayed_timer_trigger, temp); + ts->tv_sec = temp.tv_sec; + ts->tv_nsec = temp.tv_nsec; + timer->delayed_timer_trigger.tv_sec = 0; + timer->work_pending = true; + schedule_work(&timer->work); + } else { + temp = timespec_add(timer->last_modified_timer, temp); + ts->tv_sec = temp.tv_sec; + ts->tv_nsec = temp.tv_nsec; + } + } else { + state = timer->active; + } + spin_unlock_bh(×tamp_lock); + return state; +} + static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) { char iface_msg[NLMSG_MAX_SIZE]; char state_msg[NLMSG_MAX_SIZE]; - char *envp[] = { iface_msg, state_msg, NULL }; + char timestamp_msg[NLMSG_MAX_SIZE]; + char *envp[] = { iface_msg, state_msg, timestamp_msg, NULL }; int res; + struct timespec ts; + uint64_t time_ns; + bool state; res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s", iface); @@ -81,12 +129,24 @@ static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) pr_err("message too long (%d)", res); return; } + + get_monotonic_boottime(&ts); + state = check_for_delayed_trigger(timer, &ts); res = snprintf(state_msg, NLMSG_MAX_SIZE, "STATE=%s", - timer->active ? "active" : "inactive"); + state ? 
"active" : "inactive"); + if (NLMSG_MAX_SIZE <= res) { pr_err("message too long (%d)", res); return; } + + time_ns = timespec_to_ns(&ts); + res = snprintf(timestamp_msg, NLMSG_MAX_SIZE, "TIME_NS=%llu", time_ns); + if (NLMSG_MAX_SIZE <= res) { + timestamp_msg[0] = '\0'; + pr_err("message too long (%d)", res); + } + pr_debug("putting nlmsg: <%s> <%s>\n", iface_msg, state_msg); kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp); return; @@ -151,9 +211,55 @@ static void idletimer_tg_expired(unsigned long data) struct idletimer_tg *timer = (struct idletimer_tg *) data; pr_debug("timer %s expired\n", timer->attr.attr.name); - + spin_lock_bh(×tamp_lock); timer->active = false; + timer->work_pending = true; schedule_work(&timer->work); + spin_unlock_bh(×tamp_lock); +} + +static int idletimer_resume(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + struct timespec ts; + unsigned long time_diff, now = jiffies; + struct idletimer_tg *timer = container_of(notifier, + struct idletimer_tg, pm_nb); + if (!timer) + return NOTIFY_DONE; + switch (pm_event) { + case PM_SUSPEND_PREPARE: + get_monotonic_boottime(&timer->last_suspend_time); + break; + case PM_POST_SUSPEND: + spin_lock_bh(×tamp_lock); + if (!timer->active) { + spin_unlock_bh(×tamp_lock); + break; + } + /* since jiffies are not updated when suspended now represents + * the time it would have suspended */ + if (time_after(timer->timer.expires, now)) { + get_monotonic_boottime(&ts); + ts = timespec_sub(ts, timer->last_suspend_time); + time_diff = timespec_to_jiffies(&ts); + if (timer->timer.expires > (time_diff + now)) { + mod_timer_pending(&timer->timer, + (timer->timer.expires - time_diff)); + } else { + del_timer(&timer->timer); + timer->timer.expires = 0; + timer->active = false; + timer->work_pending = true; + schedule_work(&timer->work); + } + } + spin_unlock_bh(×tamp_lock); + break; + default: + break; + } + return NOTIFY_DONE; } static int idletimer_tg_create(struct idletimer_tg_info *info) @@ -189,6 +295,18 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) info->timer->refcnt = 1; info->timer->send_nl_msg = (info->send_nl_msg == 0) ? 
false : true; info->timer->active = true; + info->timer->timeout = info->timeout; + + info->timer->delayed_timer_trigger.tv_sec = 0; + info->timer->delayed_timer_trigger.tv_nsec = 0; + info->timer->work_pending = false; + get_monotonic_boottime(&info->timer->last_modified_timer); + + info->timer->pm_nb.notifier_call = idletimer_resume; + ret = register_pm_notifier(&info->timer->pm_nb); + if (ret) + printk(KERN_WARNING "[%s] Failed to register pm notifier %d\n", + __func__, ret); mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); @@ -205,6 +323,34 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) return ret; } +static void reset_timer(const struct idletimer_tg_info *info) +{ + unsigned long now = jiffies; + struct idletimer_tg *timer = info->timer; + bool timer_prev; + + spin_lock_bh(×tamp_lock); + timer_prev = timer->active; + timer->active = true; + /* timer_prev is used to guard overflow problem in time_before*/ + if (!timer_prev || time_before(timer->timer.expires, now)) { + pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", + timer->timer.expires, now); + /* checks if there is a pending inactive notification*/ + if (timer->work_pending) + timer->delayed_timer_trigger = timer->last_modified_timer; + else { + timer->work_pending = true; + schedule_work(&timer->work); + } + } + + get_monotonic_boottime(&timer->last_modified_timer); + mod_timer(&timer->timer, + msecs_to_jiffies(info->timeout * 1000) + now); + spin_unlock_bh(×tamp_lock); +} + /* * The actual xt_tables plugin. */ @@ -228,9 +374,7 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb, } /* TODO: Avoid modifying timers on each packet */ - mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + now); - + reset_timer(info); return XT_CONTINUE; } @@ -259,17 +403,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { info->timer->refcnt++; - info->timer->active = true; - - if (time_before(info->timer->timer.expires, now)) { - schedule_work(&info->timer->work); - pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", - info->timer->timer.expires, now); - } - - mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + now); - + reset_timer(info); pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); } else { @@ -300,6 +434,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) list_del(&info->timer->entry); del_timer_sync(&info->timer->timer); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); + unregister_pm_notifier(&info->timer->pm_nb); kfree(info->timer->attr.attr.name); kfree(info->timer); } else { From 14d72da0d72215a9e1b9cab22931a6d899024c14 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Thu, 24 Apr 2014 14:11:08 -0700 Subject: [PATCH 653/678] nf: Remove compilation error caused by e254d2c28c880da28626af6d53b7add5f7d6afee Signed-off-by: Ruchi Kandoi --- net/netfilter/xt_IDLETIMER.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index f6b85949504..c6a76a80ee5 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -382,7 +382,6 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) { struct idletimer_tg_info *info = par->targinfo; int ret; - unsigned long now = jiffies; pr_debug("checkentry targinfo %s\n", info->label); From 
9b4909bb8c4b6df4a97143d3865ffe4883f2139f Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Thu, 24 Apr 2014 15:50:33 -0700 Subject: [PATCH 654/678] net: ipv4: current group_info should be put after using. Plug a group_info refcount leak in ping_init. group_info is only needed during initialization and the code failed to release the reference on exit. While here move grabbing the reference to a place where it is actually needed. Signed-off-by: Chuansheng Liu Signed-off-by: Zhang Dongxing Signed-off-by: xiaoming wang Signed-off-by: David S. Miller Conflicts: net/ipv4/ping.c Change-Id: I51931e439cce7f19b4179f5828d7355bb3b1dcfa --- net/ipv4/ping.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 45bc0dd517f..353a44b5188 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -247,26 +247,33 @@ int ping_init_sock(struct sock *sk) struct net *net = sock_net(sk); gid_t group = current_egid(); gid_t range[2]; - struct group_info *group_info = get_current_groups(); - int i, j, count = group_info->ngroups; + struct group_info *group_info; + int i, j, count; + int ret = 0; inet_get_ping_group_range_net(net, range, range+1); if (range[0] <= group && group <= range[1]) return 0; + group_info = get_current_groups(); + count = group_info->ngroups; for (i = 0; i < group_info->nblocks; i++) { int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); for (j = 0; j < cp_count; j++) { group = group_info->blocks[i][j]; if (range[0] <= group && group <= range[1]) - return 0; + goto out_release_group; } count -= cp_count; } - return -EACCES; + ret = -EACCES; + +out_release_group: + put_group_info(group_info); + return ret; } EXPORT_SYMBOL_GPL(ping_init_sock); From ae08ff5ec4410ddb628a41a79b3be813a798079a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 23 Aug 2012 02:09:11 +0000 Subject: [PATCH 655/678] netlink: fix possible spoofing from non-root processes Non-root user-space processes can send Netlink messages to other processes that are well-known for being subscribed to Netlink asynchronous notifications. This allows ilegitimate non-root process to send forged messages to Netlink subscribers. The userspace process usually verifies the legitimate origin in two ways: a) Socket credentials. If UID != 0, then the message comes from some ilegitimate process and the message needs to be dropped. b) Netlink portID. In general, portID == 0 means that the origin of the messages comes from the kernel. Thus, discarding any message not coming from the kernel. However, ctnetlink sets the portID in event messages that has been triggered by some user-space process, eg. conntrack utility. So other processes subscribed to ctnetlink events, eg. conntrackd, know that the event was triggered by some user-space action. Neither of the two ways to discard ilegitimate messages coming from non-root processes can help for ctnetlink. This patch adds capability validation in case that dst_pid is set in netlink_sendmsg(). This approach is aggressive since existing applications using any Netlink bus to deliver messages between two user-space processes will break. Note that the exception is NETLINK_USERSOCK, since it is reserved for netlink-to-netlink userspace communication. Still, if anyone wants that his Netlink bus allows netlink-to-netlink userspace, then they can set NL_NONROOT_SEND. 
However, by default, I don't think it makes sense to allow to use NETLINK_ROUTE to communicate two processes that are sending no matter what information that is not related to link/neighbouring/routing. They should be using NETLINK_USERSOCK instead for that. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0a4db0211da..86d7a7afb9d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1339,7 +1339,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_pid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) + if ((dst_group || dst_pid) && + !netlink_capable(sock, NL_NONROOT_SEND)) goto out; } else { dst_pid = nlk->dst_pid; @@ -2102,6 +2103,7 @@ static void __init netlink_add_usersock_entry(void) rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); nl_table[NETLINK_USERSOCK].module = THIS_MODULE; nl_table[NETLINK_USERSOCK].registered = 1; + nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND; netlink_table_ungrab(); } From b4b2a8c03cf28baec9ec71d88c06acc9c76dcc8e Mon Sep 17 00:00:00 2001 From: Andreas Henriksson Date: Thu, 7 Nov 2013 18:26:38 +0100 Subject: [PATCH 656/678] net: Fix "ip rule delete table 256" [ Upstream commit 13eb2ab2d33c57ebddc57437a7d341995fc9138c ] When trying to delete a table >= 256 using iproute2 the local table will be deleted. The table id is specified as a netlink attribute when it needs more then 8 bits and iproute2 then sets the table field to RT_TABLE_UNSPEC (0). Preconditions to matching the table id in the rule delete code doesn't seem to take the "table id in netlink attribute" into condition so the frh_get_table helper function never gets to do its job when matching against current rule. Use the helper function twice instead of peaking at the table value directly. Originally reported at: http://bugs.debian.org/724783 Reported-by: Nicolas HICHER Signed-off-by: Andreas Henriksson Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/fib_rules.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 27071ee2a4e..d0a1b52ea0d 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -442,7 +442,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (frh->action && (frh->action != rule->action)) continue; - if (frh->table && (frh_get_table(frh, tb) != rule->table)) + if (frh_get_table(frh, tb) && + (frh_get_table(frh, tb) != rule->table)) continue; if (tb[FRA_PRIORITY] && From 10900f39b1f366d7bd8ad5cc242e05d4d6c15b03 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Fri, 6 Jun 2014 13:01:56 -0700 Subject: [PATCH 657/678] ARM: nakasi: turn off android logger Signed-off-by: Mark Salyzyn Bug: 15384806 Change-Id: I623e15d1d7317b4aef58918536b9014fe2527470 --- arch/arm/configs/tegra3_android_defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig index 6d730d24862..584a9e6b307 100644 --- a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -454,7 +454,6 @@ CONFIG_RTC_DRV_RC5T583=y CONFIG_STAGING=y CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y -CONFIG_ANDROID_LOGGER=y CONFIG_ANDROID_RAM_CONSOLE=y CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION=y CONFIG_ANDROID_TIMED_GPIO=y From df9b8b3dde62ac2227b832b8ff35e1b22e7b7388 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Jun 2014 12:27:06 +0000 Subject: [PATCH 658/678] futex-prevent-requeue-pi-on-same-futex.patch futex: Forbid uaddr == uaddr2 in futex_requeue(..., requeue_pi=1) If uaddr == uaddr2, then we have broken the rule of only requeueing from a non-pi futex to a pi futex with this call. If we attempt this, then dangling pointers may be left for rt_waiter resulting in an exploitable condition. This change brings futex_requeue() into line with futex_wait_requeue_pi() which performs the same check as per commit 6f7b0a2a5 (futex: Forbid uaddr == uaddr2 in futex_wait_requeue_pi()) [ tglx: Compare the resulting keys as well, as uaddrs might be different depending on the mapping ] Fixes CVE-2014-3153. Reported-by: Pinkie Pie Signed-off-by: Will Drewry Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- kernel/futex.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index 12082519313..f0ff490eeb6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1254,6 +1254,13 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 curval2; if (requeue_pi) { + /* + * Requeue PI only works on two distinct uaddrs. This + * check is only valid for private futexes. See below. + */ + if (uaddr1 == uaddr2) + return -EINVAL; + /* * requeue_pi requires a pi_state, try to allocate it now * without any locks in case it fails. @@ -1292,6 +1299,15 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, if (unlikely(ret != 0)) goto out_put_key1; + /* + * The check above which compares uaddrs is not sufficient for + * shared futexes. 
We need to compare the keys: + */ + if (requeue_pi && match_futex(&key1, &key2)) { + ret = -EINVAL; + goto out_put_keys; + } + hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); @@ -2307,6 +2323,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (ret) goto out_key2; + /* + * The check above which compares uaddrs is not sufficient for + * shared futexes. We need to compare the keys: + */ + if (match_futex(&q.key, &key2)) { + ret = -EINVAL; + goto out_put_keys; + } + /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); From e0cdbd0a534af05de6133cd95c6bb59ebeca6e11 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Jun 2014 12:27:06 +0000 Subject: [PATCH 659/678] futex: Validate atomic acquisition in futex_lock_pi_atomic() We need to protect the atomic acquisition in the kernel against rogue user space which sets the user space futex to 0, so the kernel side acquisition succeeds while there is existing state in the kernel associated to the real owner. Verify whether the futex has waiters associated with kernel state. If it has, return -EINVAL. The state is corrupted already, so no point in cleaning it up. Subsequent calls will fail as well. Not our problem. [ tglx: Use futex_top_waiter() and explain why we do not need to try restoring the already corrupted user space state. ] Signed-off-by: Darren Hart Cc: Kees Cook Cc: Will Drewry Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- kernel/futex.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index f0ff490eeb6..26672940996 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -740,10 +740,18 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, return -EDEADLK; /* - * Surprise - we got the lock. Just return to userspace: + * Surprise - we got the lock, but we do not trust user space at all. */ - if (unlikely(!curval)) - return 1; + if (unlikely(!curval)) { + /* + * We verify whether there is kernel state for this + * futex. If not, we can safely assume, that the 0 -> + * TID transition is correct. If state exists, we do + * not bother to fixup the user space state as it was + * corrupted already. + */ + return futex_top_waiter(hb, key) ? -EINVAL : 1; + } uval = curval; From e56aff8e2c1277109ba8f1c5b98d930ff78b880b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Jun 2014 12:27:07 +0000 Subject: [PATCH 660/678] futex: Always cleanup owner tid in unlock_pi If the owner died bit is set at futex_unlock_pi, we currently do not cleanup the user space futex. So the owner TID of the current owner (the unlocker) persists. That's observable inconsistant state, especially when the ownership of the pi state got transferred. Clean it up unconditionally. 
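For orientation, the user space futex word this cleanup manipulates is a TID plus two flag bits (values as in include/linux/futex.h); the helper below is only an illustration of the state wake_futex_pi() now writes, not part of the change:

#include <stdint.h>

#define FUTEX_WAITERS           0x80000000
#define FUTEX_OWNER_DIED        0x40000000
#define FUTEX_TID_MASK          0x3fffffff

/* After the unlocker hands the lock over, the word becomes
 * FUTEX_WAITERS | new_tid: WAITERS stays set while kernel PI state
 * exists, and the owner-died bit is cleared unconditionally because
 * the unlocker is the owner. */
static inline uint32_t pi_futex_word_after_unlock(uint32_t new_tid)
{
        return FUTEX_WAITERS | (new_tid & FUTEX_TID_MASK);
}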
Signed-off-by: Thomas Gleixner Cc: Kees Cook Cc: Will Drewry Cc: Darren Hart Cc: stable@vger.kernel.org Conflicts: kernel/futex.c --- kernel/futex.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 26672940996..f316d1b81ca 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -875,6 +875,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) struct task_struct *new_owner; struct futex_pi_state *pi_state = this->pi_state; u32 curval, newval; + int ret = 0; if (!pi_state) return -EINVAL; @@ -898,23 +899,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) new_owner = this->task; /* - * We pass it to the next owner. (The WAITERS bit is always - * kept enabled while there is PI state around. We must also - * preserve the owner died bit.) + * We pass it to the next owner. The WAITERS bit is always + * kept enabled while there is PI state around. We cleanup the + * owner died bit, because we are the owner. */ - if (!(uval & FUTEX_OWNER_DIED)) { - int ret = 0; - - newval = FUTEX_WAITERS | task_pid_vnr(new_owner); + newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) - ret = -EFAULT; - else if (curval != uval) - ret = -EINVAL; - if (ret) { - raw_spin_unlock(&pi_state->pi_mutex.wait_lock); - return ret; - } + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) + ret = -EFAULT; + else if (curval != uval) + ret = -EINVAL; + if (ret) { + raw_spin_unlock(&pi_state->pi_mutex.wait_lock); + return ret; } raw_spin_lock_irq(&pi_state->owner->pi_lock); @@ -2136,9 +2133,10 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) /* * To avoid races, try to do the TID -> 0 atomic transition * again. If it succeeds then we can return without waking - * anyone else up: + * anyone else up. We only try this if neither the waiters nor + * the owner died bit are set. */ - if (!(uval & FUTEX_OWNER_DIED) && + if (!(uval & ~FUTEX_TID_MASK) && cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0)) goto pi_faulted; /* @@ -2170,11 +2168,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) /* * No waiters - kernel unlocks the futex: */ - if (!(uval & FUTEX_OWNER_DIED)) { - ret = unlock_futex_pi(uaddr, uval); - if (ret == -EFAULT) - goto pi_faulted; - } + ret = unlock_futex_pi(uaddr, uval); + if (ret == -EFAULT) + goto pi_faulted; out_unlock: spin_unlock(&hb->lock); From 668edda479aef5128bdb1c4ce976951954867971 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Jun 2014 12:27:08 +0000 Subject: [PATCH 661/678] futex: Make lookup_pi_state more robust The current implementation of lookup_pi_state has ambigous handling of the TID value 0 in the user space futex. We can get into the kernel even if the TID value is 0, because either there is a stale waiters bit or the owner died bit is set or we are called from the requeue_pi path or from user space just for fun. The current code avoids an explicit sanity check for pid = 0 in case that kernel internal state (waiters) are found for the user space address. This can lead to state leakage and worse under some circumstances. Handle the cases explicit: Waiter | pi_state | pi->owner | uTID | uODIED | ? 
[1] NULL | --- | --- | 0 | 0/1 | Valid [2] NULL | --- | --- | >0 | 0/1 | Valid [3] Found | NULL | -- | Any | 0/1 | Invalid [4] Found | Found | NULL | 0 | 1 | Valid [5] Found | Found | NULL | >0 | 1 | Invalid [6] Found | Found | task | 0 | 1 | Valid [7] Found | Found | NULL | Any | 0 | Invalid [8] Found | Found | task | ==taskTID | 0/1 | Valid [9] Found | Found | task | 0 | 0 | Invalid [10] Found | Found | task | !=taskTID | 0/1 | Invalid [1] Indicates that the kernel can acquire the futex atomically. We came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. [2] Valid, if TID does not belong to a kernel thread. If no matching thread is found then it indicates that the owner TID has died. [3] Invalid. The waiter is queued on a non PI futex [4] Valid state after exit_robust_list(), which sets the user space value to FUTEX_WAITERS | FUTEX_OWNER_DIED. [5] The user space value got manipulated between exit_robust_list() and exit_pi_state_list() [6] Valid state after exit_pi_state_list() which sets the new owner in the pi_state but cannot access the user space value. [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. [8] Owner and user space value match [9] There is no transient state which sets the user space TID to 0 except exit_robust_list(), but this is indicated by the FUTEX_OWNER_DIED bit. See [4] [10] There is no transient state which leaves owner and user space TID out of sync. Backport to 3.13 conflicts: kernel/futex.c Signed-off-by: Thomas Gleixner Signed-off-by: John Johansen Cc: Kees Cook Cc: Will Drewry Cc: Darren Hart Cc: stable@vger.kernel.org --- kernel/futex.c | 123 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 106 insertions(+), 17 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index f316d1b81ca..5c64512c6bc 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -586,6 +586,55 @@ void exit_pi_state_list(struct task_struct *curr) raw_spin_unlock_irq(&curr->pi_lock); } +/* + * We need to check the following states: + * + * Waiter | pi_state | pi->owner | uTID | uODIED | ? + * + * [1] NULL | --- | --- | 0 | 0/1 | Valid + * [2] NULL | --- | --- | >0 | 0/1 | Valid + * + * [3] Found | NULL | -- | Any | 0/1 | Invalid + * + * [4] Found | Found | NULL | 0 | 1 | Valid + * [5] Found | Found | NULL | >0 | 1 | Invalid + * + * [6] Found | Found | task | 0 | 1 | Valid + * + * [7] Found | Found | NULL | Any | 0 | Invalid + * + * [8] Found | Found | task | ==taskTID | 0/1 | Valid + * [9] Found | Found | task | 0 | 0 | Invalid + * [10] Found | Found | task | !=taskTID | 0/1 | Invalid + * + * [1] Indicates that the kernel can acquire the futex atomically. We + * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. + * + * [2] Valid, if TID does not belong to a kernel thread. If no matching + * thread is found then it indicates that the owner TID has died. + * + * [3] Invalid. The waiter is queued on a non PI futex + * + * [4] Valid state after exit_robust_list(), which sets the user space + * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. + * + * [5] The user space value got manipulated between exit_robust_list() + * and exit_pi_state_list() + * + * [6] Valid state after exit_pi_state_list() which sets the new owner in + * the pi_state but cannot access the user space value. + * + * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. 
+ * + * [8] Owner and user space value match + * + * [9] There is no transient state which sets the user space TID to 0 + * except exit_robust_list(), but this is indicated by the + * FUTEX_OWNER_DIED bit. See [4] + * + * [10] There is no transient state which leaves owner and user space + * TID out of sync. + */ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps) @@ -601,12 +650,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, plist_for_each_entry_safe(this, next, head, list) { if (match_futex(&this->key, key)) { /* - * Another waiter already exists - bump up - * the refcount and return its pi_state: + * Sanity check the waiter before increasing + * the refcount and attaching to it. */ pi_state = this->pi_state; /* - * Userspace might have messed up non-PI and PI futexes + * Userspace might have messed up non-PI and + * PI futexes [3] */ if (unlikely(!pi_state)) return -EINVAL; @@ -614,34 +664,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, WARN_ON(!atomic_read(&pi_state->refcount)); /* - * When pi_state->owner is NULL then the owner died - * and another waiter is on the fly. pi_state->owner - * is fixed up by the task which acquires - * pi_state->rt_mutex. - * - * We do not check for pid == 0 which can happen when - * the owner died and robust_list_exit() cleared the - * TID. + * Handle the owner died case: */ - if (pid && pi_state->owner) { + if (uval & FUTEX_OWNER_DIED) { + /* + * exit_pi_state_list sets owner to NULL and + * wakes the topmost waiter. The task which + * acquires the pi_state->rt_mutex will fixup + * owner. + */ + if (!pi_state->owner) { + /* + * No pi state owner, but the user + * space TID is not 0. Inconsistent + * state. [5] + */ + if (pid) + return -EINVAL; + /* + * Take a ref on the state and + * return. [4] + */ + goto out_state; + } + + /* + * If TID is 0, then either the dying owner + * has not yet executed exit_pi_state_list() + * or some waiter acquired the rtmutex in the + * pi state, but did not yet fixup the TID in + * user space. + * + * Take a ref on the state and return. [6] + */ + if (!pid) + goto out_state; + } else { /* - * Bail out if user space manipulated the - * futex value. + * If the owner died bit is not set, + * then the pi_state must have an + * owner. [7] */ - if (pid != task_pid_vnr(pi_state->owner)) + if (!pi_state->owner) return -EINVAL; } + /* + * Bail out if user space manipulated the + * futex value. If pi state exists then the + * owner TID must be the same as the user + * space TID. [9/10] + */ + if (pid != task_pid_vnr(pi_state->owner)) + return -EINVAL; + + out_state: atomic_inc(&pi_state->refcount); *ps = pi_state; - return 0; } } /* * We are the first waiter - try to look up the real owner and attach - * the new pi_state to it, but bail out when TID = 0 + * the new pi_state to it, but bail out when TID = 0 [1] */ if (!pid) return -ESRCH; @@ -669,6 +755,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return ret; } + /* + * No existing pi state. First waiter. [2] + */ pi_state = alloc_pi_state(); /* From d30e96a39a0a0fe22c3a783cc8d8460aebad67b5 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Fri, 13 Jun 2014 17:03:01 -0700 Subject: [PATCH 662/678] prctl: adds the capable(CAP_SYS_NICE) check to PR_SET_TIMERSLACK_PID. Adds a capable() check to make sure that arbitary apps do not change the timer slack for other apps. 
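A minimal userspace sketch of the new behaviour (illustration only, not part of the patch; the PR_SET_TIMERSLACK_PID constant below is an assumption and should be taken from the kernel headers actually in use): adjusting your own timer slack still works unprivileged, while targeting another pid now fails with EPERM unless the caller holds CAP_SYS_NICE.

#include <errno.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <unistd.h>

#ifndef PR_SET_TIMERSLACK_PID
#define PR_SET_TIMERSLACK_PID 41	/* assumed value; check your kernel headers */
#endif

int main(void)
{
	/* 50us (50000ns) slack for our own pid: allowed for any caller. */
	if (prctl(PR_SET_TIMERSLACK_PID, 50000UL, (unsigned long)getpid()) != 0)
		perror("PR_SET_TIMERSLACK_PID (self)");

	/* Targeting another pid (init, pid 1) should now fail without CAP_SYS_NICE. */
	if (prctl(PR_SET_TIMERSLACK_PID, 50000UL, 1UL) != 0 && errno == EPERM)
		printf("EPERM as expected without CAP_SYS_NICE\n");

	return 0;
}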
Bug: 15000427 Change-Id: I558a2551a0e3579c7f7e7aae54b28aa9d982b209 Signed-off-by: Ruchi Kandoi --- kernel/sys.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sys.c b/kernel/sys.c index 2a8711ec3b9..0136e44fea8 100755 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1868,6 +1868,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = PR_MCE_KILL_DEFAULT; break; case PR_SET_TIMERSLACK_PID: + if (current->pid != (pid_t)arg3 && + !capable(CAP_SYS_NICE)) + return -EPERM; rcu_read_lock(); tsk = find_task_by_pid_ns((pid_t)arg3, &init_pid_ns); if (tsk == NULL) { From bc43b910f22f0c0105d176d9ce0a6f6499ea9b9a Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 8 Jul 2014 13:00:01 -0700 Subject: [PATCH 663/678] net: wireless: bcmdhd: Add WIPHY_WOWLAN_ANY support Change-Id: I4a9de9ab40f6de2e5ef5830529cfb066d6c75b57 Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/bcmdhd/wl_cfg80211.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index 9f46c7765b0..b16d0bb5031 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -4753,6 +4753,9 @@ static s32 wl_setup_wiphy(struct wireless_dev *wdev, struct device *sdiofunc_dev #endif /* AP_SME flag can be advertised to remove patch from wpa_supplicant */ wdev->wiphy->flags |= WIPHY_FLAG_HAVE_AP_SME; +#if defined(CONFIG_PM) + wdev->wiphy->wowlan.flags = WIPHY_WOWLAN_ANY; +#endif WL_DBG(("Registering custom regulatory)\n")); wdev->wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY; wiphy_apply_custom_regulatory(wdev->wiphy, &brcm_regdom); From 512232af1e169bcda31b1d497758f94516ca6ac3 Mon Sep 17 00:00:00 2001 From: Shawn Lu Date: Sat, 4 Feb 2012 12:38:09 +0000 Subject: [PATCH 664/678] tcp_v4_send_reset: binding oif to iif in no sock case Binding RST packet outgoing interface to incoming interface for tcp v4 when there is no socket associate with it. when sk is not NULL, using sk->sk_bound_dev_if instead. (suggested by Eric Dumazet). This has few benefits: 1. tcp_v6_send_reset already did that. 2. This helps tcp connect with SO_BINDTODEVICE set. When connection is lost, we still able to sending out RST using same interface. 3. we are sending reply, it is most likely to be succeed if iif is used [android-3.4 commit e2446eaab5585555a38ea0df4e01ff313dbb4ac9] Change-Id: I44eed2e7168eb3586d1ea724a23ecc3492edb9d6 Signed-off-by: Shawn Lu Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6cdf6a28f6b..4c4962f414a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -650,6 +650,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; + /* When socket is gone, all binding information is lost. + * routing might fail in this case. using iif for oif to + * make sure we can deliver it + */ + arg.bound_dev_if = sk ? 
sk->sk_bound_dev_if : inet_iif(skb); net = dev_net(skb_dst(skb)->dev); ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, From 1da4d7ab66abaeaa19e566129fa6e7b2ca9bff8e Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Fri, 12 Oct 2012 04:34:17 +0000 Subject: [PATCH 665/678] tcp: resets are misrouted [ Upstream commit 4c67525849e0b7f4bd4fab2487ec9e43ea52ef29 ] After commit e2446eaa ("tcp_v4_send_reset: binding oif to iif in no sock case").. tcp resets are always lost, when routing is asymmetric. Yes, backing out that patch will result in misrouting of resets for dead connections which used interface binding when were alive, but we actually cannot do anything here. What's died that's died and correct handling normal unbound connections is obviously a priority. Comment to comment: > This has few benefits: > 1. tcp_v6_send_reset already did that. It was done to route resets for IPv6 link local addresses. It was a mistake to do so for global addresses. The patch fixes this as well. Actually, the problem appears to be even more serious than guaranteed loss of resets. As reported by Sergey Soloviev , those misrouted resets create a lot of arp traffic and huge amount of unresolved arp entires putting down to knees NAT firewalls which use asymmetric routing. Change-Id: I49311654de05ca97ae370931e4a6e83f6ca3ced7 Signed-off-by: Alexey Kuznetsov Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 7 ++++--- net/ipv6/tcp_ipv6.c | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4c4962f414a..b1ab6e41f2c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -651,10 +651,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; /* When socket is gone, all binding information is lost. - * routing might fail in this case. using iif for oif to - * make sure we can deliver it + * routing might fail in this case. No choice here, if we choose to force + * input interface, we will misroute in case of asymmetric route. */ - arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb); + if (sk) + arg.bound_dev_if = sk->sk_bound_dev_if; net = dev_net(skb_dst(skb)->dev); ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cdbce216521..6426916f323 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1046,7 +1046,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; - fl6.flowi6_oif = inet6_iif(skb); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = inet6_iif(skb); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); From 11b0286ff5d227affbbeaae994b5c4a134941307 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Nov 2011 17:29:23 -0500 Subject: [PATCH 666/678] ipv6: tcp: fix tcp_v6_conn_request() Since linux 2.6.26 (commit c6aefafb7ec6 : Add IPv6 support to TCP SYN cookies), we can drop a SYN packet reusing a TIME_WAIT socket. (As a matter of fact we fail to send the SYNACK answer) As the client resends its SYN packet after a one second timeout, we accept it, because first packet removed the TIME_WAIT socket before being dropped. This probably explains why nobody ever noticed or complained. 
[net-next commit 4d0fe50c75a547088e4304e5eb5f521514dfae46] Change-Id: I1575a5c76dd3a21bb9cdb08aa8f0580d20700c3d Reported-by: Jesse Young Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6426916f323..e972597c05c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1253,6 +1253,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); + treq->iif = sk->sk_bound_dev_if; + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) + treq->iif = inet6_iif(skb); + if (!isn) { struct inet_peer *peer = NULL; @@ -1262,12 +1269,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) atomic_inc(&skb->users); treq->pktopts = skb; } - treq->iif = sk->sk_bound_dev_if; - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); if (want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); From 4da5c6b7a7ee8c49ac4670ace77ce6970994d8c6 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 27 Feb 2014 13:38:26 +0900 Subject: [PATCH 667/678] net: ipv6: ping: Use socket mark in routing lookup [net-next commit bf439b3154ce49d81a79b14f9fab18af99018ae2] Change-Id: Id3b8fe1f44f5b990520781e327d5070827ba06fc Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv6/ping.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index bc30c4b4e48..4a5b24863c9 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -158,6 +158,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.saddr = np->saddr; fl6.daddr = *daddr; + fl6.flowi6_mark = sk->sk_mark; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); From 20051f15d314a9834604829e966463fddade7332 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 26 Mar 2014 19:35:41 +0900 Subject: [PATCH 668/678] net: ipv6: autoconf routes into per-device tables Currently, IPv6 router discovery always puts routes into RT6_TABLE_MAIN. This causes problems for connection managers that want to support multiple simultaneous network connections and want control over which one is used by default (e.g., wifi and wired). To work around this connection managers typically take the routes they prefer and copy them to static routes with low metrics in the main table. This puts the burden on the connection manager to watch netlink to see if the routes have changed, delete the routes when their lifetime expires, etc. Instead, this patch adds a per-interface sysctl to have the kernel put autoconf routes into different tables. This allows each interface to have its own autoconf table, and choosing the default interface (or using different interfaces at the same time for different types of traffic) can be done using appropriate ip rules. The sysctl behaves as follows: - = 0: default. Put routes into RT6_TABLE_MAIN as before. - > 0: manual. Put routes into the specified table. - < 0: automatic. Add the absolute value of the sysctl to the device's ifindex, and use that table. 
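As a rough worked example of the three modes just listed (a sketch only, mirroring the addrconf_rt_table() helper added by this patch; the function name below is illustrative): with the sysctl at 0 autoconf routes stay in the default table, with the sysctl at 100 they go to table 100, and with the sysctl at -100 on an interface whose ifindex is 3 they go to table 103.

/* Illustrative restatement of the table selection logic. */
static unsigned int example_autoconf_table(int sysctl, unsigned int ifindex,
					   unsigned int default_table)
{
	if (sysctl == 0)
		return default_table;		/* legacy: RT6_TABLE_MAIN and friends */
	if (sysctl > 0)
		return (unsigned int)sysctl;	/* manually chosen fixed table */
	return ifindex + (unsigned int)(-sysctl);	/* per-device table */
}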
The automatic mode is most useful in conjunction with net.ipv6.conf.default.accept_ra_rt_table. A connection manager or distribution could set it to, say, -100 on boot, and thereafter just use IP rules. [android-3.4 commit 5fe5c512af518d0abbbc0d2fafa8e355f518c2a9] Change-Id: I245b9ad56e72cd7b1e6fe8e1aa8c042affeb69a2 Signed-off-by: Lorenzo Colitti --- include/linux/ipv6.h | 2 ++ include/net/addrconf.h | 2 ++ net/ipv6/addrconf.c | 37 +++++++++++++++++++++- net/ipv6/route.c | 70 +++++++++++++++++------------------------- 4 files changed, 69 insertions(+), 42 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0c997767429..15395001dc5 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -161,6 +161,7 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif + __s32 accept_ra_rt_table; __s32 proxy_ndp; __s32 accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -213,6 +214,7 @@ enum { DEVCONF_DISABLE_IPV6, DEVCONF_ACCEPT_DAD, DEVCONF_FORCE_TLLAO, + DEVCONF_ACCEPT_RA_RT_TABLE, DEVCONF_MAX }; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index cbc6bb0a683..f48830293d8 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -166,6 +166,8 @@ extern int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr extern int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table); + /* Device notifier */ extern int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1587d0d9295..b10720f2903 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -192,6 +192,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .accept_ra_rt_table = 0, .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, @@ -226,6 +227,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .accept_ra_rt_table = 0, .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, @@ -1680,6 +1682,31 @@ static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad } #endif +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table) { + /* Determines into what table to put autoconf PIO/RIO/default routes + * learned on this device. + * + * - If 0, use the same table for every device. This puts routes into + * one of RT_TABLE_{PREFIX,INFO,DFLT} depending on the type of route + * (but note that these three are currently all equal to + * RT6_TABLE_MAIN). + * - If > 0, use the specified table. + * - If < 0, put routes into table dev->ifindex + (-rt_table). + */ + struct inet6_dev *idev = in6_dev_get(dev); + u32 table; + int sysctl = idev->cnf.accept_ra_rt_table; + if (sysctl == 0) { + table = default_table; + } else if (sysctl > 0) { + table = (u32) sysctl; + } else { + table = (unsigned) dev->ifindex + (-sysctl); + } + in6_dev_put(idev); + return table; +} + /* * Add prefix route. 
*/ @@ -1689,7 +1716,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_PREFIX, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_PREFIX), .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_expires = expires, @@ -3860,6 +3887,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif + array[DEVCONF_ACCEPT_RA_RT_TABLE] = cnf->accept_ra_rt_table; array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -4470,6 +4498,13 @@ static struct addrconf_sysctl_table }, #endif #endif + { + .procname = "accept_ra_rt_table", + .data = &ipv6_devconf.accept_ra_rt_table, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "proxy_ndp", .data = &ipv6_devconf.proxy_ndp, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6f60d8b64a1..7388ee85885 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -89,13 +89,13 @@ static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct rt6_info *rt6_add_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, unsigned pref); -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct rt6_info *rt6_get_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex); + const struct in6_addr *gwaddr); #endif static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) @@ -547,7 +547,6 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr) { - struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; @@ -589,8 +588,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, - dev->ifindex); + rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr); if (rt && !lifetime) { ip6_del_rt(rt); @@ -598,8 +596,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, - pref); + rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); @@ -1791,15 +1788,16 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, } #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct rt6_info *rt6_get_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex) + const struct in6_addr *gwaddr) { struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(net, RT6_TABLE_INFO); + table = fib6_get_table(dev_net(dev), + addrconf_rt_table(dev, RT6_TABLE_INFO)); if (table == NULL) return NULL; @@ -1809,7 +1807,7 @@ static 
struct rt6_info *rt6_get_route_info(struct net *net, goto out; for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_dev->ifindex != ifindex) + if (rt->rt6i_dev->ifindex != dev->ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; @@ -1823,21 +1821,21 @@ static struct rt6_info *rt6_get_route_info(struct net *net, return rt; } -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct rt6_info *rt6_add_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, unsigned pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_INFO, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_INFO), .fc_metric = IP6_RT_PRIO_USER, - .fc_ifindex = ifindex, + .fc_ifindex = dev->ifindex, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), .fc_nlinfo.pid = 0, .fc_nlinfo.nlh = NULL, - .fc_nlinfo.nl_net = net, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_copy(&cfg.fc_dst, prefix); @@ -1849,7 +1847,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, ip6_route_add(&cfg); - return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(dev, prefix, prefixlen, gwaddr); } #endif @@ -1858,7 +1856,8 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), + addrconf_rt_table(dev, RT6_TABLE_DFLT)); if (table == NULL) return NULL; @@ -1880,7 +1879,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_DFLT, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_DFLT), .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | @@ -1897,28 +1896,17 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, return rt6_get_dflt_router(gwaddr, dev); } -void rt6_purge_dflt_routers(struct net *net) -{ - struct rt6_info *rt; - struct fib6_table *table; - /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(net, RT6_TABLE_DFLT); - if (table == NULL) - return; +int rt6_addrconf_purge(struct rt6_info *rt, void *arg) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) + return -1; + return 0; +} -restart: - read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && - (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - ip6_del_rt(rt); - goto restart; - } - } - read_unlock_bh(&table->tb6_lock); +void rt6_purge_dflt_routers(struct net *net) +{ + fib6_clean_all(net, rt6_addrconf_purge, 0, NULL); } static void rtmsg_to_fib6_config(struct net *net, From 512ca1d804f3785437478d6c09e2e59629fe90f3 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Tue, 18 Mar 2014 20:52:27 +0900 Subject: [PATCH 669/678] net: add a sysctl to reflect the fwmark on replies Kernel-originated IP packets that have no user socket associated with them (e.g., ICMP errors and echo replies, TCP RSTs, etc.) are emitted with a mark of zero. Add a sysctl to make them have the same mark as the packet they are replying to. 
This allows an administrator that wishes to do so to use mark-based routing, firewalling, etc. for these replies by marking the original packets inbound. Tested using user-mode linux: - ICMP/ICMPv6 echo replies and errors. - TCP RST packets (IPv4 and IPv6). [android-3.4 commit 3356997e1e1b2aa9959f046203e6d0b193bbd7f7] Change-Id: If3dfa98be3115ef405366a90bb9514d453629baa Signed-off-by: Lorenzo Colitti --- Documentation/networking/ip-sysctl.txt | 14 ++++++++++++++ include/net/ip.h | 3 +++ include/net/ipv6.h | 3 +++ include/net/netns/ipv4.h | 1 + include/net/netns/ipv6.h | 1 + net/ipv4/icmp.c | 11 +++++++++-- net/ipv4/ip_output.c | 3 ++- net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv6/icmp.c | 6 ++++++ net/ipv6/sysctl_net_ipv6.c | 7 +++++++ net/ipv6/tcp_ipv6.c | 1 + 11 files changed, 54 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ca5cdcd0f0e..35ff17979b3 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -22,6 +22,13 @@ ip_no_pmtu_disc - BOOLEAN min_pmtu - INTEGER default 562 - minimum discovered Path MTU +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv4 reply packets that are not + associated with a socket for example, TCP RSTs or ICMP echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + route/max_size - INTEGER Maximum number of routes allowed in the kernel. Increase this when using large numbers of interfaces and/or routes. @@ -1036,6 +1043,13 @@ conf/all/forwarding - BOOLEAN proxy_ndp - BOOLEAN Do proxy ndp. +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv6 reply packets that are not + associated with a socket for example, TCP RSTs or ICMPv6 echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + conf/interface/*: Change special settings per interface. diff --git a/include/net/ip.h b/include/net/ip.h index aa76c7a4d9c..0f6e8f1f645 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -236,6 +236,9 @@ extern void ipfrag_init(void); extern void ip_static_sysctl_init(void); +#define IP4_REPLY_MARK(net, mark) \ + ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + static inline bool ip_is_fragment(const struct iphdr *iph) { return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 0bca86fc77d..d32527d2715 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -109,6 +109,9 @@ struct frag_hdr { #define IP6_MF 0x0001 +#define IP6_REPLY_MARK(net, mark) \ + ((net)->ipv6.sysctl.fwmark_reflect ? 
(mark) : 0) + #include /* sysctls */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d786b4fc02a..20f799d7ad3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -55,6 +55,7 @@ struct netns_ipv4 { int current_rt_cache_rebuild_count; unsigned int sysctl_ping_group_range[2]; + int sysctl_fwmark_reflect; atomic_t rt_genid; atomic_t dev_addr_genid; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 81abfcb2eb4..20b76abcb15 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -25,6 +25,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_mtu_expires; int ip6_rt_min_advmss; int icmpv6_time; + int fwmark_reflect; }; struct netns_ipv6 { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index cd9a67df0b5..19d18cb46c2 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -334,6 +334,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct sock *sk; struct inet_sock *inet; __be32 daddr; + u32 mark = IP4_REPLY_MARK(net, skb->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; @@ -346,6 +347,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) icmp_param->data.icmph.checksum = 0; inet->tos = ip_hdr(skb)->tos; + sk->sk_mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; ipc.opt = NULL; ipc.tx_flags = 0; @@ -357,6 +359,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = rt->rt_spec_dst; + fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); @@ -375,7 +378,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, const struct iphdr *iph, - __be32 saddr, u8 tos, + __be32 saddr, u8 tos, u32 mark, int type, int code, struct icmp_bxm *param) { @@ -387,6 +390,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->daddr = (param->replyopts.opt.opt.srr ? param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; + fl4->flowi4_mark = mark; fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; @@ -484,6 +488,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct flowi4 fl4; __be32 saddr; u8 tos; + u32 mark; struct net *net; struct sock *sk; @@ -580,6 +585,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) tos = icmp_pointers[type].error ? 
((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; + mark = IP4_REPLY_MARK(net, skb_in->mark); if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) goto out_unlock; @@ -596,11 +602,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; + sk->sk_mark = mark; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts.opt; ipc.tx_flags = 0; - rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, + rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, type, code, &icmp_param); if (IS_ERR(rt)) goto out_unlock; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8c6563361ab..070df0b113d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1487,7 +1487,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, daddr = replyopts.opt.opt.faddr; } - flowi4_init_output(&fl4, arg->bound_dev_if, 0, + flowi4_init_output(&fl4, arg->bound_dev_if, + IP4_REPLY_MARK(sock_net(sk), skb->mark), RT_TOS(ip_hdr(skb)->tos), RT_SCOPE_UNIVERSE, sk->sk_protocol, ip_reply_arg_flowi_flags(arg), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f8651024ea6..1d774f85bcd 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -743,6 +743,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_ping_group_range, }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv4.sysctl_fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6a12eda8761..a83b79611aa 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -396,6 +396,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int len; int hlimit; int err = 0; + u32 mark = IP6_REPLY_MARK(net, skb->mark); if ((u8 *)hdr < skb->head || (skb->network_header + sizeof(*hdr)) > skb->tail) @@ -461,6 +462,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) ipv6_addr_copy(&fl6.daddr, &hdr->saddr); if (saddr) ipv6_addr_copy(&fl6.saddr, saddr); + fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; @@ -469,6 +471,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -543,6 +546,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; int err = 0; int hlimit; + u32 mark = IP6_REPLY_MARK(net, skb->mark); saddr = &ipv6_hdr(skb)->daddr; @@ -559,11 +563,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipv6_addr_copy(&fl6.saddr, saddr); fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 6dcf5e7d661..4c27009d39e 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -47,6 +47,13 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv6.sysctl.fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = 
proc_dointvec + }, { } }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e972597c05c..c556119e0e6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1048,6 +1048,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, fl6.flowi6_proto = IPPROTO_TCP; if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) fl6.flowi6_oif = inet6_iif(skb); + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); From 982bcc0392d8c17e202e1f1b279f33a302fdd957 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 26 Mar 2014 13:03:12 +0900 Subject: [PATCH 670/678] net: support marking accepting TCP sockets When using mark-based routing, sockets returned from accept() may need to be marked differently depending on the incoming connection request. This is the case, for example, if different socket marks identify different networks: a listening socket may want to accept connections from all networks, but each connection should be marked with the network that the request came in on, so that subsequent packets are sent on the correct network. This patch adds a sysctl to mark TCP sockets based on the fwmark of the incoming SYN packet. If enabled, and an unmarked socket receives a SYN, then the SYN packet's fwmark is written to the connection's inet_request_sock, and later written back to the accepted socket when the connection is established. If the socket already has a nonzero mark, then the behaviour is the same as it is today, i.e., the listening socket's fwmark is used. Black-box tested using user-mode linux: - IPv4/IPv6 SYN+ACK, FIN, etc. packets are routed based on the mark of the incoming SYN packet. - The socket returned by accept() is marked with the mark of the incoming SYN packet. - Tested with syncookies=1 and syncookies=2. 
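A small userspace sketch of the effect (illustration only, not part of the patch): with net.ipv4.tcp_fwmark_accept enabled and an unmarked listening socket, the socket returned by accept() carries the fwmark of the incoming SYN, which can be read back with getsockopt(SO_MARK).

#include <stdio.h>
#include <sys/socket.h>

/* 'conn' is assumed to be a socket just returned by accept() on an
 * unmarked listener while net.ipv4.tcp_fwmark_accept is set. */
static void print_inherited_mark(int conn)
{
	unsigned int mark = 0;
	socklen_t len = sizeof(mark);

	if (getsockopt(conn, SOL_SOCKET, SO_MARK, &mark, &len) == 0)
		printf("accepted socket inherited fwmark %u\n", mark);
}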
[android-3.4 commit 4593f09b1f9939ec6ed2f8d7848def26b98c47ac] Change-Id: I5e8c9b989762a93f3eb5a0c1b4df44f62d57f3cb Signed-off-by: Lorenzo Colitti --- include/net/inet_sock.h | 10 ++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/inet_connection_sock.c | 6 ++++-- net/ipv4/syncookies.c | 3 ++- net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/syncookies.c | 4 +++- net/ipv6/tcp_ipv6.c | 3 ++- 9 files changed, 31 insertions(+), 6 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b897d6e6d0a..da4d79faee3 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -87,6 +87,7 @@ struct inet_request_sock { no_srccheck: 1; kmemcheck_bitfield_end(flags); struct ip_options_rcu *opt; + u32 ir_mark; }; static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) @@ -94,6 +95,15 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } +static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb) +{ + if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) { + return skb->mark; + } else { + return sk->sk_mark; + } +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 20f799d7ad3..76ebd40d524 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -56,6 +56,7 @@ struct netns_ipv4 { unsigned int sysctl_ping_group_range[2]; int sysctl_fwmark_reflect; + int sysctl_tcp_fwmark_accept; atomic_t rt_genid; atomic_t dev_addr_genid; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c14d88ad348..1f9a5312b9f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -358,7 +358,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, @@ -391,7 +391,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, struct rtable *rt; fl4 = &newinet->cork.fl.u.ip4; - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? 
opt->opt.faddr : ireq->rmt_addr, @@ -604,6 +604,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; + newsk->sk_mark = inet_rsk(req)->ir_mark; + newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; newicsk->icsk_probes_out = 0; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3bc5c8f7c71..0ab15bddbec 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -310,6 +310,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_mark = inet_request_mark(sk, skb); ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -348,7 +349,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, { struct flowi4 fl4; - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), + flowi4_init_output(&fl4, 0, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1d774f85bcd..2a4b1815172 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -750,6 +750,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_fwmark_accept", + .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b1ab6e41f2c..0b4a35e03df 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1344,6 +1344,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); + ireq->ir_mark = inet_request_mark(sk, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8a58e8cf664..d75a47db7fc 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -69,7 +69,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, final_p = fl6_update_dst(&fl6, np->opt, &final); ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ac838965ff3..dc673fcc7a7 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -215,6 +215,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq6->iif = inet6_iif(skb); + ireq->ir_mark = inet_request_mark(sk, skb); + req->expires = 0UL; req->retrans = 0; ireq->ecn_ok = ecn_ok; @@ -241,7 +243,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) final_p = fl6_update_dst(&fl6, np->opt, &final); ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; 
security_req_classify_flow(req, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c556119e0e6..572d556fb7b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -493,7 +493,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); fl6.flowlabel = 0; fl6.flowi6_oif = treq->iif; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); @@ -1255,6 +1255,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, tcp_hdr(skb)); treq->iif = sk->sk_bound_dev_if; + inet_rsk(req)->ir_mark = inet_request_mark(sk, skb); /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && From e55747f7c6935c3f36f82acd13332fdd8fcf5ada Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 11 Jan 2013 13:19:27 +0100 Subject: [PATCH 671/678] cgroup: Take css_set_lock from cgroup_css_sets_empty() As indicated in the comment above cgroup_css_sets_empty it needs the css_set_lock. But neither of the 2 call points have it, so rather then fixing the callers just take the lock inside cgroup_css_sets_empty(). Signed-off-by: Hans de Goede Change-Id: If7aea71824f6d0e3f2cc6c1ce236c3ae6be2037b --- kernel/cgroup.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 54a36fe288f..f3fbc90fbce 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3977,19 +3977,23 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) return !failed; } -/* checks if all of the css_sets attached to a cgroup have a refcount of 0. - * Must be called with css_set_lock held */ +/* Checks if all of the css_sets attached to a cgroup have a refcount of 0. */ static int cgroup_css_sets_empty(struct cgroup *cgrp) { struct cg_cgroup_link *link; + int retval = 1; + read_lock(&css_set_lock); list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { struct css_set *cg = link->cg; - if (atomic_read(&cg->refcount) > 0) - return 0; + if (atomic_read(&cg->refcount) > 0) { + retval = 0; + break; + } } + read_unlock(&css_set_lock); - return 1; + return retval; } static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) From 0ea56838c41f33a4f9c6f25019fd58419ce5bdb5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 10 Jan 2013 10:08:44 +0100 Subject: [PATCH 672/678] cgroup: Fix use after free of cgrp (cgrp->css_sets) Running a 3.4 kernel + Fedora-18 (systemd) userland on my Allwinner A10 (arm cortex a8), I'm seeing repeated, reproducable list_del list corruption errors when build with CONFIG_DEBUG_LIST, and the backtrace always shows free_css_set_work as the function making the problematic list_del call. I've tracked this doen to a use after free of the cgrp struct, specifically of the cgrp->css_sets list_head, which gets cleared by free_css_set_work. Since free_css_set_work runs form a workqueue, it is possible for it to not be done with clearing the list when the cgrp gets free-ed. To avoid this the code adding the links increases cgrp->count, and the freeing code running from the workqueue decreases cgrp->count *after* doing list_del, and then if the count goes to 0 calls cgroup_wakeup_rmdir_waiter(). 
However cgroup_rmdir() is missing a check for cgrp->count != 0, causing it to still continue with the rmdir (which leads to the free-ing of the cgrp), before free_css_set_work is done. Sometimes the free-ed memory is re-used before free_css_set_work gets around to unlinking link->cgrp_link_list, triggering the list_del list corruption messages. This patch fixes this by properly checking for cgrp->count != 0 and waiting for the cgroup_rmdir_waitq in that case. Change-Id: I9dbc02a0a75d5dffa1b65d67456e00139dea57c3 Signed-off-by: Hans de Goede --- kernel/cgroup.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f3fbc90fbce..0e4298fc19a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -272,7 +272,7 @@ static void check_for_release(struct cgroup *cgrp); /* * A queue for waiters to do rmdir() cgroup. A tasks will sleep when - * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some + * list_empty(&cgroup->children) && subsys has some * reference to css->refcnt. In general, this refcnt is expected to goes down * to zero, soon. * @@ -3935,6 +3935,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) struct cgroup_subsys *ss; unsigned long flags; bool failed = false; + + if (atomic_read(&cgrp->count) != 0) + return false; + local_irq_save(flags); for_each_subsys(cgrp->root, ss) { struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; From c8cfcbc6d6552b83ccba13b63d9fc20aed2c7a9f Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 10 May 2013 10:16:19 -0400 Subject: [PATCH 673/678] Enable setting security contexts on rootfs inodes. rootfs (ramfs) can support setting of security contexts by userspace due to the vfs fallback behavior of calling the security module to set the in-core inode state for security.* attributes when the filesystem does not provide an xattr handler. No xattr handler required as the inodes are pinned in memory and have no backing store. This is useful in allowing early userspace to label individual files within a rootfs while still providing a policy-defined default via genfs. Signed-off-by: Stephen Smalley --- security/selinux/hooks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6000792a055..9128cdc0071 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -423,6 +423,13 @@ static int sb_finish_set_opts(struct super_block *sb) if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) sbsec->flags |= SE_SBLABELSUPP; + /* + * Special handling for rootfs. Is genfs but supports + * setting SELinux context on in-core inodes. + */ + if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) + sbsec->flags |= SE_SBLABELSUPP; + /* Initialize the root inode. 
*/ rc = inode_doinit_with_dentry(root_inode, root); From 371a71df595b0f974705204591e85c2cd7da8c60 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Tue, 5 Aug 2014 15:25:10 -0700 Subject: [PATCH 674/678] HACK, DO NOT SHIP: Debug unmounting issue This adds extra debugging around unmount which is failing for Bug: 15445160 Change-Id: I509cc4f506ff39198fc215dbc6e10657ebf6e359 Signed-off-by: JP Abgrall --- fs/namespace.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 5e7f2e9a11c..edb80c0a67e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1265,6 +1265,7 @@ static int do_umount(struct vfsmount *mnt, int flags) LIST_HEAD(umount_list); retval = security_sb_umount(mnt, flags); + pr_err("DEBUG(jpa):%s(): security unmount %d\n", __func__, retval); if (retval) return retval; @@ -1275,6 +1276,7 @@ static int do_umount(struct vfsmount *mnt, int flags) * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] */ if (flags & MNT_EXPIRE) { + int mnt_count; if (mnt == current->fs->root.mnt || flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; @@ -1284,7 +1286,9 @@ static int do_umount(struct vfsmount *mnt, int flags) * all race cases, but it's a slowpath. */ br_write_lock(vfsmount_lock); - if (mnt_get_count(mnt) != 2) { + mnt_count = mnt_get_count(mnt); + pr_err("DEBUG(jpa):%s(): mnt_get_count() %d\n", __func__, mnt_count); + if (mnt_count != 2) { br_write_unlock(vfsmount_lock); return -EBUSY; } @@ -1323,8 +1327,10 @@ static int do_umount(struct vfsmount *mnt, int flags) * we just try to remount it readonly. */ down_write(&sb->s_umount); - if (!(sb->s_flags & MS_RDONLY)) + if (!(sb->s_flags & MS_RDONLY)) { retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); + pr_err("DEBUG(jpa):%s(): do_remount_sb() %d\n", __func__, retval); + } up_write(&sb->s_umount); return retval; } @@ -1336,6 +1342,7 @@ static int do_umount(struct vfsmount *mnt, int flags) if (!(flags & MNT_DETACH)) shrink_submounts(mnt, &umount_list); + pr_err("DEBUG(jpa):%s(): pre EBUSY (16)\n", __func__, retval); retval = -EBUSY; if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { if (!list_empty(&mnt->mnt_list)) @@ -1345,6 +1352,7 @@ static int do_umount(struct vfsmount *mnt, int flags) br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); + pr_err("DEBUG(jpa):%s(): ret %d\n", __func__, retval); return retval; } From e6eb2d1fc33d82ae92fc11c2216a556d0addaf58 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Tue, 5 Aug 2014 15:28:40 -0700 Subject: [PATCH 675/678] Revert "HACK, DO NOT SHIP: Debug unmounting issue" This reverts commit 3ed3cdbdee6a6e5ccc94131b83dd40244715e0ec. --- fs/namespace.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index edb80c0a67e..5e7f2e9a11c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1265,7 +1265,6 @@ static int do_umount(struct vfsmount *mnt, int flags) LIST_HEAD(umount_list); retval = security_sb_umount(mnt, flags); - pr_err("DEBUG(jpa):%s(): security unmount %d\n", __func__, retval); if (retval) return retval; @@ -1276,7 +1275,6 @@ static int do_umount(struct vfsmount *mnt, int flags) * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] */ if (flags & MNT_EXPIRE) { - int mnt_count; if (mnt == current->fs->root.mnt || flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; @@ -1286,9 +1284,7 @@ static int do_umount(struct vfsmount *mnt, int flags) * all race cases, but it's a slowpath. 
*/ br_write_lock(vfsmount_lock); - mnt_count = mnt_get_count(mnt); - pr_err("DEBUG(jpa):%s(): mnt_get_count() %d\n", __func__, mnt_count); - if (mnt_count != 2) { + if (mnt_get_count(mnt) != 2) { br_write_unlock(vfsmount_lock); return -EBUSY; } @@ -1327,10 +1323,8 @@ static int do_umount(struct vfsmount *mnt, int flags) * we just try to remount it readonly. */ down_write(&sb->s_umount); - if (!(sb->s_flags & MS_RDONLY)) { + if (!(sb->s_flags & MS_RDONLY)) retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); - pr_err("DEBUG(jpa):%s(): do_remount_sb() %d\n", __func__, retval); - } up_write(&sb->s_umount); return retval; } @@ -1342,7 +1336,6 @@ static int do_umount(struct vfsmount *mnt, int flags) if (!(flags & MNT_DETACH)) shrink_submounts(mnt, &umount_list); - pr_err("DEBUG(jpa):%s(): pre EBUSY (16)\n", __func__, retval); retval = -EBUSY; if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { if (!list_empty(&mnt->mnt_list)) @@ -1352,7 +1345,6 @@ static int do_umount(struct vfsmount *mnt, int flags) br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); - pr_err("DEBUG(jpa):%s(): ret %d\n", __func__, retval); return retval; } From 58e5886111bfe6b29fd5d3e5c39f50707eeb05ea Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Sun, 30 Dec 2012 01:37:30 +0300 Subject: [PATCH 676/678] um: add missing declaration of 'getrlimit()' and friends commit fdfa4c952844fce881df8c76de9c7180cbe913ab upstream. arch/um/os-Linux/start_up.c: In function 'check_coredump_limit': arch/um/os-Linux/start_up.c:338:16: error: storage size of 'lim' isn't known arch/um/os-Linux/start_up.c:339:2: error: implicit declaration of function 'getrlimit' [-Werror=implicit-function-declaration] Change-Id: I360f4c66269e9295addf724d4c2ebf0e493c5fad Signed-off-by: Sergei Trofimovich CC: Jeff Dike CC: Richard Weinberger CC: Al Viro CC: user-mode-linux-devel@lists.sourceforge.net CC: user-mode-linux-user@lists.sourceforge.net CC: linux-kernel@vger.kernel.org Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- arch/um/os-Linux/start_up.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 02ee9adff54..eac388e2aaf 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include "init.h" #include "kern_constants.h" From 07bf9b22c03595f448547058cf480297fadebba2 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Mon, 31 Mar 2014 16:23:51 +0900 Subject: [PATCH 677/678] net: core: Support UID-based routing. [android-3.4 commit 0b428749ce5969bc06c73855e360141b4e7126e8] This contains the following commits: 1. 0149763 net: core: Add a UID range to fib rules. 2. 1650474 net: core: Use the socket UID in routing lookups. 3. 0b16771 net: ipv4: Add the UID to the route cache. 4. ee058f1 net: core: Add a RTA_UID attribute to routes. This is so that userspace can do per-UID route lookups. 
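A brief sketch of the rule-matching semantics these commits introduce (illustration only; it mirrors the fib_uid_range_match() helper in the diff below, under an illustrative name): a rule that carries no UID range matches every lookup, while a rule with FRA_UID_START/FRA_UID_END set matches only flows whose UID falls inside that inclusive range. This is the mechanism that lets traffic from specific UID ranges be steered into dedicated routing tables.

/* Illustrative restatement of the UID range match. */
static int example_uid_range_match(unsigned int flow_uid,
				   unsigned int uid_start, unsigned int uid_end,
				   int range_is_set)
{
	if (!range_is_set)	/* no FRA_UID_START/FRA_UID_END on the rule */
		return 1;
	return flow_uid >= uid_start && flow_uid <= uid_end;
}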
Bug: 15413527 Bug: 16175826 Change-Id: I1285474c6734614d3bda6f61d88dfe89a4af7892 Signed-off-by: Lorenzo Colitti --- include/linux/fib_rules.h | 2 ++ include/linux/rtnetlink.h | 2 ++ include/net/fib_rules.h | 6 +++- include/net/flow.h | 8 ++++- include/net/ip.h | 1 + include/net/route.h | 6 ++-- net/core/fib_rules.c | 59 +++++++++++++++++++++++++++++++- net/ipv4/fib_frontend.c | 1 + net/ipv4/inet_connection_sock.c | 6 ++-- net/ipv4/ip_output.c | 3 +- net/ipv4/ping.c | 3 +- net/ipv4/raw.c | 3 +- net/ipv4/route.c | 12 +++++++ net/ipv4/syncookies.c | 3 +- net/ipv4/udp.c | 3 +- net/ipv4/xfrm4_policy.c | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/datagram.c | 1 + net/ipv6/inet6_connection_sock.c | 2 ++ net/ipv6/ping.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/route.c | 6 ++++ net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 3 ++ net/ipv6/udp.c | 1 + 25 files changed, 124 insertions(+), 12 deletions(-) diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 51da65b68b8..9dcdb6251cb 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -49,6 +49,8 @@ enum { FRA_TABLE, /* Extended table id */ FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, + FRA_UID_START, /* UID range */ + FRA_UID_END, __FRA_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8e872ead88b..97a95808012 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -283,6 +283,8 @@ enum rtattr_type_t { RTA_MP_ALGO, /* no longer used */ RTA_TABLE, RTA_MARK, + RTA_MFC_STATS, /* not used - backported from the future */ + RTA_UID, __RTA_MAX }; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 075f1e3a0fe..52e77a366bf 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -23,6 +23,8 @@ struct fib_rule { struct fib_rule __rcu *ctarget; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + uid_t uid_start; + uid_t uid_end; struct rcu_head rcu; struct net * fr_net; }; @@ -79,7 +81,9 @@ struct fib_rules_ops { [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 } + [FRA_GOTO] = { .type = NLA_U32 }, \ + [FRA_UID_START] = { .type = NLA_U32 }, \ + [FRA_UID_END] = { .type = NLA_U32 } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 57f15a7f1cd..1beab06a4ce 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -23,6 +23,7 @@ struct flowi_common { #define FLOWI_FLAG_PRECOW_METRICS 0x02 #define FLOWI_FLAG_CAN_SLEEP 0x04 __u32 flowic_secid; + uid_t flowic_uid; }; union flowi_uli { @@ -59,6 +60,7 @@ struct flowi4 { #define flowi4_proto __fl_common.flowic_proto #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid +#define flowi4_uid __fl_common.flowic_uid __be32 daddr; __be32 saddr; union flowi_uli uli; @@ -75,7 +77,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be32 sport) + __be16 dport, __be32 sport, + uid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = 0; @@ -85,6 +88,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_proto = proto; fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -102,6 +106,7 @@ struct flowi6 { #define flowi6_proto __fl_common.flowic_proto #define flowi6_flags 
__fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; __be32 flowlabel; @@ -145,6 +150,7 @@ struct flowi { #define flowi_proto u.__fl_common.flowic_proto #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/net/ip.h b/include/net/ip.h index 0f6e8f1f645..d7c988fe292 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -165,6 +165,7 @@ struct ip_reply_arg { int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ int bound_dev_if; + uid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 diff --git a/include/net/route.h b/include/net/route.h index db7b3432f07..36b9eb3080f 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -60,6 +60,7 @@ struct rtable { int rt_iif; int rt_oif; __u32 rt_mark; + uid_t rt_uid; /* Info on neighbour */ __be32 rt_gateway; @@ -146,7 +147,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sock_i_uid(sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -250,7 +251,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_CAN_SLEEP; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sock_i_uid(sk)); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d0a1b52ea0d..c9f46b01e7f 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -16,6 +16,12 @@ #include #include +#define INVALID_UID ((uid_t) -1) +#define uid_valid(uid) ((uid) != -1) +#define uid_lte(a, b) ((a) <= (b)) +#define uid_eq(a, b) ((a) == (b)) +#define uid_gte(a, b) ((a) >= (b)) + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -30,6 +36,8 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; + r->uid_start = INVALID_UID; + r->uid_end = INVALID_UID; r->fr_net = hold_net(ops->fro_net); /* The lock is not required here, the list in unreacheable @@ -176,6 +184,23 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static inline uid_t fib_nl_uid(struct nlattr *nla) +{ + return nla_get_u32(nla); +} + +static int nla_put_uid(struct sk_buff *skb, int idx, uid_t uid) +{ + return nla_put_u32(skb, idx, uid); +} + +static int fib_uid_range_match(struct flowi *fl, struct fib_rule *rule) +{ + return (!uid_valid(rule->uid_start) && !uid_valid(rule->uid_end)) || + (uid_gte(fl->flowi_uid, rule->uid_start) && + uid_lte(fl->flowi_uid, rule->uid_end)); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags) { @@ -190,6 +215,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; + if (!fib_uid_range_match(fl, rule)) + goto out; + ret = ops->match(rule, fl, flags); out: return 
(rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -360,6 +388,19 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } else if (rule->action == FR_ACT_GOTO) goto errout_free; + /* UID start and end must either both be valid or both unspecified. */ + rule->uid_start = rule->uid_end = INVALID_UID; + if (tb[FRA_UID_START] || tb[FRA_UID_END]) { + if (tb[FRA_UID_START] && tb[FRA_UID_END]) { + rule->uid_start = fib_nl_uid(tb[FRA_UID_START]); + rule->uid_end = fib_nl_uid(tb[FRA_UID_END]); + } + if (!uid_valid(rule->uid_start) || + !uid_valid(rule->uid_end) || + !uid_lte(rule->uid_start, rule->uid_end)) + goto errout_free; + } + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; @@ -466,6 +507,14 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK]))) continue; + if (tb[FRA_UID_START] && + !uid_eq(rule->uid_start, fib_nl_uid(tb[FRA_UID_START]))) + continue; + + if (tb[FRA_UID_END] && + !uid_eq(rule->uid_end, fib_nl_uid(tb[FRA_UID_END]))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -520,7 +569,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_FWMARK */ - + nla_total_size(4); /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_UID_START */ + + nla_total_size(4); /* FRA_UID_END */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -578,6 +629,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->target) NLA_PUT_U32(skb, FRA_GOTO, rule->target); + if (uid_valid(rule->uid_start)) + nla_put_uid(skb, FRA_UID_START, rule->uid_start); + + if (uid_valid(rule->uid_end)) + nla_put_uid(skb, FRA_UID_END, rule->uid_end); + if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 92fc5f69f5d..a54817aced3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -482,6 +482,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1f9a5312b9f..2b8e7d7df33 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -362,7 +362,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -395,7 +396,8 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? 
opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 070df0b113d..16ac1635db0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1493,7 +1493,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, RT_SCOPE_UNIVERSE, sk->sk_protocol, ip_reply_arg_flowi_flags(arg), daddr, rt->rt_spec_dst, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 353a44b5188..b79fa527029 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -771,7 +771,8 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 61714bd5292..415b3a806bb 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -564,7 +564,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, + sock_i_uid(sk)); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b5638545deb..6c58c9238c9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -741,6 +741,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) (rt1->rt_mark ^ rt2->rt_mark) | (rt1->rt_key_tos ^ rt2->rt_key_tos) | (rt1->rt_route_iif ^ rt2->rt_route_iif) | + (rt1->rt_uid ^ rt2->rt_uid) | (rt1->rt_oif ^ rt2->rt_oif)) == 0; } @@ -1886,6 +1887,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) fl4.flowi4_oif = rt->dst.dev->ifindex; fl4.flowi4_iif = skb->dev->ifindex; fl4.flowi4_mark = skb->mark; + fl4.flowi4_uid = skb->sk ? sock_i_uid(skb->sk) : 0; rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) @@ -2065,6 +2067,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2200,6 +2203,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2383,6 +2387,7 @@ out: return err; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2587,6 +2592,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? 
: dev_out->ifindex; rth->rt_oif = orig_oif; rth->rt_mark = fl4->flowi4_mark; + rth->rt_uid = fl4->flowi4_uid; rth->rt_gateway = fl4->daddr; rth->rt_spec_dst= fl4->saddr; rth->rt_peer_genid = 0; @@ -2838,6 +2844,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) rt_is_output_route(rth) && rth->rt_oif == flp4->flowi4_oif && rth->rt_mark == flp4->flowi4_mark && + rth->rt_uid == flp4->flowi4_uid && !((rth->rt_key_tos ^ flp4->flowi4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && @@ -2917,6 +2924,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; rt->rt_mark = ort->rt_mark; + rt->rt_uid = ort->rt_uid; rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; @@ -3012,6 +3020,9 @@ static int rt_fill_info(struct net *net, if (rt->rt_mark) NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); + if (rt->rt_uid != (uid_t) -1) + NLA_PUT_BE32(skb, RTA_UID, rt->rt_uid); + error = rt->dst.error; if (peer) { inet_peer_refcheck(rt->peer); @@ -3127,6 +3138,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void .flowi4_tos = rtm->rtm_tos, .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, .flowi4_mark = mark, + .flowi4_uid = tb[RTA_UID] ? nla_get_u32(tb[RTA_UID]) : current_uid(), }; rt = ip_route_output_key(net, &fl4); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0ab15bddbec..184a40f4564 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -353,7 +353,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, - ireq->loc_addr, th->source, th->dest); + ireq->loc_addr, th->source, th->dest, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b5a19340a9..55feb88305f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -928,7 +928,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a0b4c5da8d4..e8ee4279fd2 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -86,6 +86,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.rt.rt_oif = fl4->flowi4_oif; xdst->u.rt.rt_mark = fl4->flowi4_mark; + xdst->u.rt.rt_uid = fl4->flowi4_uid; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4be7f253a5f..d70560bd2e5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -702,6 +702,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b46e9f88ce3..c880af549e6 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -160,6 
+160,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index d75a47db7fc..219023f1839 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -72,6 +72,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); @@ -222,6 +223,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_sport = inet->inet_sport; fl6.fl6_dport = inet->inet_dport; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 4a5b24863c9..0082212c3d7 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -161,6 +161,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 343852e5c70..913830a4018 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -758,6 +758,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7388ee85885..113a70ff709 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2209,6 +2209,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2508,6 +2509,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (tb[RTA_OIF]) fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_UID]) + fl6.flowi6_uid = nla_get_u32(tb[RTA_UID]); + else + fl6.flowi6_uid = (iif ? 
(uid_t) -1 : current_uid()); + if (iif) { struct net_device *dev; dev = __dev_get_by_index(net, iif); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index dc673fcc7a7..a6a636d6a0e 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -246,6 +246,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 572d556fb7b..0c08b0bbca6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -251,6 +251,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); final_p = fl6_update_dst(&fl6, np->opt, &final); @@ -404,6 +405,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); @@ -496,6 +498,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bb95e8e1c6f..b9bc3ca4995 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1084,6 +1084,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (msg->msg_controllen) { opt = &opt_space; From 0b18de7e73108f712c84138d826a1fd1d940ffde Mon Sep 17 00:00:00 2001 From: Sreeram Ramachandran Date: Tue, 8 Jul 2014 11:37:03 -0700 Subject: [PATCH 678/678] Handle 'sk' being NULL in UID-based routing. [android-3.4 commit 0836a0c191f580ed69254e0b287cdce58481e978] Bug: 15413527 Bug: 16175826 Change-Id: If33bebb7b52c0ebfa8dac2452607bce0c2b0faa0 Signed-off-by: Sreeram Ramachandran Signed-off-by: Lorenzo Colitti --- include/net/route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/route.h b/include/net/route.h index 36b9eb3080f..5e9519ed80e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -147,7 +147,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport, sock_i_uid(sk)); + daddr, saddr, dport, sport, sk ? sock_i_uid(sk) : 0); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk);
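
The FRA_UID_START/FRA_UID_END values added by PATCH 677 are ordinary u32 netlink attributes on fib rules, so a per-UID-range policy routing rule can be installed from userspace over a plain NETLINK_ROUTE socket. The sketch below is illustrative only and not part of the series: it assumes the patched linux/fib_rules.h (which defines FRA_UID_START and FRA_UID_END) is on the include path, the routing table id 100 and the UID range 1000-1099 are arbitrary example values, and the netlink ACK is not read back.

/*
 * Minimal sketch: install "from UIDs 1000-1099 lookup table 100" via
 * RTM_NEWRULE. Table id and UID range are example values only.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/fib_rules.h>

static void add_u32_attr(struct nlmsghdr *nlh, int type, uint32_t value)
{
	/* Append one u32 rtattr at the current end of the message. */
	struct rtattr *rta = (struct rtattr *)((char *)nlh + NLMSG_ALIGN(nlh->nlmsg_len));

	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(sizeof(value));
	memcpy(RTA_DATA(rta), &value, sizeof(value));
	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
}

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct fib_rule_hdr frh;
		char attrs[32];		/* room for FRA_UID_START + FRA_UID_END */
	} req;
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0) {
		perror("socket(NETLINK_ROUTE)");
		return 1;
	}

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.frh));
	req.nlh.nlmsg_type = RTM_NEWRULE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
	req.frh.family = AF_INET;
	req.frh.action = FR_ACT_TO_TBL;
	req.frh.table = 100;				/* example routing table */

	add_u32_attr(&req.nlh, FRA_UID_START, 1000);	/* example UID range */
	add_u32_attr(&req.nlh, FRA_UID_END, 1099);

	if (sendto(fd, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) < 0) {
		perror("sendto(RTM_NEWRULE)");
		close(fd);
		return 1;
	}

	close(fd);
	return 0;
}

A per-UID route lookup works the same way in the other direction: attaching a u32 RTA_UID attribute to an RTM_GETROUTE request makes inet_rtm_getroute() (and its IPv6 counterpart) fill flowi4_uid/flowi6_uid from the request instead of from current_uid().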